cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
* [Cluster-devel] [PATCH 1/3] dlm: check if workqueues are NULL before destroying
@ 2019-01-08 15:08 David Windsor
  2019-01-08 15:08 ` [Cluster-devel] [PATCH 2/3] dlm: add TCP multihoming/failover support David Windsor
  2019-01-08 15:08 ` [Cluster-devel] [PATCH 3/3] dlm: allow binding to all network interfaces David Windsor
  0 siblings, 2 replies; 3+ messages in thread
From: David Windsor @ 2019-01-08 15:08 UTC (permalink / raw)
  To: cluster-devel.redhat.com

If a network failure occurs before any DLM traffic can be
generated, the send and receive workqueues can be NULL
when work_stop() is called.  Check to see if these workqueues
are NULL before calling destroy_workqueue().

Signed-off-by: David Windsor <dwindsor@redhat.com>
---
 fs/dlm/lowcomms.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 76976d6e50f9..905cbdbd31bc 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1630,8 +1630,10 @@ static void clean_writequeues(void)
 
 static void work_stop(void)
 {
-	destroy_workqueue(recv_workqueue);
-	destroy_workqueue(send_workqueue);
+	if (recv_workqueue)
+		destroy_workqueue(recv_workqueue);
+	if (send_workqueue)
+		destroy_workqueue(send_workqueue);
 }
 
 static int work_start(void)
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [Cluster-devel] [PATCH 2/3] dlm: add TCP multihoming/failover support
  2019-01-08 15:08 [Cluster-devel] [PATCH 1/3] dlm: check if workqueues are NULL before destroying David Windsor
@ 2019-01-08 15:08 ` David Windsor
  2019-01-08 15:08 ` [Cluster-devel] [PATCH 3/3] dlm: allow binding to all network interfaces David Windsor
  1 sibling, 0 replies; 3+ messages in thread
From: David Windsor @ 2019-01-08 15:08 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Add the ability to specify multiple source addresses
for DLM nodes so that multihomed configurations can
use multiple addresses and still be recognized by the
receiving node.

While each node is capable of being configured for multiple
IPs, DLM requires each node have only one active address
at a time.

This patch introduces a round-robin heuristic for selecting
the next active interface, but other heuristics could
easily be added later.

To support failover, a new configfs node is added by this patch:
/sys/kernel/config/dlm/cluster/comms/<comm>/error
This node is write-only, and is provided so that userspace
may signal the kernel when it detects a communications error.
The kernel will switch to the next local network interface
after 1 is written to the new configfs node.

Signed-off-by: David Windsor <dwindsor@redhat.com>
---
 fs/dlm/config.c   | 21 +++++++++++++++++++++
 fs/dlm/lowcomms.c | 34 +++++++++++++++++++++++-----------
 fs/dlm/lowcomms.h |  1 +
 3 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 1270551d24e3..23d2677e10bd 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -31,6 +31,7 @@
  * /config/dlm/<cluster>/comms/<comm>/local
  * /config/dlm/<cluster>/comms/<comm>/addr      (write only)
  * /config/dlm/<cluster>/comms/<comm>/addr_list (read only)
+ * /config/dlm/<cluster>/comms/<comm>/error	(write only)
  * The <cluster> level is useless, but I haven't figured out how to avoid it.
  */
 
@@ -198,6 +199,7 @@ enum {
 	COMM_ATTR_LOCAL,
 	COMM_ATTR_ADDR,
 	COMM_ATTR_ADDR_LIST,
+	COMM_ATTR_ERROR,
 };
 
 enum {
@@ -662,8 +664,26 @@ static ssize_t comm_addr_list_show(struct config_item *item, char *buf)
 	return 4096 - allowance;
 }
 
+static ssize_t comm_error_store(struct config_item *item, const char *buf,
+				size_t len)
+{
+	int ret, i;
+
+	ret = kstrtoint(buf, 0, &i);
+	if (ret < 0)
+		return ret;
+
+	if (i == 0)
+		return 0;
+
+	dlm_lowcomms_next_addr();
+
+	return len;
+}
+
 CONFIGFS_ATTR(comm_, nodeid);
 CONFIGFS_ATTR(comm_, local);
+CONFIGFS_ATTR_WO(comm_, error);
 CONFIGFS_ATTR_WO(comm_, addr);
 CONFIGFS_ATTR_RO(comm_, addr_list);
 
@@ -672,6 +692,7 @@ static struct configfs_attribute *comm_attrs[] = {
 	[COMM_ATTR_LOCAL] = &comm_attr_local,
 	[COMM_ATTR_ADDR] = &comm_attr_addr,
 	[COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list,
+	[COMM_ATTR_ERROR] = &comm_attr_error,
 	NULL,
 };
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 905cbdbd31bc..d37af1372ed0 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -159,6 +159,7 @@ static DEFINE_SPINLOCK(dlm_node_addrs_spin);
 static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
 static int dlm_local_count;
 static int dlm_allow_conn;
+static int dlm_local_idx;
 
 /* Work queues */
 static struct workqueue_struct *recv_workqueue;
@@ -330,7 +331,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
 	if (!sa_out)
 		return 0;
 
-	if (dlm_local_addr[0]->ss_family == AF_INET) {
+	if (dlm_local_addr[dlm_local_idx]->ss_family == AF_INET) {
 		struct sockaddr_in *in4  = (struct sockaddr_in *) &sas;
 		struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out;
 		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
@@ -519,6 +520,8 @@ static void lowcomms_error_report(struct sock *sk)
 				   dlm_config.ci_tcp_port, sk->sk_err,
 				   sk->sk_err_soft);
 	}
+
+	dlm_lowcomms_next_addr();
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 	if (orig_report)
@@ -572,7 +575,7 @@ static void add_sock(struct socket *sock, struct connection *con)
 static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 			  int *addr_len)
 {
-	saddr->ss_family =  dlm_local_addr[0]->ss_family;
+	saddr->ss_family =  dlm_local_addr[dlm_local_idx]->ss_family;
 	if (saddr->ss_family == AF_INET) {
 		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
 		in4_addr->sin_port = cpu_to_be16(port);
@@ -1169,7 +1172,7 @@ static void tcp_connect_to_sock(struct connection *con)
 
 	/* Bind to our cluster-known address connecting to avoid
 	   routing problems */
-	memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr));
+	memcpy(&src_addr, dlm_local_addr[dlm_local_idx], sizeof(src_addr));
 	make_sockaddr(&src_addr, 0, &addr_len);
 	result = sock->ops->bind(sock, (struct sockaddr *) &src_addr,
 				 addr_len);
@@ -1213,6 +1216,7 @@ static void tcp_connect_to_sock(struct connection *con)
 			  con->retries, result);
 		mutex_unlock(&con->sock_mutex);
 		msleep(1000);
+		dlm_lowcomms_next_addr();
 		lowcomms_connect_sock(con);
 		return;
 	}
@@ -1293,6 +1297,7 @@ static void init_local(void)
 	struct sockaddr_storage sas, *addr;
 	int i;
 
+	dlm_local_idx = 0;
 	dlm_local_count = 0;
 	for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) {
 		if (dlm_our_addr(&sas, i))
@@ -1375,16 +1380,9 @@ static int tcp_listen_for_all(void)
 	if (!con)
 		return -ENOMEM;
 
-	/* We don't support multi-homed hosts */
-	if (dlm_local_addr[1] != NULL) {
-		log_print("TCP protocol can't handle multi-homed hosts, "
-			  "try SCTP");
-		return -EINVAL;
-	}
-
 	log_print("Using TCP for communications");
 
-	sock = tcp_create_listen_sock(con, dlm_local_addr[0]);
+	sock = tcp_create_listen_sock(con, dlm_local_addr[dlm_local_idx]);
 	if (sock) {
 		add_sock(sock, con);
 		result = 0;
@@ -1716,6 +1714,20 @@ static void work_flush(void)
 	} while (!ok);
 }
 
+void dlm_lowcomms_next_addr(void)
+{
+	if (!dlm_local_count)
+		init_local();
+
+	if (dlm_local_idx < dlm_local_count)
+		dlm_local_idx++;
+	else
+		dlm_local_idx = 0;
+
+	dlm_lowcomms_stop();
+	dlm_lowcomms_start();
+}
+
 void dlm_lowcomms_stop(void)
 {
 	/* Set all the flags to prevent any
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 67462e54fc2f..7c7c5e4b5549 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -22,6 +22,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
 void dlm_lowcomms_commit_buffer(void *mh);
 int dlm_lowcomms_connect_node(int nodeid);
 int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
+void dlm_lowcomms_next_addr(void);
 
 #endif				/* __LOWCOMMS_DOT_H__ */
 
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [Cluster-devel] [PATCH 3/3] dlm: allow binding to all network interfaces
  2019-01-08 15:08 [Cluster-devel] [PATCH 1/3] dlm: check if workqueues are NULL before destroying David Windsor
  2019-01-08 15:08 ` [Cluster-devel] [PATCH 2/3] dlm: add TCP multihoming/failover support David Windsor
@ 2019-01-08 15:08 ` David Windsor
  1 sibling, 0 replies; 3+ messages in thread
From: David Windsor @ 2019-01-08 15:08 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Currently, in the kernel, DLM is only able to bind its
listen socket to a single network interface.  To support
more robust network configurations, DLM should be able
to bind to all network interfaces.

This patch adds a configfs node to enable/disable binding
to all network interfaces.  When 1 is written to this
configfs node, the DLM listen socket will bind to all network
interfaces.  When 0 is written to the node, DLM will bind
only to its current local network interface.

Signed-off-by: David Windsor <dwindsor@redhat.com>
---
 fs/dlm/config.c   | 21 +++++++++++++++++++++
 fs/dlm/config.h   |  3 ++-
 fs/dlm/lowcomms.c | 19 ++++++++++++++++++-
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 23d2677e10bd..77dc325c1972 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -29,6 +29,7 @@
  * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight
  * /config/dlm/<cluster>/comms/<comm>/nodeid
  * /config/dlm/<cluster>/comms/<comm>/local
+ * /config/dlm/<cluster>/comms/<comm>/bind_all
  * /config/dlm/<cluster>/comms/<comm>/addr      (write only)
  * /config/dlm/<cluster>/comms/<comm>/addr_list (read only)
  * /config/dlm/<cluster>/comms/<comm>/error	(write only)
@@ -39,6 +40,7 @@ static struct config_group *space_list;
 static struct config_group *comm_list;
 static struct dlm_comm *local_comm;
 static uint32_t dlm_comm_count;
+static int bind_all;
 
 struct dlm_clusters;
 struct dlm_cluster;
@@ -197,6 +199,7 @@ static struct configfs_attribute *cluster_attrs[] = {
 enum {
 	COMM_ATTR_NODEID = 0,
 	COMM_ATTR_LOCAL,
+	COMM_ATTR_BIND_ALL,
 	COMM_ATTR_ADDR,
 	COMM_ATTR_ADDR_LIST,
 	COMM_ATTR_ERROR,
@@ -681,8 +684,20 @@ static ssize_t comm_error_store(struct config_item *item, const char *buf,
 	return len;
 }
 
+static ssize_t comm_bind_all_show(struct config_item *item, char *buf)
+{
+	return sprintf(buf, "%d\n", bind_all);
+}
+
+static ssize_t comm_bind_all_store(struct config_item *item, const char *buf,
+				   size_t len)
+{
+	return kstrtoint(buf, 0, &bind_all);
+}
+
 CONFIGFS_ATTR(comm_, nodeid);
 CONFIGFS_ATTR(comm_, local);
+CONFIGFS_ATTR(comm_, bind_all);
 CONFIGFS_ATTR_WO(comm_, error);
 CONFIGFS_ATTR_WO(comm_, addr);
 CONFIGFS_ATTR_RO(comm_, addr_list);
@@ -693,6 +708,7 @@ static struct configfs_attribute *comm_attrs[] = {
 	[COMM_ATTR_ADDR] = &comm_attr_addr,
 	[COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list,
 	[COMM_ATTR_ERROR] = &comm_attr_error,
+	[COMM_ATTR_BIND_ALL] = &comm_attr_bind_all,
 	NULL,
 };
 
@@ -868,6 +884,11 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 	return 0;
 }
 
+int dlm_bind_all(void)
+{
+	return bind_all;
+}
+
 /* Config file defaults */
 #define DEFAULT_TCP_PORT       21064
 #define DEFAULT_BUFFER_SIZE     4096
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 6041eec886ab..e3fd8ce45874 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -21,7 +21,7 @@ struct dlm_config_node {
 	uint32_t comm_seq;
 };
 
-#define DLM_MAX_ADDR_COUNT 3
+#define DLM_MAX_ADDR_COUNT 9
 
 struct dlm_config_info {
 	int ci_tcp_port;
@@ -49,6 +49,7 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
 int dlm_comm_seq(int nodeid, uint32_t *seq);
 int dlm_our_nodeid(void);
 int dlm_our_addr(struct sockaddr_storage *addr, int num);
+int dlm_bind_all(void);
 
 #endif				/* __CONFIG_DOT_H__ */
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index d37af1372ed0..8b9d02485116 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1374,6 +1374,9 @@ static int sctp_listen_for_all(void)
 static int tcp_listen_for_all(void)
 {
 	struct socket *sock = NULL;
+	struct sockaddr_in *sin4;
+	struct sockaddr_in6 *sin6;
+	struct sockaddr_storage sas, laddr;
 	struct connection *con = nodeid2con(0, GFP_NOFS);
 	int result = -EINVAL;
 
@@ -1382,7 +1385,21 @@ static int tcp_listen_for_all(void)
 
 	log_print("Using TCP for communications");
 
-	sock = tcp_create_listen_sock(con, dlm_local_addr[dlm_local_idx]);
+	memcpy(&sas, dlm_local_addr[dlm_local_idx], sizeof(sas));
+	memcpy(&laddr, dlm_local_addr[dlm_local_idx], sizeof(laddr));
+	if (dlm_bind_all()) {
+		if (sas.ss_family == AF_INET) {
+			sin4 = (struct sockaddr_in *) &sas;
+			sin4->sin_addr.s_addr = htonl(INADDR_ANY);
+			memcpy(&laddr, sin4, sizeof(laddr));
+		} else {
+			sin6 = (struct sockaddr_in6 *) &sas;
+			sin6->sin6_addr = in6addr_any;
+			memcpy(&laddr, sin6, sizeof(laddr));
+		}
+	}
+
+	sock = tcp_create_listen_sock(con, &laddr);
 	if (sock) {
 		add_sock(sock, con);
 		result = 0;
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-01-08 15:08 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-08 15:08 [Cluster-devel] [PATCH 1/3] dlm: check if workqueues are NULL before destroying David Windsor
2019-01-08 15:08 ` [Cluster-devel] [PATCH 2/3] dlm: add TCP multihoming/failover support David Windsor
2019-01-08 15:08 ` [Cluster-devel] [PATCH 3/3] dlm: allow binding to all network interfaces David Windsor

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).