All of lore.kernel.org
 help / color / mirror / Atom feed
* [Ocfs2-devel] [patch 0/3] OCFS Configurable timeouts from userspace
@ 2006-11-17 23:36 abeekhof
  2006-11-17 23:36 ` [Ocfs2-devel] [patch 1/3] OCFS2 - Expose struct o2nm_cluster abeekhof
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: abeekhof @ 2006-11-17 23:36 UTC (permalink / raw)
  To: ocfs2-devel

--

Jeff has asked me to finish off his configurable timeouts patch.... 
here's the results :-)

One thing I'm not so happy about are the changes (I made) to o2net_msg struct 
in order to satisfy the logic in o2net_check_handshake()

Presumably the logic in o2net_check_handshake() should be changed?

Andrew

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [patch 1/3] OCFS2 - Expose struct o2nm_cluster
  2006-11-17 23:36 [Ocfs2-devel] [patch 0/3] OCFS Configurable timeouts from userspace abeekhof
@ 2006-11-17 23:36 ` abeekhof
  2006-11-17 23:36 ` [Ocfs2-devel] [patch 3/3] OCFS2 Configurable timeouts - Protocol changes abeekhof
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 13+ messages in thread
From: abeekhof @ 2006-11-17 23:36 UTC (permalink / raw)
  To: ocfs2-devel

Signed-off-by: Andrew Beekhof <abeekhof@suse.de>
---
 fs/ocfs2/cluster/nodemanager.c |   13 +------------
 fs/ocfs2/cluster/nodemanager.h |   14 ++++++++++++++
 2 files changed, 15 insertions(+), 12 deletions(-)

Index: fs/ocfs2/cluster/nodemanager.c
===================================================================
--- fs/ocfs2/cluster/nodemanager.c.orig	2006-11-15 11:58:34.000000000 +0100
+++ fs/ocfs2/cluster/nodemanager.c	2006-11-15 12:02:57.000000000 +0100
@@ -35,7 +35,7 @@
 /* for now we operate under the assertion that there can be only one
  * cluster active at a time.  Changing this will require trickling
  * cluster references throughout where nodes are looked up */
-static struct o2nm_cluster *o2nm_single_cluster = NULL;
+struct o2nm_cluster *o2nm_single_cluster = NULL;
 
 #define OCFS2_MAX_HB_CTL_PATH 256
 static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
@@ -97,17 +97,6 @@ const char *o2nm_get_hb_ctl_path(void)
 }
 EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path);
 
-struct o2nm_cluster {
-	struct config_group	cl_group;
-	unsigned		cl_has_local:1;
-	u8			cl_local_node;
-	rwlock_t		cl_nodes_lock;
-	struct o2nm_node  	*cl_nodes[O2NM_MAX_NODES];
-	struct rb_root		cl_node_ip_tree;
-	/* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
-	unsigned long	cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
-};
-
 struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
 {
 	struct o2nm_node *node = NULL;
Index: fs/ocfs2/cluster/nodemanager.h
===================================================================
--- fs/ocfs2/cluster/nodemanager.h.orig	2006-11-15 12:01:55.000000000 +0100
+++ fs/ocfs2/cluster/nodemanager.h	2006-11-15 12:03:25.000000000 +0100
@@ -53,6 +53,20 @@ struct o2nm_node {
 	unsigned long		nd_set_attributes;
 };
 
+struct o2nm_cluster {
+	struct config_group	cl_group;
+	unsigned		cl_has_local:1;
+	u8			cl_local_node;
+	rwlock_t		cl_nodes_lock;
+	struct o2nm_node  	*cl_nodes[O2NM_MAX_NODES];
+	struct rb_root		cl_node_ip_tree;
+
+	/* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
+	unsigned long	cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
+};
+
+extern struct o2nm_cluster *o2nm_single_cluster;
+
 u8 o2nm_this_node(void);
 
 int o2nm_configured_node_map(unsigned long *map, unsigned bytes);

--

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [patch 3/3] OCFS2 Configurable timeouts - Protocol changes
  2006-11-17 23:36 [Ocfs2-devel] [patch 0/3] OCFS Configurable timeouts from userspace abeekhof
  2006-11-17 23:36 ` [Ocfs2-devel] [patch 1/3] OCFS2 - Expose struct o2nm_cluster abeekhof
@ 2006-11-17 23:36 ` abeekhof
  2006-11-19 19:57   ` Mark Fasheh
  2006-11-17 23:36 ` [Ocfs2-devel] [patch 2/3] OCFS2 Configurable timeouts abeekhof
  2006-11-18  2:35 ` [Ocfs2-devel] Re: [patch 0/3] OCFS Configurable timeouts from userspace Andrew Beekhof
  3 siblings, 1 reply; 13+ messages in thread
From: abeekhof @ 2006-11-17 23:36 UTC (permalink / raw)
  To: ocfs2-devel

The addition of two dummy fields is a temporary measure to 
   satisfy the logic in o2net_check_handshake() and will be
   rectified in a future version of this patch

Signed-off-by: Andrew Beekhof <abeekhof@suse.de>
---
 fs/ocfs2/cluster/tcp.c          |   44 ++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/cluster/tcp.h          |    2 +
 fs/ocfs2/cluster/tcp_internal.h |    6 ++++-
 3 files changed, 51 insertions(+), 1 deletion(-)


Index: fs/ocfs2/cluster/tcp.c
===================================================================
--- fs/ocfs2/cluster/tcp.c.orig	2006-11-17 16:26:15.000000000 +0100
+++ fs/ocfs2/cluster/tcp.c	2006-11-17 16:26:27.000000000 +0100
@@ -1121,6 +1121,33 @@ static int o2net_check_handshake(struct 
 		return -1;
 	}
 
+	/*
+	 * Ensure timeouts are consistent with other nodes, otherwise
+	 * we can end up with one node thinking that the other must be down,
+	 * but isn't. This can ultimately cause corruption.
+	 */
+	if (be32_to_cpu(hand->o2net_idle_timeout_ms) !=
+				o2net_idle_timeout(sc->sc_node)) {
+		mlog(ML_NOTICE, SC_NODEF_FMT " uses network idle timeout of "
+		     "%u ms, but we use %u ms locally.  disconnecting\n",
+		     SC_NODEF_ARGS(sc),
+		     be32_to_cpu(hand->o2net_idle_timeout_ms),
+		     o2net_idle_timeout(sc->sc_node));
+		o2net_ensure_shutdown(nn, sc, -ENOTCONN);
+		return -1;
+	}
+
+	if (be32_to_cpu(hand->o2net_keepalive_delay_ms) !=
+			o2net_keepalive_delay(sc->sc_node)) {
+		mlog(ML_NOTICE, SC_NODEF_FMT " uses keepalive delay of "
+		     "%u ms, but we use %u ms locally.  disconnecting\n",
+		     SC_NODEF_ARGS(sc),
+		     be32_to_cpu(hand->o2net_keepalive_delay_ms),
+		     o2net_keepalive_delay(sc->sc_node));
+		o2net_ensure_shutdown(nn, sc, -ENOTCONN);
+		return -1;
+	}
+
 	sc->sc_handshake_ok = 1;
 
 	spin_lock(&nn->nn_lock);
@@ -1269,6 +1296,21 @@ static int o2net_set_nodelay(struct sock
 	return ret;
 }
 
+static void o2net_initialize_handshake(void)
+{
+	static int initialized = 0;
+	if(initialized)
+		return;
+
+	initialized = 1;
+	o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(
+		o2net_idle_timeout(NULL));
+	o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32(
+		o2net_keepalive_delay(NULL));
+	o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32(
+		o2net_reconnect_delay(NULL));
+}
+
 /* ------------------------------------------------------------ */
 
 /* called when a connect completes and after a sock is accepted.  the
@@ -1281,6 +1323,7 @@ static void o2net_sc_connect_completed(v
               (unsigned long long)O2NET_PROTOCOL_VERSION,
 	      (unsigned long long)be64_to_cpu(o2net_hand->connector_id));
 
+	o2net_initialize_handshake();
 	o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand));
 	sc_put(sc);
 }
@@ -1668,6 +1711,7 @@ static int o2net_accept_one(struct socke
 	o2net_register_callbacks(sc->sc_sock->sk, sc);
 	o2net_sc_queue_work(sc, &sc->sc_rx_work);
 
+	o2net_initialize_handshake();
 	o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand));
 
 out:
Index: fs/ocfs2/cluster/tcp.h
===================================================================
--- fs/ocfs2/cluster/tcp.h.orig	2006-11-17 16:26:15.000000000 +0100
+++ fs/ocfs2/cluster/tcp.h	2006-11-17 16:26:27.000000000 +0100
@@ -47,6 +47,8 @@ struct o2net_msg
 	__be32 status;
 	__be32 key;
 	__be32 msg_num;
+	__be64 dummy;
+	__be32 dummy2;
 	__u8  buf[0];
 };
 
Index: fs/ocfs2/cluster/tcp_internal.h
===================================================================
--- fs/ocfs2/cluster/tcp_internal.h.orig	2006-11-17 16:26:15.000000000 +0100
+++ fs/ocfs2/cluster/tcp_internal.h	2006-11-17 16:26:54.000000000 +0100
@@ -48,10 +48,14 @@
  * 	- full 64 bit i_size in the metadata lock lvbs
  * 	- introduction of "rw" lock and pushing meta/data locking down
  */
-#define O2NET_PROTOCOL_VERSION 4ULL
+#define O2NET_PROTOCOL_VERSION 5ULL
 struct o2net_handshake {
 	__be64	protocol_version;
 	__be64	connector_id;
+	__be32  o2hb_heartbeat_timeout_ms;
+	__be32  o2net_idle_timeout_ms;
+	__be32  o2net_keepalive_delay_ms;
+	__be32  o2net_reconnect_delay_ms;
 };
 
 struct o2net_node {

--

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [patch 2/3] OCFS2 Configurable timeouts
  2006-11-17 23:36 [Ocfs2-devel] [patch 0/3] OCFS Configurable timeouts from userspace abeekhof
  2006-11-17 23:36 ` [Ocfs2-devel] [patch 1/3] OCFS2 - Expose struct o2nm_cluster abeekhof
  2006-11-17 23:36 ` [Ocfs2-devel] [patch 3/3] OCFS2 Configurable timeouts - Protocol changes abeekhof
@ 2006-11-17 23:36 ` abeekhof
  2006-11-19 19:47   ` Mark Fasheh
  2006-11-18  2:35 ` [Ocfs2-devel] Re: [patch 0/3] OCFS Configurable timeouts from userspace Andrew Beekhof
  3 siblings, 1 reply; 13+ messages in thread
From: abeekhof @ 2006-11-17 23:36 UTC (permalink / raw)
  To: ocfs2-devel

Signed-off-by: Andrew Beekhof <abeekhof@suse.de>
---
 fs/ocfs2/cluster/nodemanager.c  |  164 ++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/cluster/nodemanager.h  |    3 
 fs/ocfs2/cluster/tcp.c          |   60 +++++++++++---
 fs/ocfs2/cluster/tcp.h          |    7 +
 fs/ocfs2/cluster/tcp_internal.h |    6 -
 5 files changed, 222 insertions(+), 18 deletions(-)


Index: fs/ocfs2/cluster/nodemanager.c
===================================================================
--- fs/ocfs2/cluster/nodemanager.c.orig	2006-11-15 12:02:57.000000000 +0100
+++ fs/ocfs2/cluster/nodemanager.c	2006-11-15 12:06:24.000000000 +0100
@@ -532,6 +532,164 @@ static struct o2nm_node_group *to_o2nm_n
 }
 #endif
 
+struct o2nm_cluster_attribute {
+	struct configfs_attribute attr;
+	ssize_t (*show)(struct o2nm_cluster *, char *);
+	ssize_t (*store)(struct o2nm_cluster *, const char *, size_t);
+};
+
+static ssize_t o2nm_cluster_attr_write(const char *page, ssize_t count,
+                                       unsigned int *val)
+{
+	unsigned long tmp;
+	char *p = (char *)page;
+
+	tmp = simple_strtoul(p, &p, 0);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	if (tmp == 0)
+		return -EINVAL;
+	if (tmp >= (u32)-1)
+		return -ERANGE;
+
+	*val = tmp;
+
+	return count;
+}
+
+static ssize_t o2nm_cluster_attr_idle_timeout_ms_read(struct o2nm_cluster *cluster,
+                                                 char *page)
+{
+	return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms);
+}
+
+static ssize_t o2nm_cluster_attr_idle_timeout_ms_write(struct o2nm_cluster *cluster,
+                                                  const char *page,
+						  size_t count)
+{
+	ssize_t ret;
+	unsigned int val;
+
+	ret =  o2nm_cluster_attr_write(page, count, &val);
+
+	if (ret > 0) {
+		if (val <= cluster->cl_keepalive_delay_ms) {
+			mlog(ML_NOTICE, "o2net: idle timeout must be larger "
+			     "than keepalive delay\n");
+			return -EINVAL;
+		}
+		cluster->cl_idle_timeout_ms = val;
+	}
+
+	return ret;
+}
+
+static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read(struct o2nm_cluster *cluster,
+                                                 char *page)
+{
+	return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms);
+}
+
+static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write(struct o2nm_cluster *cluster,
+                                                  const char *page,
+						  size_t count)
+{
+	ssize_t ret;
+	unsigned int val;
+
+	ret =  o2nm_cluster_attr_write(page, count, &val);
+
+	if (ret > 0) {
+		if (val >= cluster->cl_idle_timeout_ms) {
+			mlog(ML_NOTICE, "o2net: keepalive delay must be "
+			     "smaller than idle timeout\n");
+			return -EINVAL;
+		}
+		cluster->cl_keepalive_delay_ms = val;
+	}
+
+	return ret;
+}
+
+static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read(struct o2nm_cluster *cluster,
+                                                 char *page)
+{
+	return sprintf(page, "%u\n", cluster->cl_reconnect_delay_ms);
+}
+
+static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(struct o2nm_cluster *cluster,
+                                                  const char *page,
+						  size_t count)
+{
+	return o2nm_cluster_attr_write(page, count,
+	                               &cluster->cl_reconnect_delay_ms);
+}
+static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
+	.attr	= { .ca_owner = THIS_MODULE,
+		    .ca_name = "idle_timeout_ms",
+		    .ca_mode = S_IRUGO | S_IWUSR },
+	.show	= o2nm_cluster_attr_idle_timeout_ms_read,
+	.store	= o2nm_cluster_attr_idle_timeout_ms_write,
+};
+
+static struct o2nm_cluster_attribute o2nm_cluster_attr_keepalive_delay_ms = {
+	.attr	= { .ca_owner = THIS_MODULE,
+		    .ca_name = "keepalive_delay_ms",
+		    .ca_mode = S_IRUGO | S_IWUSR },
+	.show	= o2nm_cluster_attr_keepalive_delay_ms_read,
+	.store	= o2nm_cluster_attr_keepalive_delay_ms_write,
+};
+
+static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
+	.attr	= { .ca_owner = THIS_MODULE,
+		    .ca_name = "reconnect_delay_ms",
+		    .ca_mode = S_IRUGO | S_IWUSR },
+	.show	= o2nm_cluster_attr_reconnect_delay_ms_read,
+	.store	= o2nm_cluster_attr_reconnect_delay_ms_write,
+};
+
+static struct configfs_attribute *o2nm_cluster_attrs[] = {
+	&o2nm_cluster_attr_idle_timeout_ms.attr,
+	&o2nm_cluster_attr_keepalive_delay_ms.attr,
+	&o2nm_cluster_attr_reconnect_delay_ms.attr,
+	NULL,
+};
+static ssize_t o2nm_cluster_show(struct config_item *item,
+                                 struct configfs_attribute *attr,
+                                 char *page)
+{
+	struct o2nm_cluster *cluster = to_o2nm_cluster(item);
+	struct o2nm_cluster_attribute *o2nm_cluster_attr =
+		container_of(attr, struct o2nm_cluster_attribute, attr);
+	ssize_t ret = 0;
+
+	if (o2nm_cluster_attr->show)
+		ret = o2nm_cluster_attr->show(cluster, page);
+	return ret;
+}
+
+static ssize_t o2nm_cluster_store(struct config_item *item,
+                                  struct configfs_attribute *attr,
+                                  const char *page, size_t count)
+{
+	struct o2nm_cluster *cluster = to_o2nm_cluster(item);
+	struct o2nm_cluster_attribute *o2nm_cluster_attr =
+		container_of(attr, struct o2nm_cluster_attribute, attr);
+	ssize_t ret;
+
+	if (o2nm_cluster_attr->store == NULL) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = o2nm_cluster_attr->store(cluster, page, count);
+	if (ret < count)
+		goto out;
+out:
+	return ret;
+}
+
 static struct config_item *o2nm_node_group_make_item(struct config_group *group,
 						     const char *name)
 {
@@ -613,10 +771,13 @@ static void o2nm_cluster_release(struct 
 
 static struct configfs_item_operations o2nm_cluster_item_ops = {
 	.release	= o2nm_cluster_release,
+	.show_attribute		= o2nm_cluster_show,
+	.store_attribute	= o2nm_cluster_store,
 };
 
 static struct config_item_type o2nm_cluster_type = {
 	.ct_item_ops	= &o2nm_cluster_item_ops,
+	.ct_attrs	= o2nm_cluster_attrs,
 	.ct_owner	= THIS_MODULE,
 };
 
@@ -667,6 +828,9 @@ static struct config_group *o2nm_cluster
 	cluster->cl_group.default_groups[2] = NULL;
 	rwlock_init(&cluster->cl_nodes_lock);
 	cluster->cl_node_ip_tree = RB_ROOT;
+	cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
+	cluster->cl_idle_timeout_ms    = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
+	cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
 
 	ret = &cluster->cl_group;
 	o2nm_single_cluster = cluster;
Index: fs/ocfs2/cluster/nodemanager.h
===================================================================
--- fs/ocfs2/cluster/nodemanager.h.orig	2006-11-15 12:03:25.000000000 +0100
+++ fs/ocfs2/cluster/nodemanager.h	2006-11-15 12:05:32.000000000 +0100
@@ -60,6 +60,9 @@ struct o2nm_cluster {
 	rwlock_t		cl_nodes_lock;
 	struct o2nm_node  	*cl_nodes[O2NM_MAX_NODES];
 	struct rb_root		cl_node_ip_tree;
+	unsigned int		cl_idle_timeout_ms;
+	unsigned int		cl_keepalive_delay_ms;
+	unsigned int		cl_reconnect_delay_ms;
 
 	/* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
 	unsigned long	cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
Index: fs/ocfs2/cluster/tcp.c
===================================================================
--- fs/ocfs2/cluster/tcp.c.orig	2006-11-15 11:58:34.000000000 +0100
+++ fs/ocfs2/cluster/tcp.c	2006-11-15 12:06:24.000000000 +0100
@@ -147,6 +147,28 @@ static void o2net_listen_data_ready(stru
 static void o2net_sc_send_keep_req(void *arg);
 static void o2net_idle_timer(unsigned long data);
 static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
+static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
+
+/*
+ * FIXME: These should use to_o2nm_cluster_from_node(), but we end up
+ * losing our parent link to the cluster during shutdown. This can be
+ * solved by adding a pre-removal callback to configfs, or passing
+ * around the cluster with the node. -jeffm
+ */
+static inline int o2net_reconnect_delay(struct o2nm_node *node)
+{
+	return o2nm_single_cluster->cl_reconnect_delay_ms;
+}
+
+static inline int o2net_keepalive_delay(struct o2nm_node *node)
+{
+	return o2nm_single_cluster->cl_keepalive_delay_ms;
+}
+
+static inline int o2net_idle_timeout(struct o2nm_node *node)
+{
+	return o2nm_single_cluster->cl_idle_timeout_ms;
+}
 
 static inline int o2net_sys_err_to_errno(enum o2net_system_error err)
 {
@@ -271,6 +293,8 @@ static void sc_kref_release(struct kref 
 {
 	struct o2net_sock_container *sc = container_of(kref,
 					struct o2net_sock_container, sc_kref);
+	BUG_ON(timer_pending(&sc->sc_idle_timeout));
+
 	sclog(sc, "releasing\n");
 
 	if (sc->sc_sock) {
@@ -424,9 +448,9 @@ static void o2net_set_nn_state(struct o2
 		/* delay if we're withing a RECONNECT_DELAY of the
 		 * last attempt */
 		delay = (nn->nn_last_connect_attempt +
-			 msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS))
+			 msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
 			- jiffies;
-		if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS))
+		if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
 			delay = 0;
 		mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
 		queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
@@ -1103,7 +1127,7 @@ static int o2net_check_handshake(struct 
 	/* set valid and queue the idle timers only if it hasn't been
 	 * shut down already */
 	if (nn->nn_sc == sc) {
-		o2net_sc_postpone_idle(sc);
+		o2net_sc_reset_idle_timer(sc);
 		o2net_set_nn_state(nn, sc, 1, 0);
 	}
 	spin_unlock(&nn->nn_lock);
@@ -1280,8 +1304,10 @@ static void o2net_idle_timer(unsigned lo
 
 	do_gettimeofday(&now);
 
-	printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for 10 "
-	     "seconds, shutting it down.\n", SC_NODEF_ARGS(sc));
+	printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
+	     "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
+		     o2net_idle_timeout(sc->sc_node) / 1000,
+		     o2net_idle_timeout(sc->sc_node) % 1000);
 	mlog(ML_NOTICE, "here are some times that might help debug the "
 	     "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
 	     "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
@@ -1299,14 +1325,21 @@ static void o2net_idle_timer(unsigned lo
 	o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
 }
 
-static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
+static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
 {
 	o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
 	o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
-				    O2NET_KEEPALIVE_DELAY_SECS * HZ);
+		      msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node)));
 	do_gettimeofday(&sc->sc_tv_timer);
 	mod_timer(&sc->sc_idle_timeout,
-		  jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ));
+	       jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node)));
+}
+
+static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
+{
+	/* Only push out an existing timer */
+	if (timer_pending(&sc->sc_idle_timeout))
+		o2net_sc_reset_idle_timer(sc);
 }
 
 /* this work func is kicked whenever a path sets the nn state which doesn't
@@ -1427,9 +1460,12 @@ static void o2net_connect_expired(void *
 
 	spin_lock(&nn->nn_lock);
 	if (!nn->nn_sc_valid) {
+		struct o2nm_node *node = nn->nn_sc->sc_node;
 		mlog(ML_ERROR, "no connection established with node %u after "
-		     "%u seconds, giving up and returning errors.\n",
-		     o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS);
+		     "%u.%u seconds, giving up and returning errors.\n",
+		     o2net_num_from_nn(nn),
+		     o2net_idle_timeout(node) / 1000,
+		     o2net_idle_timeout(node) % 1000);
 
 		o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
 	}
@@ -1480,14 +1516,14 @@ static void o2net_hb_node_up_cb(struct o
 
 	/* ensure an immediate connect attempt */
 	nn->nn_last_connect_attempt = jiffies -
-		(msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1);
+		(msecs_to_jiffies(o2net_reconnect_delay(node)) + 1);
 
 	if (node_num != o2nm_this_node()) {
 		/* heartbeat doesn't work unless a local node number is
 		 * configured and doing so brings up the o2net_wq, so we can
 		 * use it.. */
 		queue_delayed_work(o2net_wq, &nn->nn_connect_expired,
-				   O2NET_IDLE_TIMEOUT_SECS * HZ);
+		                   msecs_to_jiffies(o2net_idle_timeout(node)));
 
 		/* believe it or not, accept and node hearbeating testing
 		 * can succeed for this node before we got here.. so
Index: fs/ocfs2/cluster/tcp.h
===================================================================
--- fs/ocfs2/cluster/tcp.h.orig	2006-11-15 11:58:34.000000000 +0100
+++ fs/ocfs2/cluster/tcp.h	2006-11-15 12:06:25.000000000 +0100
@@ -54,6 +54,13 @@ typedef int (o2net_msg_handler_func)(str
 
 #define O2NET_MAX_PAYLOAD_BYTES  (4096 - sizeof(struct o2net_msg))
 
+/* same as hb delay, we're waiting for another node to recognize our hb */
+#define O2NET_RECONNECT_DELAY_MS_DEFAULT	2000	
+
+#define O2NET_KEEPALIVE_DELAY_MS_DEFAULT	5000
+#define O2NET_IDLE_TIMEOUT_MS_DEFAULT		10000
+
+
 /* TODO: figure this out.... */
 static inline int o2net_link_down(int err, struct socket *sock)
 {
Index: fs/ocfs2/cluster/tcp_internal.h
===================================================================
--- fs/ocfs2/cluster/tcp_internal.h.orig	2006-11-15 11:58:34.000000000 +0100
+++ fs/ocfs2/cluster/tcp_internal.h	2006-11-15 12:06:26.000000000 +0100
@@ -27,17 +27,11 @@
 #define O2NET_MSG_KEEP_REQ_MAGIC  ((u16)0xfa57)
 #define O2NET_MSG_KEEP_RESP_MAGIC ((u16)0xfa58)
 
-/* same as hb delay, we're waiting for another node to recognize our hb */
-#define O2NET_RECONNECT_DELAY_MS	O2HB_REGION_TIMEOUT_MS
-
 /* we're delaying our quorum decision so that heartbeat will have timed
  * out truly dead nodes by the time we come around to making decisions
  * on their number */
 #define O2NET_QUORUM_DELAY_MS	((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS)
 
-#define O2NET_KEEPALIVE_DELAY_SECS	5
-#define O2NET_IDLE_TIMEOUT_SECS		10
-
 /* 
  * This version number represents quite a lot, unfortunately.  It not
  * only represents the raw network message protocol on the wire but also

--

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] Re: [patch 0/3] OCFS Configurable timeouts from userspace
  2006-11-17 23:36 [Ocfs2-devel] [patch 0/3] OCFS Configurable timeouts from userspace abeekhof
                   ` (2 preceding siblings ...)
  2006-11-17 23:36 ` [Ocfs2-devel] [patch 2/3] OCFS2 Configurable timeouts abeekhof
@ 2006-11-18  2:35 ` Andrew Beekhof
  3 siblings, 0 replies; 13+ messages in thread
From: Andrew Beekhof @ 2006-11-18  2:35 UTC (permalink / raw)
  To: ocfs2-devel

Apologies, quilt seems to have misplaced the comments at the top of  
each patch:

Patch 1:
From: Andrew Beekhof <abeekhof@suse.de>
Subject: OCFS2 - Expose struct o2nm_cluster

Subsequent patches (namely userspace heartbeat and configurable  
timeouts)
   require access to the o2nm_cluster struct.  This patch does the
   necessary shuffling.

Patch 2:
From: Jeff Mahoney <jeffm@suse.de>
Subject: OCFS2 Configurable timeouts

Allow configuration of OCFS2 timeouts from userspace via configfs

Patch 3:
From: Andrew Beekhof <abeekhof@suse.de>
Subject: OCFS2 Configurable timeouts - Protocol changes

Modify the OCFS2 handshake to ensure essential timeouts are configured
   identically on all nodes.
Included is the field for userspace-heartbeat timeout to avoid the  
need for
   further protocol changes.

The addition of two dummy fields is a temporary measure to
    satisfy the logic in o2net_check_handshake() and will be
    rectified in a future version of this patch


On Nov 18, 2006, at 8:36 AM, abeekhof@suse.de wrote:

> --
>
> Jeff has asked me to finish off his configurable timeouts patch....
> here's the results :-)
>
> One thing I'm not so happy about are the changes (I made) to  
> o2net_msg struct
> in order to satisfy the logic in o2net_check_handshake()
>
> Presumably the logic in o2net_check_handshake() should be changed?
>
> Andrew

--
Andrew Beekhof

"Would the last person to leave please turn out the enlightenment?" -  
TISM

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [patch 2/3] OCFS2 Configurable timeouts
  2006-11-17 23:36 ` [Ocfs2-devel] [patch 2/3] OCFS2 Configurable timeouts abeekhof
@ 2006-11-19 19:47   ` Mark Fasheh
  2006-11-19 23:30     ` Andrew Beekhof
  0 siblings, 1 reply; 13+ messages in thread
From: Mark Fasheh @ 2006-11-19 19:47 UTC (permalink / raw)
  To: ocfs2-devel

Hi Andrew,
	Thanks for sending these out. Your first patch looks fine. My
comments for this one are inlined below.

On Sat, Nov 18, 2006 at 08:36:02AM +0100, abeekhof@suse.de wrote:
> +static ssize_t o2nm_cluster_attr_idle_timeout_ms_read(struct o2nm_cluster *cluster,
> +                                                 char *page)
> +{
> +	return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms);
> +}
> +
> +static ssize_t o2nm_cluster_attr_idle_timeout_ms_write(struct o2nm_cluster *cluster,
> +                                                  const char *page,
> +						  size_t count)
> +{
> +	ssize_t ret;
> +	unsigned int val;
> +
> +	ret =  o2nm_cluster_attr_write(page, count, &val);
> +
> +	if (ret > 0) {
> +		if (val <= cluster->cl_keepalive_delay_ms) {
> +			mlog(ML_NOTICE, "o2net: idle timeout must be larger "
> +			     "than keepalive delay\n");
> +			return -EINVAL;
> +		}
> +		cluster->cl_idle_timeout_ms = val;
> +	}
> +
> +	return ret;
> +}
> +
> +static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read(struct o2nm_cluster *cluster,
> +                                                 char *page)
> +{
> +	return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms);
> +}
> +
> +static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write(struct o2nm_cluster *cluster,
> +                                                  const char *page,
> +						  size_t count)
> +{
> +	ssize_t ret;
> +	unsigned int val;
> +
> +	ret =  o2nm_cluster_attr_write(page, count, &val);
> +
> +	if (ret > 0) {
> +		if (val >= cluster->cl_idle_timeout_ms) {
> +			mlog(ML_NOTICE, "o2net: keepalive delay must be "
> +			     "smaller than idle timeout\n");
> +			return -EINVAL;
> +		}
> +		cluster->cl_keepalive_delay_ms = val;
> +	}
> +
> +	return ret;
> +}
Is it safe for us to allow the keepalive delay and the idle timeout values
to change while the cluster is up? The next patch should be checking that
the values are the same on all nodes, but if we can change it mid stream,
what's the point?


> Index: fs/ocfs2/cluster/tcp.c
> ===================================================================
> --- fs/ocfs2/cluster/tcp.c.orig	2006-11-15 11:58:34.000000000 +0100
> +++ fs/ocfs2/cluster/tcp.c	2006-11-15 12:06:24.000000000 +0100
> @@ -147,6 +147,28 @@ static void o2net_listen_data_ready(stru
>  static void o2net_sc_send_keep_req(void *arg);
>  static void o2net_idle_timer(unsigned long data);
>  static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
> +static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
> +
> +/*
> + * FIXME: These should use to_o2nm_cluster_from_node(), but we end up
> + * losing our parent link to the cluster during shutdown. This can be
> + * solved by adding a pre-removal callback to configfs, or passing
> + * around the cluster with the node. -jeffm
> + */
> +static inline int o2net_reconnect_delay(struct o2nm_node *node)
> +{
> +	return o2nm_single_cluster->cl_reconnect_delay_ms;
> +}
This patch exists now - it's in ocfs2.git:

http://kernel.org/git/?p=linux/kernel/git/mfasheh/ocfs2.git;a=commit;h=e47575895914402e1e72f00b3db539f468b849e3

You can probably just add a patch on top of all these to fix that to use
the new callback.
	--Mark

--
Mark Fasheh
Senior Software Developer, Oracle
mark.fasheh@oracle.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [patch 3/3] OCFS2 Configurable timeouts - Protocol changes
  2006-11-17 23:36 ` [Ocfs2-devel] [patch 3/3] OCFS2 Configurable timeouts - Protocol changes abeekhof
@ 2006-11-19 19:57   ` Mark Fasheh
  2006-11-19 23:22     ` Andrew Beekhof
  0 siblings, 1 reply; 13+ messages in thread
From: Mark Fasheh @ 2006-11-19 19:57 UTC (permalink / raw)
  To: ocfs2-devel

On Sat, Nov 18, 2006 at 08:36:03AM +0100, abeekhof@suse.de wrote:
> The addition of two dummy fields is a temporary measure to 
>    satisfy the logic in o2net_check_handshake() and will be
>    rectified in a future version of this patch
Maybe I'm being silly, but I just can't seem to figure out precisely which
logic you're talking about here. Can you elaborate, please?


> +static void o2net_initialize_handshake(void)
> +{
> +	static int initialized = 0;
> +	if(initialized)
> +		return;
> +
> +	initialized = 1;
> +	o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(
> +		o2net_idle_timeout(NULL));
> +	o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32(
> +		o2net_keepalive_delay(NULL));
> +	o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32(
> +		o2net_reconnect_delay(NULL));
> +}
We should populate o2hb_heartbeat_timeout_ms here. Checking against it in
o2net_check_handshake() would also be good.

Thanks,
	--Mark

--
Mark Fasheh
Senior Software Developer, Oracle
mark.fasheh@oracle.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [patch 3/3] OCFS2 Configurable timeouts - Protocol changes
  2006-11-19 19:57   ` Mark Fasheh
@ 2006-11-19 23:22     ` Andrew Beekhof
  2006-11-20  9:38       ` Mark Fasheh
  0 siblings, 1 reply; 13+ messages in thread
From: Andrew Beekhof @ 2006-11-19 23:22 UTC (permalink / raw)
  To: ocfs2-devel


On Nov 20, 2006, at 4:57 AM, Mark Fasheh wrote:

> On Sat, Nov 18, 2006 at 08:36:03AM +0100, abeekhof@suse.de wrote:
>> The addition of two dummy fields is a temporary measure to
>>    satisfy the logic in o2net_check_handshake() and will be
>>    rectified in a future version of this patch
> Maybe I'm being silly, but I just can't seem to figure out  
> precisely which
> logic you're talking about here. Can you elaborate, please?

Sorry, I meant the logic that controlls when o2net_check_handshake()  
is called by o2net_advance_rx().

Specifically, this comment:
			/* this working relies on the handshake being
			 * smaller than the normal message header */


>> +static void o2net_initialize_handshake(void)
>> +{
>> +	static int initialized = 0;
>> +	if(initialized)
>> +		return;
>> +
>> +	initialized = 1;
>> +	o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(
>> +		o2net_idle_timeout(NULL));
>> +	o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32(
>> +		o2net_keepalive_delay(NULL));
>> +	o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32(
>> +		o2net_reconnect_delay(NULL));
>> +}
> We should populate o2hb_heartbeat_timeout_ms here.

is a constant ok?  I left out all other references to heartbeat  
timeouts.

> Checking against it in
> o2net_check_handshake() would also be good.
>
> Thanks,
> 	--Mark
>
> --
> Mark Fasheh
> Senior Software Developer, Oracle
> mark.fasheh@oracle.com

--
Andrew Beekhof

"Would the last person to leave please turn out the enlightenment?" -  
TISM

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [patch 2/3] OCFS2 Configurable timeouts
  2006-11-19 19:47   ` Mark Fasheh
@ 2006-11-19 23:30     ` Andrew Beekhof
  2006-11-20  9:39       ` Mark Fasheh
  0 siblings, 1 reply; 13+ messages in thread
From: Andrew Beekhof @ 2006-11-19 23:30 UTC (permalink / raw)
  To: ocfs2-devel


On Nov 20, 2006, at 4:47 AM, Mark Fasheh wrote:

> Hi Andrew,
> 	Thanks for sending these out. Your first patch looks fine. My
> comments for this one are inlined below.
>
> On Sat, Nov 18, 2006 at 08:36:02AM +0100, abeekhof@suse.de wrote:
>> +static ssize_t o2nm_cluster_attr_idle_timeout_ms_read(struct  
>> o2nm_cluster *cluster,
>> +                                                 char *page)
>> +{
>> +	return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms);
>> +}
>> +
>> +static ssize_t o2nm_cluster_attr_idle_timeout_ms_write(struct  
>> o2nm_cluster *cluster,
>> +                                                  const char *page,
>> +						  size_t count)
>> +{
>> +	ssize_t ret;
>> +	unsigned int val;
>> +
>> +	ret =  o2nm_cluster_attr_write(page, count, &val);
>> +
>> +	if (ret > 0) {
>> +		if (val <= cluster->cl_keepalive_delay_ms) {
>> +			mlog(ML_NOTICE, "o2net: idle timeout must be larger "
>> +			     "than keepalive delay\n");
>> +			return -EINVAL;
>> +		}
>> +		cluster->cl_idle_timeout_ms = val;
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read(struct  
>> o2nm_cluster *cluster,
>> +                                                 char *page)
>> +{
>> +	return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms);
>> +}
>> +
>> +static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write(struct  
>> o2nm_cluster *cluster,
>> +                                                  const char *page,
>> +						  size_t count)
>> +{
>> +	ssize_t ret;
>> +	unsigned int val;
>> +
>> +	ret =  o2nm_cluster_attr_write(page, count, &val);
>> +
>> +	if (ret > 0) {
>> +		if (val >= cluster->cl_idle_timeout_ms) {
>> +			mlog(ML_NOTICE, "o2net: keepalive delay must be "
>> +			     "smaller than idle timeout\n");
>> +			return -EINVAL;
>> +		}
>> +		cluster->cl_keepalive_delay_ms = val;
>> +	}
>> +
>> +	return ret;
>> +}
> Is it safe for us to allow the keepalive delay and the idle timeout  
> values
> to change while the cluster is up? The next patch should be  
> checking that
> the values are the same on all nodes, but if we can change it mid  
> stream,
> what's the point?

good point.
The simplest option would seem to be to lock the values in once we've  
sent the first handshake.
Maybe one day we can allow nodes to (re-)negotiate the intervals but  
locking it in is probably enough for now.

>
>
>> Index: fs/ocfs2/cluster/tcp.c
>> ===================================================================
>> --- fs/ocfs2/cluster/tcp.c.orig	2006-11-15 11:58:34.000000000 +0100
>> +++ fs/ocfs2/cluster/tcp.c	2006-11-15 12:06:24.000000000 +0100
>> @@ -147,6 +147,28 @@ static void o2net_listen_data_ready(stru
>>  static void o2net_sc_send_keep_req(void *arg);
>>  static void o2net_idle_timer(unsigned long data);
>>  static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
>> +static void o2net_sc_reset_idle_timer(struct o2net_sock_container  
>> *sc);
>> +
>> +/*
>> + * FIXME: These should use to_o2nm_cluster_from_node(), but we  
>> end up
>> + * losing our parent link to the cluster during shutdown. This  
>> can be
>> + * solved by adding a pre-removal callback to configfs, or passing
>> + * around the cluster with the node. -jeffm
>> + */
>> +static inline int o2net_reconnect_delay(struct o2nm_node *node)
>> +{
>> +	return o2nm_single_cluster->cl_reconnect_delay_ms;
>> +}
> This patch exists now - it's in ocfs2.git:
>
> http://kernel.org/git/?p=linux/kernel/git/mfasheh/ 
> ocfs2.git;a=commit;h=e47575895914402e1e72f00b3db539f468b849e3
>
> You can probably just add a patch on top of all these to fix that  
> to use
> the new callback.

will do.

> 	--Mark
>
> --
> Mark Fasheh
> Senior Software Developer, Oracle
> mark.fasheh@oracle.com

--
Andrew Beekhof

"Would the last person to leave please turn out the enlightenment?" -  
TISM

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [patch 3/3] OCFS2 Configurable timeouts - Protocol changes
  2006-11-19 23:22     ` Andrew Beekhof
@ 2006-11-20  9:38       ` Mark Fasheh
  2006-11-20 10:13         ` Andrew Beekhof
  2006-11-20 10:39         ` Zach Brown
  0 siblings, 2 replies; 13+ messages in thread
From: Mark Fasheh @ 2006-11-20  9:38 UTC (permalink / raw)
  To: ocfs2-devel

On Mon, Nov 20, 2006 at 08:22:40AM +0100, Andrew Beekhof wrote:
> >On Sat, Nov 18, 2006 at 08:36:03AM +0100, abeekhof@suse.de wrote:
> >>The addition of two dummy fields is a temporary measure to
> >>   satisfy the logic in o2net_check_handshake() and will be
> >>   rectified in a future version of this patch
> >Maybe I'm being silly, but I just can't seem to figure out  
> >precisely which
> >logic you're talking about here. Can you elaborate, please?
> 
> Sorry, I meant the logic that controlls when o2net_check_handshake()  
> is called by o2net_advance_rx().
> 
> Specifically, this comment:
> 			/* this working relies on the handshake being
> 			 * smaller than the normal message header */
Ahh, ok. Do you have a specific fix in mind? I've CC'd Zach - he
might have an idea. Otherwise, I'm not really too worried about growing the
o2net_msg structure by a little bit - we ought to rename the fields to
something like 'reserved0' and 'reserved1' though.


> >>+static void o2net_initialize_handshake(void)
> >>+{
> >>+	static int initialized = 0;
> >>+	if(initialized)
> >>+		return;
> >>+
> >>+	initialized = 1;
> >>+	o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(
> >>+		o2net_idle_timeout(NULL));
> >>+	o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32(
> >>+		o2net_keepalive_delay(NULL));
> >>+	o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32(
> >>+		o2net_reconnect_delay(NULL));
> >>+}
> >We should populate o2hb_heartbeat_timeout_ms here.
> 
> is a constant ok?  I left out all other references to heartbeat  
> timeouts.
Why not just populate it with O2HB_MAX_WRITE_TIMEOUT_MS? o2hb_dead_threshold
is already not allowed to be changed once the cluster is up btw.
	--Mark

--
Mark Fasheh
Senior Software Developer, Oracle
mark.fasheh@oracle.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [patch 2/3] OCFS2 Configurable timeouts
  2006-11-19 23:30     ` Andrew Beekhof
@ 2006-11-20  9:39       ` Mark Fasheh
  0 siblings, 0 replies; 13+ messages in thread
From: Mark Fasheh @ 2006-11-20  9:39 UTC (permalink / raw)
  To: ocfs2-devel

On Mon, Nov 20, 2006 at 08:30:10AM +0100, Andrew Beekhof wrote:
> On Nov 20, 2006, at 4:47 AM, Mark Fasheh wrote:
> >Is it safe for us to allow the keepalive delay and the idle timeout  
> >values
> >to change while the cluster is up? The next patch should be  
> >checking that
> >the values are the same on all nodes, but if we can change it mid  
> >stream,
> >what's the point?
> 
> good point.
> The simplest option would seem to be to lock the values in once we've  
> sent the first handshake.
> Maybe one day we can allow nodes to (re-)negotiate the intervals but  
> locking it in is probably enough for now.
That sounds good to me.
	--Mark

--
Mark Fasheh
Senior Software Developer, Oracle
mark.fasheh@oracle.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [patch 3/3] OCFS2 Configurable timeouts - Protocol changes
  2006-11-20  9:38       ` Mark Fasheh
@ 2006-11-20 10:13         ` Andrew Beekhof
  2006-11-20 10:39         ` Zach Brown
  1 sibling, 0 replies; 13+ messages in thread
From: Andrew Beekhof @ 2006-11-20 10:13 UTC (permalink / raw)
  To: ocfs2-devel


On Nov 20, 2006, at 6:38 PM, Mark Fasheh wrote:

> On Mon, Nov 20, 2006 at 08:22:40AM +0100, Andrew Beekhof wrote:
>>> On Sat, Nov 18, 2006 at 08:36:03AM +0100, abeekhof@suse.de wrote:
>>>> The addition of two dummy fields is a temporary measure to
>>>>   satisfy the logic in o2net_check_handshake() and will be
>>>>   rectified in a future version of this patch
>>> Maybe I'm being silly, but I just can't seem to figure out
>>> precisely which
>>> logic you're talking about here. Can you elaborate, please?
>>
>> Sorry, I meant the logic that controlls when o2net_check_handshake()
>> is called by o2net_advance_rx().
>>
>> Specifically, this comment:
>> 			/* this working relies on the handshake being
>> 			 * smaller than the normal message header */
> Ahh, ok. Do you have a specific fix in mind?

not yet - i was mostly worried about making it work (and  
understanding why) so that I could get the rest out for review.

> I've CC'd Zach - he
> might have an idea. Otherwise, I'm not really too worried about  
> growing the
> o2net_msg structure by a little bit - we ought to rename the fields to
> something like 'reserved0' and 'reserved1' though.

chuckle... "what's in a name"

>
>
>>>> +static void o2net_initialize_handshake(void)
>>>> +{
>>>> +	static int initialized = 0;
>>>> +	if(initialized)
>>>> +		return;
>>>> +
>>>> +	initialized = 1;
>>>> +	o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(
>>>> +		o2net_idle_timeout(NULL));
>>>> +	o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32(
>>>> +		o2net_keepalive_delay(NULL));
>>>> +	o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32(
>>>> +		o2net_reconnect_delay(NULL));
>>>> +}
>>> We should populate o2hb_heartbeat_timeout_ms here.
>>
>> is a constant ok?  I left out all other references to heartbeat
>> timeouts.
> Why not just populate it with O2HB_MAX_WRITE_TIMEOUT_MS?

ok

> o2hb_dead_threshold
> is already not allowed to be changed once the cluster is up btw.

ok


--
Andrew Beekhof

"Would the last person to leave please turn out the enlightenment?" -  
TISM

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [patch 3/3] OCFS2 Configurable timeouts - Protocol changes
  2006-11-20  9:38       ` Mark Fasheh
  2006-11-20 10:13         ` Andrew Beekhof
@ 2006-11-20 10:39         ` Zach Brown
  1 sibling, 0 replies; 13+ messages in thread
From: Zach Brown @ 2006-11-20 10:39 UTC (permalink / raw)
  To: ocfs2-devel


>> Sorry, I meant the logic that controlls when o2net_check_handshake()  
>> is called by o2net_advance_rx().
>>
>> Specifically, this comment:
>> 			/* this working relies on the handshake being
>> 			 * smaller than the normal message header */
> Ahh, ok. Do you have a specific fix in mind? I've CC'd Zach - he
> might have an idea. Otherwise, I'm not really too worried about growing the
> o2net_msg structure by a little bit - we ought to rename the fields to
> something like 'reserved0' and 'reserved1' though.

The current o2net_advance_rx() is being lazy and is putting the check
for handshake negotiation inside the block which receives the message
header.  The current code will mistake a partially received handshake
header for a msg header once it exceeds the size of the message header.

The fix is easy, just hoist the handshake checking up into it's own
block before the header is received.  It'd look like the header
receiving loop, but would call _check_handshake().  It needs to be
careful not to keep trying to parse headers if it doesn't get the whole
handshake.  So something like:

if (unlikely(!handshake_ok)) {
    setup data and datalen, recv into them;
    if +ve, inc page_off
    if (page_off == sizeof(handshake))
      ret = _check_handshake()
    if (!handshake_ok)
       goto out;
}

You'd want to think harder about the return codes than I have here.

- z

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2006-11-20 10:39 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-11-17 23:36 [Ocfs2-devel] [patch 0/3] OCFS Configurable timeouts from userspace abeekhof
2006-11-17 23:36 ` [Ocfs2-devel] [patch 1/3] OCFS2 - Expose struct o2nm_cluster abeekhof
2006-11-17 23:36 ` [Ocfs2-devel] [patch 3/3] OCFS2 Configurable timeouts - Protocol changes abeekhof
2006-11-19 19:57   ` Mark Fasheh
2006-11-19 23:22     ` Andrew Beekhof
2006-11-20  9:38       ` Mark Fasheh
2006-11-20 10:13         ` Andrew Beekhof
2006-11-20 10:39         ` Zach Brown
2006-11-17 23:36 ` [Ocfs2-devel] [patch 2/3] OCFS2 Configurable timeouts abeekhof
2006-11-19 19:47   ` Mark Fasheh
2006-11-19 23:30     ` Andrew Beekhof
2006-11-20  9:39       ` Mark Fasheh
2006-11-18  2:35 ` [Ocfs2-devel] Re: [patch 0/3] OCFS Configurable timeouts from userspace Andrew Beekhof

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.