* [Ocfs2-devel] [patch 0/1] OCFS Configurable timeouts - Revision 2 @ 2006-11-29 1:04 abeekhof 2006-11-29 1:04 ` [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch abeekhof 2006-11-30 17:50 ` [Ocfs2-devel] [patch 0/1] OCFS Configurable timeouts - Revision 2 Joel Becker 0 siblings, 2 replies; 9+ messages in thread From: abeekhof @ 2006-11-29 1:04 UTC (permalink / raw) To: ocfs2-devel Hopefully this is the final version :-) -- Added a global spinlock around modifications to o2net_connected_peer (as discussed with Mark). I have a separate patch that uses to_o2nm_cluster_from_node() but since I cant reproduce the problem mentioned in Jeff's comment (it apparently needs the userspace heartbeating modifications), I'd prefer to leave it out. Andrew ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch 2006-11-29 1:04 [Ocfs2-devel] [patch 0/1] OCFS Configurable timeouts - Revision 2 abeekhof @ 2006-11-29 1:04 ` abeekhof 2006-11-29 15:30 ` Zach Brown 2006-11-29 15:31 ` Mark Fasheh 2006-11-30 17:50 ` [Ocfs2-devel] [patch 0/1] OCFS Configurable timeouts - Revision 2 Joel Becker 1 sibling, 2 replies; 9+ messages in thread From: abeekhof @ 2006-11-29 1:04 UTC (permalink / raw) To: ocfs2-devel From: Andrew Beekhof <abeekhof@suse.de> Subject: [patch 1/1] OCFS2 Configurable timeouts - Protocol changes Modify the OCFS2 handshake to ensure essential timeouts are configured identically on all nodes. Only allow changes when there are no connected peers Improves the logic in o2net_advance_rx() which broke now that sizeof(struct o2net_handshake) is greater than sizeof(struct o2net_msg) Included is the field for userspace-heartbeat timeout to avoid the need for further protocol changes. Uses a global spinlock to ensure the decisions to update configfs entries are made on the correct value. The region covered by the spinlock when incrimenting the counter is much larger as this is the more critical case. Signed-off-by: Andrew Beekhof <abeekhof@suse.de> --- fs/ocfs2/cluster/nodemanager.c | 19 +++++++ fs/ocfs2/cluster/tcp.c | 96 +++++++++++++++++++++++++++++++++++----- fs/ocfs2/cluster/tcp.h | 1 fs/ocfs2/cluster/nodemanager.c | 55 ++++++++++++++------ fs/ocfs2/cluster/tcp.c | 105 +++++++++++++++++++++++++++++++++++----- fs/ocfs2/cluster/tcp.h | 2 fs/ocfs2/cluster/tcp_internal.h | 6 +- 4 files changed, 139 insertions(+), 29 deletions(-) Index: fs/ocfs2/cluster/nodemanager.c =================================================================== --- fs/ocfs2/cluster/nodemanager.c.orig 2006-11-20 16:25:58.000000000 +0100 +++ fs/ocfs2/cluster/nodemanager.c 2006-11-27 09:57:56.000000000 +0100 @@ -558,15 +558,14 @@ static ssize_t o2nm_cluster_attr_write(c return count; } -static ssize_t o2nm_cluster_attr_idle_timeout_ms_read(struct o2nm_cluster *cluster, - char *page) +static ssize_t o2nm_cluster_attr_idle_timeout_ms_read( + struct o2nm_cluster *cluster, char *page) { return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms); } -static ssize_t o2nm_cluster_attr_idle_timeout_ms_write(struct o2nm_cluster *cluster, - const char *page, - size_t count) +static ssize_t o2nm_cluster_attr_idle_timeout_ms_write( + struct o2nm_cluster *cluster, const char *page, size_t count) { ssize_t ret; unsigned int val; @@ -574,10 +573,22 @@ static ssize_t o2nm_cluster_attr_idle_ti ret = o2nm_cluster_attr_write(page, count, &val); if (ret > 0) { + if (cluster->cl_idle_timeout_ms != val) { + spin_lock(&connected_lock); + if(o2net_num_connected_peers()) { + mlog(ML_NOTICE, + "o2net: cannot change idle timeout after " + "the first peer has agreed to it." + " %d connected peers\n", + o2net_num_connected_peers()); + ret = -EINVAL; + } + spin_unlock(&connected_lock); + } if (val <= cluster->cl_keepalive_delay_ms) { mlog(ML_NOTICE, "o2net: idle timeout must be larger " "than keepalive delay\n"); - return -EINVAL; + ret = -EINVAL; } cluster->cl_idle_timeout_ms = val; } @@ -585,15 +596,14 @@ static ssize_t o2nm_cluster_attr_idle_ti return ret; } -static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read(struct o2nm_cluster *cluster, - char *page) +static ssize_t o2nm_cluster_attr_keepalive_delay_ms_read( + struct o2nm_cluster *cluster, char *page) { return sprintf(page, "%u\n", cluster->cl_keepalive_delay_ms); } -static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write(struct o2nm_cluster *cluster, - const char *page, - size_t count) +static ssize_t o2nm_cluster_attr_keepalive_delay_ms_write( + struct o2nm_cluster *cluster, const char *page, size_t count) { ssize_t ret; unsigned int val; @@ -601,10 +611,22 @@ static ssize_t o2nm_cluster_attr_keepali ret = o2nm_cluster_attr_write(page, count, &val); if (ret > 0) { + if (cluster->cl_keepalive_delay_ms != val) { + spin_lock(&connected_lock); + if(o2net_num_connected_peers()) { + mlog(ML_NOTICE, + "o2net: cannot change keepalive delay after" + " the first peer has agreed to it." + " %d connected peers\n", + o2net_num_connected_peers()); + ret = -EINVAL; + } + spin_unlock(&connected_lock); + } if (val >= cluster->cl_idle_timeout_ms) { mlog(ML_NOTICE, "o2net: keepalive delay must be " "smaller than idle timeout\n"); - return -EINVAL; + ret = -EINVAL; } cluster->cl_keepalive_delay_ms = val; } @@ -612,15 +634,14 @@ static ssize_t o2nm_cluster_attr_keepali return ret; } -static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read(struct o2nm_cluster *cluster, - char *page) +static ssize_t o2nm_cluster_attr_reconnect_delay_ms_read( + struct o2nm_cluster *cluster, char *page) { return sprintf(page, "%u\n", cluster->cl_reconnect_delay_ms); } -static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(struct o2nm_cluster *cluster, - const char *page, - size_t count) +static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write( + struct o2nm_cluster *cluster, const char *page, size_t count) { return o2nm_cluster_attr_write(page, count, &cluster->cl_reconnect_delay_ms); Index: fs/ocfs2/cluster/tcp.c =================================================================== --- fs/ocfs2/cluster/tcp.c.orig 2006-11-20 16:19:12.000000000 +0100 +++ fs/ocfs2/cluster/tcp.c 2006-11-27 10:41:20.000000000 +0100 @@ -1121,6 +1121,44 @@ static int o2net_check_handshake(struct return -1; } + /* + * Ensure timeouts are consistent with other nodes, otherwise + * we can end up with one node thinking that the other must be down, + * but isn't. This can ultimately cause corruption. + */ + if (be32_to_cpu(hand->o2net_idle_timeout_ms) != + o2net_idle_timeout(sc->sc_node)) { + mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " + "%u ms, but we use %u ms locally. disconnecting\n", + SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_idle_timeout_ms), + o2net_idle_timeout(sc->sc_node)); + o2net_ensure_shutdown(nn, sc, -ENOTCONN); + return -1; + } + + if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != + o2net_keepalive_delay(sc->sc_node)) { + mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " + "%u ms, but we use %u ms locally. disconnecting\n", + SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_keepalive_delay_ms), + o2net_keepalive_delay(sc->sc_node)); + o2net_ensure_shutdown(nn, sc, -ENOTCONN); + return -1; + } + + if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != + O2HB_MAX_WRITE_TIMEOUT_MS) { + mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " + "%u ms, but we use %u ms locally. disconnecting\n", + SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_keepalive_delay_ms), + O2HB_MAX_WRITE_TIMEOUT_MS); + o2net_ensure_shutdown(nn, sc, -ENOTCONN); + return -1; + } + sc->sc_handshake_ok = 1; spin_lock(&nn->nn_lock); @@ -1153,6 +1191,26 @@ static int o2net_advance_rx(struct o2net sclog(sc, "receiving\n"); do_gettimeofday(&sc->sc_tv_advance_start); + if(unlikely(sc->sc_handshake_ok == 0)) { + if(sc->sc_page_off < sizeof(struct o2net_handshake)) { + data = page_address(sc->sc_page) + sc->sc_page_off; + datalen = sizeof(struct o2net_handshake) - sc->sc_page_off; + ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); + if (ret > 0) + sc->sc_page_off += ret; + } + + if (sc->sc_page_off == sizeof(struct o2net_handshake)) { + o2net_check_handshake(sc); + if(sc->sc_handshake_ok == 0) { + BUG_ON(sizeof(struct o2net_handshake) + == sizeof(struct o2net_msg)); + ret = -EPROTO; + } + goto out; + } + } + /* do we need more header? */ if (sc->sc_page_off < sizeof(struct o2net_msg)) { data = page_address(sc->sc_page) + sc->sc_page_off; @@ -1160,15 +1218,6 @@ static int o2net_advance_rx(struct o2net ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); if (ret > 0) { sc->sc_page_off += ret; - - /* this working relies on the handshake being - * smaller than the normal message header */ - if (sc->sc_page_off >= sizeof(struct o2net_handshake)&& - !sc->sc_handshake_ok && o2net_check_handshake(sc)) { - ret = -EPROTO; - goto out; - } - /* only swab incoming here.. we can * only get here once as we cross from * being under to over */ @@ -1178,8 +1227,7 @@ static int o2net_advance_rx(struct o2net O2NET_MAX_PAYLOAD_BYTES) ret = -EOVERFLOW; } - } - if (ret <= 0) + } else goto out; } @@ -1269,6 +1317,18 @@ static int o2net_set_nodelay(struct sock return ret; } +static void o2net_initialize_handshake(void) +{ + o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( + O2HB_MAX_WRITE_TIMEOUT_MS); + o2net_hand->o2net_idle_timeout_ms = cpu_to_be32( + o2net_idle_timeout(NULL)); + o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( + o2net_keepalive_delay(NULL)); + o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( + o2net_reconnect_delay(NULL)); +} + /* ------------------------------------------------------------ */ /* called when a connect completes and after a sock is accepted. the @@ -1281,6 +1341,7 @@ static void o2net_sc_connect_completed(v (unsigned long long)O2NET_PROTOCOL_VERSION, (unsigned long long)be64_to_cpu(o2net_hand->connector_id)); + o2net_initialize_handshake(); o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); sc_put(sc); } @@ -1481,11 +1542,23 @@ static void o2net_still_up(void *arg) /* ------------------------------------------------------------ */ +static int o2net_connected_peers = 0; +spinlock_t connected_lock; + +int o2net_num_connected_peers(void) +{ + return o2net_connected_peers; +} + void o2net_disconnect_node(struct o2nm_node *node) { struct o2net_node *nn = o2net_nn_from_num(node->nd_num); /* don't reconnect until it's heartbeating again */ + spin_lock(&connected_lock); + o2net_connected_peers--; + spin_unlock(&connected_lock); + spin_lock(&nn->nn_lock); o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); spin_unlock(&nn->nn_lock); @@ -1505,13 +1578,17 @@ static void o2net_hb_node_down_cb(struct if (node_num != o2nm_this_node()) o2net_disconnect_node(node); + + BUG_ON(o2net_connected_peers < 0); } + static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, void *data) { struct o2net_node *nn = o2net_nn_from_num(node_num); + spin_lock(&connected_lock); o2quo_hb_up(node_num); /* ensure an immediate connect attempt */ @@ -1519,6 +1596,8 @@ static void o2net_hb_node_up_cb(struct o (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); if (node_num != o2nm_this_node()) { + o2net_connected_peers++; + /* heartbeat doesn't work unless a local node number is * configured and doing so brings up the o2net_wq, so we can * use it.. */ @@ -1534,6 +1613,8 @@ static void o2net_hb_node_up_cb(struct o o2net_set_nn_state(nn, NULL, 0, 0); spin_unlock(&nn->nn_lock); } + + spin_unlock(&connected_lock); } void o2net_unregister_hb_callbacks(void) @@ -1668,6 +1749,7 @@ static int o2net_accept_one(struct socke o2net_register_callbacks(sc->sc_sock->sk, sc); o2net_sc_queue_work(sc, &sc->sc_rx_work); + o2net_initialize_handshake(); o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); out: @@ -1834,6 +1916,7 @@ int o2net_init(void) unsigned long i; o2quo_init(); + spin_lock_init(&connected_lock); o2net_hand = kcalloc(1, sizeof(struct o2net_handshake), GFP_KERNEL); o2net_keep_req = kcalloc(1, sizeof(struct o2net_msg), GFP_KERNEL); Index: fs/ocfs2/cluster/tcp.h =================================================================== --- fs/ocfs2/cluster/tcp.h.orig 2006-11-20 16:19:12.000000000 +0100 +++ fs/ocfs2/cluster/tcp.h 2006-11-27 09:52:12.000000000 +0100 @@ -103,11 +103,13 @@ int o2net_register_handler(u32 msg_type, void o2net_unregister_handler_list(struct list_head *list); struct o2nm_node; +extern spinlock_t connected_lock; int o2net_register_hb_callbacks(void); void o2net_unregister_hb_callbacks(void); int o2net_start_listening(struct o2nm_node *node); void o2net_stop_listening(struct o2nm_node *node); void o2net_disconnect_node(struct o2nm_node *node); +int o2net_num_connected_peers(void); int o2net_init(void); void o2net_exit(void); Index: fs/ocfs2/cluster/tcp_internal.h =================================================================== --- fs/ocfs2/cluster/tcp_internal.h.orig 2006-11-20 16:19:12.000000000 +0100 +++ fs/ocfs2/cluster/tcp_internal.h 2006-11-20 16:25:36.000000000 +0100 @@ -48,10 +48,14 @@ * - full 64 bit i_size in the metadata lock lvbs * - introduction of "rw" lock and pushing meta/data locking down */ -#define O2NET_PROTOCOL_VERSION 4ULL +#define O2NET_PROTOCOL_VERSION 5ULL struct o2net_handshake { __be64 protocol_version; __be64 connector_id; + __be32 o2hb_heartbeat_timeout_ms; + __be32 o2net_idle_timeout_ms; + __be32 o2net_keepalive_delay_ms; + __be32 o2net_reconnect_delay_ms; }; struct o2net_node { -- ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch 2006-11-29 1:04 ` [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch abeekhof @ 2006-11-29 15:30 ` Zach Brown 2006-11-29 15:31 ` Mark Fasheh 1 sibling, 0 replies; 9+ messages in thread From: Zach Brown @ 2006-11-29 15:30 UTC (permalink / raw) To: ocfs2-devel > Only allow changes when there are no connected peers I think we can do this differently. > if (ret > 0) { > + if (cluster->cl_idle_timeout_ms != val) { > + spin_lock(&connected_lock); > + if(o2net_num_connected_peers()) { > + mlog(ML_NOTICE, > + "o2net: cannot change idle timeout after " > + "the first peer has agreed to it." > + " %d connected peers\n", > + o2net_num_connected_peers()); > + ret = -EINVAL; > + } > + spin_unlock(&connected_lock); > + } Lose the locking here so this just becomes (paraphrasing) : if (cluster != val && connected()) { ... } > +static int o2net_connected_peers = 0; > +spinlock_t connected_lock; > + > +int o2net_num_connected_peers(void) > +{ > + return o2net_connected_peers; > +} Make this an "atomic_t o2net_connected_peers = ATOMIC_INIT(0);" and then "return atomic_read();". We probably don't really need a heavy-weight atomic_t, but it's trivial and this isn't a fast path. > void o2net_disconnect_node(struct o2nm_node *node) > { > struct o2net_node *nn = o2net_nn_from_num(node->nd_num); > > /* don't reconnect until it's heartbeating again */ > + spin_lock(&connected_lock); > + o2net_connected_peers--; > + spin_unlock(&connected_lock); > + Then don't do this work here and in other places, do it once in o2net_set_nn_state(). something like: if (old_sc != sc) { if (old_sc) atomic_dec(&o2net_connected_peers); else atomic_inc(&o2net_connected_peers); } That's less confusing and catches the place where sockets are in use by a node. - z ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch 2006-11-29 1:04 ` [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch abeekhof 2006-11-29 15:30 ` Zach Brown @ 2006-11-29 15:31 ` Mark Fasheh 2006-11-30 4:25 ` Andrew Beekhof 1 sibling, 1 reply; 9+ messages in thread From: Mark Fasheh @ 2006-11-29 15:31 UTC (permalink / raw) To: ocfs2-devel Hi Andrew, Things are looking much better, but there's still a few issues that I found while reviewing the patch. I got Zach to look at it too (he's the original author of the ocfs2 network code) which has generated some good comments. On Wed, Nov 29, 2006 at 09:51:31AM +0100, abeekhof@suse.de wrote: > > From: Andrew Beekhof <abeekhof@suse.de> > Subject: [patch 1/1] OCFS2 Configurable timeouts - Protocol changes > > Modify the OCFS2 handshake to ensure essential timeouts are configured > identically on all nodes. > Only allow changes when there are no connected peers > Improves the logic in o2net_advance_rx() which broke now that > sizeof(struct o2net_handshake) is greater than sizeof(struct o2net_msg) > Included is the field for userspace-heartbeat timeout to avoid the need for > further protocol changes. > Uses a global spinlock to ensure the decisions to update configfs entries > are made on the correct value. The region covered by the spinlock when > incrimenting the counter is much larger as this is the more critical case. Nitpick: Can you format that commit log to be a bit more in line with standard kernel commits (the indenting is weird) > Index: fs/ocfs2/cluster/nodemanager.c > =================================================================== > --- fs/ocfs2/cluster/nodemanager.c.orig 2006-11-20 16:25:58.000000000 +0100 > +++ fs/ocfs2/cluster/nodemanager.c 2006-11-27 09:57:56.000000000 +0100 > @@ -558,15 +558,14 @@ static ssize_t o2nm_cluster_attr_write(c > return count; > } > > -static ssize_t o2nm_cluster_attr_idle_timeout_ms_read(struct o2nm_cluster *cluster, > - char *page) > +static ssize_t o2nm_cluster_attr_idle_timeout_ms_read( > + struct o2nm_cluster *cluster, char *page) > { > return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms); > } Can you not re-write the function prototypes unless they're actually changing please? It clutters up the patch and makes it harder to find the actual code to check (see below). > @@ -574,10 +573,22 @@ static ssize_t o2nm_cluster_attr_idle_ti > ret = o2nm_cluster_attr_write(page, count, &val); > > if (ret > 0) { > + if (cluster->cl_idle_timeout_ms != val) { > + spin_lock(&connected_lock); > + if(o2net_num_connected_peers()) { > + mlog(ML_NOTICE, > + "o2net: cannot change idle timeout after " > + "the first peer has agreed to it." > + " %d connected peers\n", > + o2net_num_connected_peers()); > + ret = -EINVAL; > + } > + spin_unlock(&connected_lock); > + } > if (val <= cluster->cl_keepalive_delay_ms) { > mlog(ML_NOTICE, "o2net: idle timeout must be larger " > "than keepalive delay\n"); > - return -EINVAL; > + ret = -EINVAL; > } > cluster->cl_idle_timeout_ms = val; I don't know how I missed this before, but you're erroring with a negative return value, yet continuing with the work of setting cluster->cl_idle_timeout_ms anyway. I think we're missing some goto's here and in the similar blocks below. > @@ -1121,6 +1121,44 @@ static int o2net_check_handshake(struct > return -1; > } > > + /* > + * Ensure timeouts are consistent with other nodes, otherwise > + * we can end up with one node thinking that the other must be down, > + * but isn't. This can ultimately cause corruption. > + */ > + if (be32_to_cpu(hand->o2net_idle_timeout_ms) != > + o2net_idle_timeout(sc->sc_node)) { > + mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " > + "%u ms, but we use %u ms locally. disconnecting\n", > + SC_NODEF_ARGS(sc), > + be32_to_cpu(hand->o2net_idle_timeout_ms), > + o2net_idle_timeout(sc->sc_node)); > + o2net_ensure_shutdown(nn, sc, -ENOTCONN); > + return -1; > + } > + > + if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != > + o2net_keepalive_delay(sc->sc_node)) { > + mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " > + "%u ms, but we use %u ms locally. disconnecting\n", > + SC_NODEF_ARGS(sc), > + be32_to_cpu(hand->o2net_keepalive_delay_ms), > + o2net_keepalive_delay(sc->sc_node)); > + o2net_ensure_shutdown(nn, sc, -ENOTCONN); > + return -1; > + } > + > + if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != > + O2HB_MAX_WRITE_TIMEOUT_MS) { > + mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " > + "%u ms, but we use %u ms locally. disconnecting\n", > + SC_NODEF_ARGS(sc), > + be32_to_cpu(hand->o2net_keepalive_delay_ms), We check hearbeat timeout here, but print keepalive delay... > @@ -1153,6 +1191,26 @@ static int o2net_advance_rx(struct o2net > sclog(sc, "receiving\n"); > do_gettimeofday(&sc->sc_tv_advance_start); > > + if(unlikely(sc->sc_handshake_ok == 0)) { > + if(sc->sc_page_off < sizeof(struct o2net_handshake)) { > + data = page_address(sc->sc_page) + sc->sc_page_off; > + datalen = sizeof(struct o2net_handshake) - sc->sc_page_off; > + ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); > + if (ret > 0) > + sc->sc_page_off += ret; > + } > + > + if (sc->sc_page_off == sizeof(struct o2net_handshake)) { > + o2net_check_handshake(sc); > + if(sc->sc_handshake_ok == 0) { > + BUG_ON(sizeof(struct o2net_handshake) > + == sizeof(struct o2net_msg)); Is this necessary? Didn't we fix the logic such that the relative sizes don't matter any more? If it _is_ necessary, then it should be a BUILD_BUG_ON() in a more visible place, with a nice fat comment explaining why... > + ret = -EPROTO; > + } > + goto out; Do you mean to move that goto within the if (sc->sc_handshake_ok == 0) { block? I _think_ it's ok for us to continue otherwise... > @@ -1178,8 +1227,7 @@ static int o2net_advance_rx(struct o2net > O2NET_MAX_PAYLOAD_BYTES) > ret = -EOVERFLOW; > } > - } > - if (ret <= 0) > + } else > goto out; > } Why are you doing that? We'll continue now if we want to return -EOVERFLOW where we would error out before. Thanks, --Mark -- Mark Fasheh Senior Software Developer, Oracle mark.fasheh@oracle.com ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch 2006-11-29 15:31 ` Mark Fasheh @ 2006-11-30 4:25 ` Andrew Beekhof 2006-11-30 9:37 ` Mark Fasheh 0 siblings, 1 reply; 9+ messages in thread From: Andrew Beekhof @ 2006-11-30 4:25 UTC (permalink / raw) To: ocfs2-devel On Nov 30, 2006, at 12:31 AM, Mark Fasheh wrote: > Hi Andrew, > Things are looking much better, but there's still a few issues that > I found while reviewing the patch. I got Zach to look at it too > (he's the > original author of the ocfs2 network code) which has generated some > good > comments. > > On Wed, Nov 29, 2006 at 09:51:31AM +0100, abeekhof@suse.de wrote: >> >> From: Andrew Beekhof <abeekhof@suse.de> >> Subject: [patch 1/1] OCFS2 Configurable timeouts - Protocol changes >> >> Modify the OCFS2 handshake to ensure essential timeouts are >> configured >> identically on all nodes. >> Only allow changes when there are no connected peers >> Improves the logic in o2net_advance_rx() which broke now that >> sizeof(struct o2net_handshake) is greater than sizeof(struct >> o2net_msg) >> Included is the field for userspace-heartbeat timeout to avoid the >> need for >> further protocol changes. >> Uses a global spinlock to ensure the decisions to update configfs >> entries >> are made on the correct value. The region covered by the >> spinlock when >> incrimenting the counter is much larger as this is the more >> critical case. > Nitpick: Can you format that commit log to be a bit more in line with > standard kernel commits (the indenting is weird) sure > > >> Index: fs/ocfs2/cluster/nodemanager.c >> =================================================================== >> --- fs/ocfs2/cluster/nodemanager.c.orig 2006-11-20 >> 16:25:58.000000000 +0100 >> +++ fs/ocfs2/cluster/nodemanager.c 2006-11-27 09:57:56.000000000 >> +0100 >> @@ -558,15 +558,14 @@ static ssize_t o2nm_cluster_attr_write(c >> return count; >> } >> >> -static ssize_t o2nm_cluster_attr_idle_timeout_ms_read(struct >> o2nm_cluster *cluster, >> - char *page) >> +static ssize_t o2nm_cluster_attr_idle_timeout_ms_read( >> + struct o2nm_cluster *cluster, char *page) >> { >> return sprintf(page, "%u\n", cluster->cl_idle_timeout_ms); >> } > Can you not re-write the function prototypes unless they're actually > changing please? It clutters up the patch and makes it harder to > find the > actual code to check (see below). ah, bad habit i picked up working on smaller projects. is it ok in a separate patch? or have I got my wrap points set too small by default? > > >> @@ -574,10 +573,22 @@ static ssize_t o2nm_cluster_attr_idle_ti >> ret = o2nm_cluster_attr_write(page, count, &val); >> >> if (ret > 0) { >> + if (cluster->cl_idle_timeout_ms != val) { >> + spin_lock(&connected_lock); >> + if(o2net_num_connected_peers()) { >> + mlog(ML_NOTICE, >> + "o2net: cannot change idle timeout after " >> + "the first peer has agreed to it." >> + " %d connected peers\n", >> + o2net_num_connected_peers()); >> + ret = -EINVAL; >> + } >> + spin_unlock(&connected_lock); >> + } >> if (val <= cluster->cl_keepalive_delay_ms) { >> mlog(ML_NOTICE, "o2net: idle timeout must be larger " >> "than keepalive delay\n"); >> - return -EINVAL; >> + ret = -EINVAL; >> } >> cluster->cl_idle_timeout_ms = val; > I don't know how I missed this before, but you're erroring with a > negative return > value, yet continuing with the work of setting cluster- > >cl_idle_timeout_ms > anyway. I think we're missing some goto's here and in the similar > blocks > below. my bad - fixed > >> @@ -1121,6 +1121,44 @@ static int o2net_check_handshake(struct >> return -1; >> } >> >> + /* >> + * Ensure timeouts are consistent with other nodes, otherwise >> + * we can end up with one node thinking that the other must be >> down, >> + * but isn't. This can ultimately cause corruption. >> + */ >> + if (be32_to_cpu(hand->o2net_idle_timeout_ms) != >> + o2net_idle_timeout(sc->sc_node)) { >> + mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " >> + "%u ms, but we use %u ms locally. disconnecting\n", >> + SC_NODEF_ARGS(sc), >> + be32_to_cpu(hand->o2net_idle_timeout_ms), >> + o2net_idle_timeout(sc->sc_node)); >> + o2net_ensure_shutdown(nn, sc, -ENOTCONN); >> + return -1; >> + } >> + >> + if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != >> + o2net_keepalive_delay(sc->sc_node)) { >> + mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " >> + "%u ms, but we use %u ms locally. disconnecting\n", >> + SC_NODEF_ARGS(sc), >> + be32_to_cpu(hand->o2net_keepalive_delay_ms), >> + o2net_keepalive_delay(sc->sc_node)); >> + o2net_ensure_shutdown(nn, sc, -ENOTCONN); >> + return -1; >> + } >> + >> + if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != >> + O2HB_MAX_WRITE_TIMEOUT_MS) { >> + mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " >> + "%u ms, but we use %u ms locally. disconnecting\n", >> + SC_NODEF_ARGS(sc), >> + be32_to_cpu(hand->o2net_keepalive_delay_ms), > We check hearbeat timeout here, but print keepalive delay... > fixed > >> @@ -1153,6 +1191,26 @@ static int o2net_advance_rx(struct o2net >> sclog(sc, "receiving\n"); >> do_gettimeofday(&sc->sc_tv_advance_start); >> >> + if(unlikely(sc->sc_handshake_ok == 0)) { >> + if(sc->sc_page_off < sizeof(struct o2net_handshake)) { >> + data = page_address(sc->sc_page) + sc->sc_page_off; >> + datalen = sizeof(struct o2net_handshake) - sc->sc_page_off; >> + ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); >> + if (ret > 0) >> + sc->sc_page_off += ret; >> + } >> + >> + if (sc->sc_page_off == sizeof(struct o2net_handshake)) { >> + o2net_check_handshake(sc); >> + if(sc->sc_handshake_ok == 0) { >> + BUG_ON(sizeof(struct o2net_handshake) >> + == sizeof(struct o2net_msg)); > Is this necessary? I wasnt sure at the time - see below - so i wanted to make sure it at least died sanely apparently i still needed education on how that is done :) > Didn't we fix the logic such that the relative sizes > don't matter any more? If it _is_ necessary, then it should be a > BUILD_BUG_ON() in a more visible place, ah, I was not familiar with that macro yet > with a nice fat comment explaining > why... > >> + ret = -EPROTO; >> + } >> + goto out; > Do you mean to move that goto within the > > if (sc->sc_handshake_ok == 0) { > > block? I _think_ it's ok for us to continue otherwise... i did - but if we never want to process an o2net_msg if the handshake has not been completed, then i can structure things a little differently/clearly > > >> @@ -1178,8 +1227,7 @@ static int o2net_advance_rx(struct o2net >> O2NET_MAX_PAYLOAD_BYTES) >> ret = -EOVERFLOW; >> } >> - } >> - if (ret <= 0) >> + } else >> goto out; >> } > Why are you doing that? We'll continue now if we want to return - > EOVERFLOW > where we would error out before. damn - i should have noticed that I'll resubmit once we sort out the o2net_msg / o2net_handshake situation -- Andrew Beekhof "Would the last person to leave please turn out the enlightenment?" - TISM ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch 2006-11-30 4:25 ` Andrew Beekhof @ 2006-11-30 9:37 ` Mark Fasheh 0 siblings, 0 replies; 9+ messages in thread From: Mark Fasheh @ 2006-11-30 9:37 UTC (permalink / raw) To: ocfs2-devel On Thu, Nov 30, 2006 at 01:24:29PM +0100, Andrew Beekhof wrote: > >Can you not re-write the function prototypes unless they're actually > >changing please? It clutters up the patch and makes it harder to > >find the > >actual code to check (see below). > > ah, bad habit i picked up working on smaller projects. > is it ok in a separate patch? or have I got my wrap points set too > small by default? In general if you see something that's inconsisent with Documentation/CodingStyle, then yeah fixing it in a seperate patch is fine. As far as those functions are concerned, if you're not happy with the indentation, then it would be nicest if you got them right in the patch which introduced them. > >>+ if (sc->sc_page_off == sizeof(struct o2net_handshake)) { > >>+ o2net_check_handshake(sc); > >>+ if(sc->sc_handshake_ok == 0) { > >>+ BUG_ON(sizeof(struct o2net_handshake) > >>+ == sizeof(struct o2net_msg)); > >Is this necessary? > > I wasnt sure at the time - see below - so i wanted to make sure it at > least died sanely > apparently i still needed education on how that is done :) > > >Didn't we fix the logic such that the relative sizes > >don't matter any more? If it _is_ necessary, then it should be a > >BUILD_BUG_ON() in a more visible place, > > ah, I was not familiar with that macro yet Yeah, I'm mostly going on your description of the patch, and Zach's description of the problem. I'll have to look more closely to see if this is still something we need to trap or not. > >with a nice fat comment explaining > >why... > > > >>+ ret = -EPROTO; > >>+ } > >>+ goto out; > >Do you mean to move that goto within the > > > >if (sc->sc_handshake_ok == 0) { > > > >block? I _think_ it's ok for us to continue otherwise... > > i did - but if we never want to process an o2net_msg if the handshake > has not been completed, then i can structure things a little > differently/clearly Hmm, yeah... It looks like what'd happen if we don't get a properly sized handshake is that we'd continue to process the o2net_msg, which I don't think we want to do. If we skipped that, then we don't have to depend on the sizes not matching... Which is fine because I don't think we ought to be processing messages from nodes which haven't properly connected to us yet. > I'll resubmit once we sort out the o2net_msg / o2net_handshake > situation Great, thanks alot Andrew! --Mark -- Mark Fasheh Senior Software Developer, Oracle mark.fasheh@oracle.com ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [patch 0/1] OCFS Configurable timeouts - Revision 2 2006-11-29 1:04 [Ocfs2-devel] [patch 0/1] OCFS Configurable timeouts - Revision 2 abeekhof 2006-11-29 1:04 ` [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch abeekhof @ 2006-11-30 17:50 ` Joel Becker 2006-12-01 0:16 ` Andrew Beekhof 1 sibling, 1 reply; 9+ messages in thread From: Joel Becker @ 2006-11-30 17:50 UTC (permalink / raw) To: ocfs2-devel On Wed, Nov 29, 2006 at 09:51:30AM +0100, abeekhof@suse.de wrote: > Added a global spinlock around modifications to o2net_connected_peer (as discussed with Mark). > > I have a separate patch that uses to_o2nm_cluster_from_node() but since I cant reproduce the problem mentioned in Jeff's comment (it apparently needs the userspace heartbeating modifications), I'd prefer to leave it out. Which problem do you mean? I'm trying to know if the callback changes (->disconnect_notify()) make you happy and are good enough to push towards mainline. Have you tested with them, etc? Joel -- Bram's Law: The easier a piece of software is to write, the worse it's implemented in practice. Joel Becker Principal Software Developer Oracle E-mail: joel.becker@oracle.com Phone: (650) 506-8127 ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [patch 0/1] OCFS Configurable timeouts - Revision 2 2006-11-30 17:50 ` [Ocfs2-devel] [patch 0/1] OCFS Configurable timeouts - Revision 2 Joel Becker @ 2006-12-01 0:16 ` Andrew Beekhof 0 siblings, 0 replies; 9+ messages in thread From: Andrew Beekhof @ 2006-12-01 0:16 UTC (permalink / raw) To: ocfs2-devel On Dec 1, 2006, at 2:50 AM, Joel Becker wrote: > On Wed, Nov 29, 2006 at 09:51:30AM +0100, abeekhof@suse.de wrote: >> Added a global spinlock around modifications to >> o2net_connected_peer (as discussed with Mark). >> >> I have a separate patch that uses to_o2nm_cluster_from_node() but >> since I cant reproduce the problem mentioned in Jeff's comment (it >> apparently needs the userspace heartbeating modifications), I'd >> prefer to leave it out. > > Which problem do you mean? I'm trying to know if the callback > changes (->disconnect_notify()) make you happy and are good enough to > push towards mainline. Have you tested with them, etc? I'm referring to this problem noted by Jeff /* * FIXME: These should use to_o2nm_cluster_from_node(), but we end up * losing our parent link to the cluster during shutdown. This can be * solved by adding a pre-removal callback to configfs, or passing * around the cluster with the node. -jeffm */ Without the userspace heartbeat code I can't seem to create this situation and everything works nicely. So I'm not yet in a position to really comment on the .disconnect_notify changes. -- Andrew Beekhof "Would the last person to leave please turn out the enlightenment?" - TISM ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [patch 0/1] OCFS Configurable timeouts - Revision 2 @ 2006-11-21 7:12 abeekhof 2006-11-21 7:12 ` [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch abeekhof 0 siblings, 1 reply; 9+ messages in thread From: abeekhof @ 2006-11-21 7:12 UTC (permalink / raw) To: ocfs2-devel The following is an update to the 3rd patch in the configurable timeouts series. Changes to the original patch include: - setting and checking of o2hb_heartbeat_timeout_ms in handshakes - better logic in o2net_advance_rx() removing the need for dummy fields in o2net_msg struct - tracks how many peers are connected - prevents o2net_idle_timeout_ms and o2net_keepalive_delay_ms from being modified when we have connected peers The rationale for including these changes in the final patch is that patch 2 creates the capability and patch 3 ensures it it used sanely across all nodes. Still to come is a patch which makes use of the to_o2nm_cluster_from_node() function as previously suggested by Mark. Andrew -- ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch 2006-11-21 7:12 abeekhof @ 2006-11-21 7:12 ` abeekhof 0 siblings, 0 replies; 9+ messages in thread From: abeekhof @ 2006-11-21 7:12 UTC (permalink / raw) To: ocfs2-devel From: Andrew Beekhof <abeekhof@suse.de> Subject: [patch 1/1] OCFS2 Configurable timeouts - Protocol changes Modify the OCFS2 handshake to ensure essential timeouts are configured identically on all nodes. Included is the field for userspace-heartbeat timeout to avoid the need for further protocol changes. The addition of two dummy fields is a temporary measure to satisfy the logic in o2net_check_handshake() and will be rectified in a future version of this patch Signed-off-by: Andrew Beekhof <abeekhof@suse.de> --- fs/ocfs2/cluster/nodemanager.c | 19 ++++++- fs/ocfs2/cluster/tcp.c | 102 ++++++++++++++++++++++++++++++++++------ fs/ocfs2/cluster/tcp.h | 1 fs/ocfs2/cluster/tcp_internal.h | 6 +- 4 files changed, 111 insertions(+), 17 deletions(-) Index: fs/ocfs2/cluster/nodemanager.c =================================================================== --- fs/ocfs2/cluster/nodemanager.c.orig 2006-11-20 16:25:58.000000000 +0100 +++ fs/ocfs2/cluster/nodemanager.c 2006-11-21 12:49:03.000000000 +0100 @@ -574,7 +574,14 @@ static ssize_t o2nm_cluster_attr_idle_ti ret = o2nm_cluster_attr_write(page, count, &val); if (ret > 0) { - if (val <= cluster->cl_keepalive_delay_ms) { + if(cluster->cl_idle_timeout_ms != val + && o2net_num_connected_peers()) { + mlog(ML_NOTICE, "o2net: cannot change idle timeout after " + "the first peer has agreed to it. %d connected peers\n", + o2net_num_connected_peers()); + return -EINVAL; + + } else if (val <= cluster->cl_keepalive_delay_ms) { mlog(ML_NOTICE, "o2net: idle timeout must be larger " "than keepalive delay\n"); return -EINVAL; @@ -601,7 +608,15 @@ static ssize_t o2nm_cluster_attr_keepali ret = o2nm_cluster_attr_write(page, count, &val); if (ret > 0) { - if (val >= cluster->cl_idle_timeout_ms) { + + if(cluster->cl_keepalive_delay_ms != val + && o2net_num_connected_peers()) { + mlog(ML_NOTICE, "o2net: cannot change keepalive delay after " + "the first peer has agreed to it. %d connected peers\n", + o2net_num_connected_peers()); + return -EINVAL; + + } else if (val >= cluster->cl_idle_timeout_ms) { mlog(ML_NOTICE, "o2net: keepalive delay must be " "smaller than idle timeout\n"); return -EINVAL; Index: fs/ocfs2/cluster/tcp.c =================================================================== --- fs/ocfs2/cluster/tcp.c.orig 2006-11-20 16:19:12.000000000 +0100 +++ fs/ocfs2/cluster/tcp.c 2006-11-21 15:42:01.000000000 +0100 @@ -1121,6 +1121,44 @@ static int o2net_check_handshake(struct return -1; } + /* + * Ensure timeouts are consistent with other nodes, otherwise + * we can end up with one node thinking that the other must be down, + * but isn't. This can ultimately cause corruption. + */ + if (be32_to_cpu(hand->o2net_idle_timeout_ms) != + o2net_idle_timeout(sc->sc_node)) { + mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " + "%u ms, but we use %u ms locally. disconnecting\n", + SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_idle_timeout_ms), + o2net_idle_timeout(sc->sc_node)); + o2net_ensure_shutdown(nn, sc, -ENOTCONN); + return -1; + } + + if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != + o2net_keepalive_delay(sc->sc_node)) { + mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " + "%u ms, but we use %u ms locally. disconnecting\n", + SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_keepalive_delay_ms), + o2net_keepalive_delay(sc->sc_node)); + o2net_ensure_shutdown(nn, sc, -ENOTCONN); + return -1; + } + + if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != + O2HB_MAX_WRITE_TIMEOUT_MS) { + mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " + "%u ms, but we use %u ms locally. disconnecting\n", + SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_keepalive_delay_ms), + O2HB_MAX_WRITE_TIMEOUT_MS); + o2net_ensure_shutdown(nn, sc, -ENOTCONN); + return -1; + } + sc->sc_handshake_ok = 1; spin_lock(&nn->nn_lock); @@ -1153,6 +1191,26 @@ static int o2net_advance_rx(struct o2net sclog(sc, "receiving\n"); do_gettimeofday(&sc->sc_tv_advance_start); + if(unlikely(sc->sc_handshake_ok == 0)) { + if(sc->sc_page_off < sizeof(struct o2net_handshake)) { + data = page_address(sc->sc_page) + sc->sc_page_off; + datalen = sizeof(struct o2net_handshake) - sc->sc_page_off; + ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); + if (ret > 0) + sc->sc_page_off += ret; + } + + if (sc->sc_page_off == sizeof(struct o2net_handshake)) { + o2net_check_handshake(sc); + if(sc->sc_handshake_ok == 0) { + BUG_ON(sizeof(struct o2net_handshake) + == sizeof(struct o2net_msg)); + ret = -EPROTO; + } + goto out; + } + } + /* do we need more header? */ if (sc->sc_page_off < sizeof(struct o2net_msg)) { data = page_address(sc->sc_page) + sc->sc_page_off; @@ -1160,26 +1218,16 @@ static int o2net_advance_rx(struct o2net ret = o2net_recv_tcp_msg(sc->sc_sock, data, datalen); if (ret > 0) { sc->sc_page_off += ret; - - /* this working relies on the handshake being - * smaller than the normal message header */ - if (sc->sc_page_off >= sizeof(struct o2net_handshake)&& - !sc->sc_handshake_ok && o2net_check_handshake(sc)) { - ret = -EPROTO; - goto out; - } - - /* only swab incoming here.. we can - * only get here once as we cross from - * being under to over */ if (sc->sc_page_off == sizeof(struct o2net_msg)) { + /* only swab incoming here.. we can + * only get here once as we cross from + * being under to over */ hdr = page_address(sc->sc_page); if (be16_to_cpu(hdr->data_len) > O2NET_MAX_PAYLOAD_BYTES) ret = -EOVERFLOW; } - } - if (ret <= 0) + } else goto out; } @@ -1269,6 +1317,18 @@ static int o2net_set_nodelay(struct sock return ret; } +static void o2net_initialize_handshake(void) +{ + o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( + O2HB_MAX_WRITE_TIMEOUT_MS); + o2net_hand->o2net_idle_timeout_ms = cpu_to_be32( + o2net_idle_timeout(NULL)); + o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( + o2net_keepalive_delay(NULL)); + o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( + o2net_reconnect_delay(NULL)); +} + /* ------------------------------------------------------------ */ /* called when a connect completes and after a sock is accepted. the @@ -1281,6 +1341,7 @@ static void o2net_sc_connect_completed(v (unsigned long long)O2NET_PROTOCOL_VERSION, (unsigned long long)be64_to_cpu(o2net_hand->connector_id)); + o2net_initialize_handshake(); o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); sc_put(sc); } @@ -1481,12 +1542,20 @@ static void o2net_still_up(void *arg) /* ------------------------------------------------------------ */ +static int o2net_connected_peers = 0; + +int o2net_num_connected_peers(void) +{ + return o2net_connected_peers; +} + void o2net_disconnect_node(struct o2nm_node *node) { struct o2net_node *nn = o2net_nn_from_num(node->nd_num); /* don't reconnect until it's heartbeating again */ spin_lock(&nn->nn_lock); + o2net_connected_peers--; o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); spin_unlock(&nn->nn_lock); @@ -1505,8 +1574,11 @@ static void o2net_hb_node_down_cb(struct if (node_num != o2nm_this_node()) o2net_disconnect_node(node); + + BUG_ON(o2net_connected_peers < 0); } + static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, void *data) { @@ -1530,6 +1602,7 @@ static void o2net_hb_node_up_cb(struct o * only use set_nn_state to clear the persistent error * if that hasn't already happened */ spin_lock(&nn->nn_lock); + o2net_connected_peers++; if (nn->nn_persistent_error) o2net_set_nn_state(nn, NULL, 0, 0); spin_unlock(&nn->nn_lock); @@ -1668,6 +1741,7 @@ static int o2net_accept_one(struct socke o2net_register_callbacks(sc->sc_sock->sk, sc); o2net_sc_queue_work(sc, &sc->sc_rx_work); + o2net_initialize_handshake(); o2net_sendpage(sc, o2net_hand, sizeof(*o2net_hand)); out: Index: fs/ocfs2/cluster/tcp.h =================================================================== --- fs/ocfs2/cluster/tcp.h.orig 2006-11-20 16:19:12.000000000 +0100 +++ fs/ocfs2/cluster/tcp.h 2006-11-21 15:32:53.000000000 +0100 @@ -108,6 +108,7 @@ void o2net_unregister_hb_callbacks(void) int o2net_start_listening(struct o2nm_node *node); void o2net_stop_listening(struct o2nm_node *node); void o2net_disconnect_node(struct o2nm_node *node); +int o2net_num_connected_peers(void); int o2net_init(void); void o2net_exit(void); Index: fs/ocfs2/cluster/tcp_internal.h =================================================================== --- fs/ocfs2/cluster/tcp_internal.h.orig 2006-11-20 16:19:12.000000000 +0100 +++ fs/ocfs2/cluster/tcp_internal.h 2006-11-20 16:25:36.000000000 +0100 @@ -48,10 +48,14 @@ * - full 64 bit i_size in the metadata lock lvbs * - introduction of "rw" lock and pushing meta/data locking down */ -#define O2NET_PROTOCOL_VERSION 4ULL +#define O2NET_PROTOCOL_VERSION 5ULL struct o2net_handshake { __be64 protocol_version; __be64 connector_id; + __be32 o2hb_heartbeat_timeout_ms; + __be32 o2net_idle_timeout_ms; + __be32 o2net_keepalive_delay_ms; + __be32 o2net_reconnect_delay_ms; }; struct o2net_node { -- ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2006-12-01 0:16 UTC | newest] Thread overview: 9+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2006-11-29 1:04 [Ocfs2-devel] [patch 0/1] OCFS Configurable timeouts - Revision 2 abeekhof 2006-11-29 1:04 ` [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch abeekhof 2006-11-29 15:30 ` Zach Brown 2006-11-29 15:31 ` Mark Fasheh 2006-11-30 4:25 ` Andrew Beekhof 2006-11-30 9:37 ` Mark Fasheh 2006-11-30 17:50 ` [Ocfs2-devel] [patch 0/1] OCFS Configurable timeouts - Revision 2 Joel Becker 2006-12-01 0:16 ` Andrew Beekhof -- strict thread matches above, loose matches on Subject: below -- 2006-11-21 7:12 abeekhof 2006-11-21 7:12 ` [Ocfs2-devel] [patch 1/1] ocfs2-timeout-protocol.patch abeekhof
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.