From: Alexander Aring <aahringo@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [RFC PATCH dlm/next 07/16] fs: dlm: add more midcomms hooks
Date: Fri, 13 Nov 2020 17:58:05 -0500 [thread overview]
Message-ID: <20201113225814.461167-8-aahringo@redhat.com> (raw)
In-Reply-To: <20201113225814.461167-1-aahringo@redhat.com>
This patch prepares hooks to redirect to the midcomms layer which will
be used by the midcomms retransmit handling.
This patch also introduces the new concept of stateless buffer
allocation and commit. This can be used to bypass the midcomms retransmit
handling. It is used by RCOM_STATUS and RCOM_NAMES messages, because they
have their own ping-like retransmit handling. These two messages will
also be used to determine the DLM version per node, because, by
observation, they are the first messages that are exchanged.
The midcomms_remove_member() hook should be called when there is nothing
left to send to the other node while the other node is still able to
transmit dlm messages to the node which called
midcomms_remove_member(). I found that the dlm protocol lacks support
for synchronizing this event at the protocol level. The result was
that there was still something to transmit but the other node was already
gone. This hook can be used to provide such synchronization. Although I
am not totally sure about the placement of this hook, I have not observed
any issues yet when providing such synchronization at the protocol layer.
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
fs/dlm/config.c | 3 ++-
fs/dlm/lock.c | 6 ++---
fs/dlm/lockspace.c | 5 +++--
fs/dlm/member.c | 16 ++++++++++++++
fs/dlm/member.h | 1 +
fs/dlm/midcomms.c | 44 +++++++++++++++++++++++++++++++++++++
fs/dlm/midcomms.h | 10 +++++++++
fs/dlm/rcom.c | 55 +++++++++++++++++++++++++++-------------------
fs/dlm/recoverd.c | 4 ++++
9 files changed, 116 insertions(+), 28 deletions(-)
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index ab26cf135710..ba8b1f104df3 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -20,6 +20,7 @@
#include <net/sock.h>
#include "config.h"
+#include "midcomms.h"
#include "lowcomms.h"
/*
@@ -532,7 +533,7 @@ static void drop_comm(struct config_group *g, struct config_item *i)
struct dlm_comm *cm = config_item_to_comm(i);
if (local_comm == cm)
local_comm = NULL;
- dlm_lowcomms_close(cm->nodeid);
+ dlm_midcomms_close(cm->nodeid);
while (cm->addr_count--)
kfree(cm->addr[cm->addr_count]);
config_item_put(i);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index b93df39d0915..b3fd823009f4 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -59,7 +59,7 @@
#include "dlm_internal.h"
#include <linux/dlm_device.h>
#include "memory.h"
-#include "lowcomms.h"
+#include "midcomms.h"
#include "requestqueue.h"
#include "util.h"
#include "dir.h"
@@ -3537,7 +3537,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
pass into lowcomms_commit and a message buffer (mb) that we
write our data into */
- mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
+ mh = dlm_midcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
if (!mh)
return -ENOBUFS;
@@ -3589,7 +3589,7 @@ static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms)
{
dlm_message_out(ms);
- dlm_lowcomms_commit_buffer(mh);
+ dlm_midcomms_commit_buffer(mh);
return 0;
}
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 561dcad08ad6..c9e0f5ac9f9a 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -16,6 +16,7 @@
#include "member.h"
#include "recoverd.h"
#include "dir.h"
+#include "midcomms.h"
#include "lowcomms.h"
#include "config.h"
#include "memory.h"
@@ -390,7 +391,7 @@ static int threads_start(void)
}
/* Thread for sending/receiving messages for all lockspace's */
- error = dlm_lowcomms_start();
+ error = dlm_midcomms_start();
if (error) {
log_print("cannot start dlm lowcomms %d", error);
goto scand_fail;
@@ -407,7 +408,7 @@ static int threads_start(void)
static void threads_stop(void)
{
dlm_scand_stop();
- dlm_lowcomms_stop();
+ dlm_midcomms_stop();
}
static int new_lockspace(const char *name, const char *cluster,
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index ceef3f2074ff..8291566766f3 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -15,6 +15,7 @@
#include "recover.h"
#include "rcom.h"
#include "config.h"
+#include "midcomms.h"
#include "lowcomms.h"
int dlm_slots_version(struct dlm_header *h)
@@ -521,6 +522,20 @@ static struct dlm_config_node *find_config_node(struct dlm_recover *rv,
return NULL;
}
+void dlm_flush_removed_members(struct dlm_ls *ls, struct dlm_recover *rv)
+{
+ const struct dlm_config_node *node;
+ const struct dlm_member *memb;
+
+ list_for_each_entry(memb, &ls->ls_nodes, list) {
+ node = find_config_node(rv, memb->nodeid);
+ if (node && !node->new)
+ continue;
+
+ midcomms_remove_member(memb->nodeid);
+ }
+}
+
int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
{
struct dlm_member *memb, *safe;
@@ -563,6 +578,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
if (dlm_is_member(ls, node->nodeid))
continue;
dlm_add_member(ls, node);
+ midcomms_add_member(node->nodeid);
log_rinfo(ls, "add member %d", node->nodeid);
}
diff --git a/fs/dlm/member.h b/fs/dlm/member.h
index 433b2fac9f4a..3a6dd2324283 100644
--- a/fs/dlm/member.h
+++ b/fs/dlm/member.h
@@ -26,6 +26,7 @@ int dlm_slots_copy_in(struct dlm_ls *ls);
int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size,
struct dlm_slot **slots_out, uint32_t *gen_out);
void dlm_lsop_recover_done(struct dlm_ls *ls);
+void dlm_flush_removed_members(struct dlm_ls *ls, struct dlm_recover *rv);
#endif /* __MEMBER_DOT_H__ */
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index fde3a6afe4be..717a3ed34f6a 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -30,6 +30,50 @@
#include "lock.h"
#include "midcomms.h"
+void *dlm_midcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
+{
+ return dlm_lowcomms_get_buffer(nodeid, len, allocation, ppc);
+}
+
+void dlm_midcomms_commit_buffer(void *mh)
+{
+ dlm_lowcomms_commit_buffer(mh);
+}
+
+void *dlm_midcomms_stateless_get_buffer(int nodeid, int len, gfp_t allocation,
+ char **ppc)
+{
+ return dlm_lowcomms_get_buffer(nodeid, len, allocation, ppc);
+}
+
+void dlm_midcomms_stateless_commit_buffer(void *mh)
+{
+ dlm_lowcomms_commit_buffer(mh);
+}
+
+void midcomms_add_member(int nodeid)
+{
+}
+
+void midcomms_remove_member(int nodeid)
+{
+}
+
+int dlm_midcomms_close(int nodeid)
+{
+ return dlm_lowcomms_close(nodeid);
+}
+
+int dlm_midcomms_start(void)
+{
+ return dlm_lowcomms_start();
+}
+
+void dlm_midcomms_stop(void)
+{
+ dlm_lowcomms_stop();
+}
+
/*
* Called from the low-level comms layer to process a buffer of
* commands.
diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h
index 61e90a921849..134c57430cdd 100644
--- a/fs/dlm/midcomms.h
+++ b/fs/dlm/midcomms.h
@@ -13,6 +13,16 @@
#define __MIDCOMMS_DOT_H__
int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int buflen);
+void *dlm_midcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
+void dlm_midcomms_commit_buffer(void *mh);
+void *dlm_midcomms_stateless_get_buffer(int nodeid, int len, gfp_t allocation,
+ char **ppc);
+void dlm_midcomms_stateless_commit_buffer(void *mh);
+void midcomms_add_member(int nodeid);
+void midcomms_remove_member(int nodeid);
+int dlm_midcomms_close(int nodeid);
+int dlm_midcomms_start(void);
+void dlm_midcomms_stop(void);
#endif /* __MIDCOMMS_DOT_H__ */
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index f5b1bd65728d..7a7d4a8e4706 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -28,14 +28,18 @@ static int rcom_response(struct dlm_ls *ls)
}
static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
- struct dlm_rcom **rc_ret, struct dlm_mhandle **mh_ret)
+ struct dlm_rcom **rc_ret, struct dlm_mhandle **mh_ret,
+ bool stateless)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
char *mb;
int mb_len = sizeof(struct dlm_rcom) + len;
- mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
+ if (stateless)
+ mh = dlm_midcomms_stateless_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
+ else
+ mh = dlm_midcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
if (!mh) {
log_print("create_rcom to %d type %d len %d ENOBUFS",
to_nodeid, type, len);
@@ -62,10 +66,13 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
}
static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh,
- struct dlm_rcom *rc)
+ struct dlm_rcom *rc, bool stateless)
{
dlm_rcom_out(rc);
- dlm_lowcomms_commit_buffer(mh);
+ if (stateless)
+ dlm_midcomms_stateless_commit_buffer(mh);
+ else
+ dlm_midcomms_commit_buffer(mh);
}
static void set_rcom_status(struct dlm_ls *ls, struct rcom_status *rs,
@@ -154,7 +161,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
retry:
error = create_rcom(ls, nodeid, DLM_RCOM_STATUS,
- sizeof(struct rcom_status), &rc, &mh);
+ sizeof(struct rcom_status), &rc, &mh, true);
if (error)
goto out;
@@ -163,7 +170,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
allow_sync_reply(ls, &rc->rc_id);
memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN);
- send_rcom(ls, mh, rc);
+ send_rcom(ls, mh, rc, true);
error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls);
@@ -219,7 +226,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
do_create:
error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY,
- len, &rc, &mh);
+ len, &rc, &mh, true);
if (error)
return;
@@ -246,7 +253,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
spin_unlock(&ls->ls_recover_lock);
do_send:
- send_rcom(ls, mh, rc);
+ send_rcom(ls, mh, rc, true);
}
static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
@@ -277,7 +284,8 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
ls->ls_recover_nodeid = nodeid;
retry:
- error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh);
+ error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh,
+ true);
if (error)
goto out;
memcpy(rc->rc_buf, last_name, last_len);
@@ -285,7 +293,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
allow_sync_reply(ls, &rc->rc_id);
memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN);
- send_rcom(ls, mh, rc);
+ send_rcom(ls, mh, rc, true);
error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls);
@@ -305,7 +313,8 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
outlen = LOWCOMMS_MAX_TX_BUFFER_LEN - sizeof(struct dlm_rcom);
- error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
+ error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh,
+ true);
if (error)
return;
rc->rc_id = rc_in->rc_id;
@@ -313,7 +322,7 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
nodeid);
- send_rcom(ls, mh, rc);
+ send_rcom(ls, mh, rc, true);
}
int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)
@@ -324,13 +333,13 @@ int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)
int error;
error = create_rcom(ls, dir_nodeid, DLM_RCOM_LOOKUP, r->res_length,
- &rc, &mh);
+ &rc, &mh, false);
if (error)
goto out;
memcpy(rc->rc_buf, r->res_name, r->res_length);
rc->rc_id = (unsigned long) r->res_id;
- send_rcom(ls, mh, rc);
+ send_rcom(ls, mh, rc, false);
out:
return error;
}
@@ -342,7 +351,8 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
int error, ret_nodeid, nodeid = rc_in->rc_header.h_nodeid;
int len = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
- error = create_rcom(ls, nodeid, DLM_RCOM_LOOKUP_REPLY, 0, &rc, &mh);
+ error = create_rcom(ls, nodeid, DLM_RCOM_LOOKUP_REPLY, 0, &rc, &mh,
+ false);
if (error)
return;
@@ -361,7 +371,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
- send_rcom(ls, mh, rc);
+ send_rcom(ls, mh, rc, false);
}
static void receive_rcom_lookup_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
@@ -410,7 +420,8 @@ int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
if (lkb->lkb_lvbptr)
len += ls->ls_lvblen;
- error = create_rcom(ls, r->res_nodeid, DLM_RCOM_LOCK, len, &rc, &mh);
+ error = create_rcom(ls, r->res_nodeid, DLM_RCOM_LOCK, len, &rc, &mh,
+ false);
if (error)
goto out;
@@ -418,7 +429,7 @@ int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
pack_rcom_lock(r, lkb, rl);
rc->rc_id = (unsigned long) r;
- send_rcom(ls, mh, rc);
+ send_rcom(ls, mh, rc, false);
out:
return error;
}
@@ -433,7 +444,7 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
dlm_recover_master_copy(ls, rc_in);
error = create_rcom(ls, nodeid, DLM_RCOM_LOCK_REPLY,
- sizeof(struct rcom_lock), &rc, &mh);
+ sizeof(struct rcom_lock), &rc, &mh, false);
if (error)
return;
@@ -444,7 +455,7 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
- send_rcom(ls, mh, rc);
+ send_rcom(ls, mh, rc, false);
}
/* If the lockspace doesn't exist then still send a status message
@@ -458,7 +469,7 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
char *mb;
int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
- mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_NOFS, &mb);
+ mh = dlm_midcomms_get_buffer(nodeid, mb_len, GFP_NOFS, &mb);
if (!mh)
return -ENOBUFS;
@@ -479,7 +490,7 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
rf->rf_lvblen = cpu_to_le32(~0U);
dlm_rcom_out(rc);
- dlm_lowcomms_commit_buffer(mh);
+ dlm_midcomms_commit_buffer(mh);
return 0;
}
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 85e245392715..763d129aa741 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -16,6 +16,7 @@
#include "ast.h"
#include "recover.h"
#include "lowcomms.h"
+#include "midcomms.h"
#include "lock.h"
#include "requestqueue.h"
#include "recoverd.h"
@@ -53,6 +54,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
unsigned long start;
int error, neg = 0;
+ dlm_flush_removed_members(ls, rv);
+
log_rinfo(ls, "dlm_recover %llu", (unsigned long long)rv->seq);
mutex_lock(&ls->ls_recoverd_active);
@@ -239,6 +242,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
mutex_unlock(&ls->ls_recoverd_active);
dlm_lsop_recover_done(ls);
+
return 0;
fail:
--
2.26.2
next prev parent reply other threads:[~2020-11-13 22:58 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-13 22:57 [Cluster-devel] [RFC PATCH dlm/next 00/16] fs: dlm: introduce dlm retransmission layer Alexander Aring
2020-11-13 22:57 ` [Cluster-devel] [RFC PATCH dlm/next 01/16] fs: dlm: add errno handling to check callback Alexander Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 02/16] fs: dlm: add check if dlm is currently running Alexander Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 03/16] fs: dlm: add check for minimum allocation length Alexander Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 04/16] fs: dlm: public utils header utils Alexander Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 05/16] fs: dlm: use GFP_ZERO for page buffer Alexander Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 06/16] fs: dlm: simplify writequeue handling Alexander Aring
2020-11-13 22:58 ` Alexander Aring [this message]
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 08/16] fs: dlm: make buffer handling per msg Alexander Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 09/16] fs: dlm: make new buffer handling softirq ready Alexander Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 10/16] fs: dlm: add functionality to retransmit a message Alexander Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 11/16] fs: dlm: move out some hash functionality Alexander Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 12/16] fs: dlm: remove unaligned memory access handling Alexander Aring
2020-11-24 14:50 ` Alexander Ahring Oder Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 13/16] fs: dlm: check on minimum header size Alexander Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 14/16] fs: dlm: add union in dlm header for lockspace id Alexander Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 15/16] fs: dlm: add reliable connection if reconnect Alexander Aring
2020-11-16 18:45 ` Alexander Ahring Oder Aring
2020-11-17 18:48 ` Alexander Ahring Oder Aring
2020-11-23 22:40 ` Alexander Ahring Oder Aring
2020-11-13 22:58 ` [Cluster-devel] [RFC PATCH dlm/next 16/16] fs: dlm: don't allow half transmitted messages Alexander Aring
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201113225814.461167-8-aahringo@redhat.com \
--to=aahringo@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).