From: Alexander Aring <aahringo@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 04/15] fs: dlm: reconnect if socket error report occurs
Date: Fri, 14 May 2021 20:35:38 -0400 [thread overview]
Message-ID: <20210515003549.1118171-5-aahringo@redhat.com> (raw)
In-Reply-To: <20210515003549.1118171-1-aahringo@redhat.com>
This patch changes the reconnect handling so that a reconnect is
triggered when the socket error report callback is invoked. This also
handles reconnects in the non-blocking connect case, which is currently
missing. If the error ECONNREFUSED is reported, we delay the reconnect
by one second.
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
fs/dlm/lowcomms.c | 60 ++++++++++++++++++++++++++++++-----------------
1 file changed, 39 insertions(+), 21 deletions(-)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 4944aef24aa5..051f22dbb83a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -79,6 +79,8 @@ struct connection {
#define CF_CLOSING 8
#define CF_SHUTDOWN 9
#define CF_CONNECTED 10
+#define CF_RECONNECT 11
+#define CF_DELAY_CONNECT 12
struct list_head writequeue; /* List of outgoing writequeue_entries */
spinlock_t writequeue_lock;
void (*connect_action) (struct connection *); /* What to do to connect */
@@ -87,6 +89,7 @@ struct connection {
#define MAX_CONNECT_RETRIES 3
struct hlist_node list;
struct connection *othercon;
+ struct connection *sendcon;
struct work_struct rwork; /* Receive workqueue */
struct work_struct swork; /* Send workqueue */
wait_queue_head_t shutdown_wait; /* wait for graceful shutdown */
@@ -585,6 +588,22 @@ static void lowcomms_error_report(struct sock *sk)
dlm_config.ci_tcp_port, sk->sk_err,
sk->sk_err_soft);
}
+
+ /* below sendcon only handling */
+ if (test_bit(CF_IS_OTHERCON, &con->flags))
+ con = con->sendcon;
+
+ switch (sk->sk_err) {
+ case ECONNREFUSED:
+ set_bit(CF_DELAY_CONNECT, &con->flags);
+ break;
+ default:
+ break;
+ }
+
+ if (!test_and_set_bit(CF_RECONNECT, &con->flags))
+ queue_work(send_workqueue, &con->swork);
+
out:
read_unlock_bh(&sk->sk_callback_lock);
if (orig_report)
@@ -702,6 +721,8 @@ static void close_connection(struct connection *con, bool and_other,
con->rx_leftover = 0;
con->retries = 0;
clear_bit(CF_CONNECTED, &con->flags);
+ clear_bit(CF_DELAY_CONNECT, &con->flags);
+ clear_bit(CF_RECONNECT, &con->flags);
mutex_unlock(&con->sock_mutex);
clear_bit(CF_CLOSING, &con->flags);
}
@@ -840,18 +861,15 @@ static int receive_from_sock(struct connection *con)
out_close:
mutex_unlock(&con->sock_mutex);
- if (ret != -EAGAIN) {
- /* Reconnect when there is something to send */
+ if (ret == 0) {
close_connection(con, false, true, false);
- if (ret == 0) {
- log_print("connection %p got EOF from %d",
- con, con->nodeid);
- /* handling for tcp shutdown */
- clear_bit(CF_SHUTDOWN, &con->flags);
- wake_up(&con->shutdown_wait);
- /* signal to breaking receive worker */
- ret = -1;
- }
+ log_print("connection %p got EOF from %d",
+ con, con->nodeid);
+ /* handling for tcp shutdown */
+ clear_bit(CF_SHUTDOWN, &con->flags);
+ wake_up(&con->shutdown_wait);
+ /* signal to breaking receive worker */
+ ret = -1;
}
return ret;
}
@@ -940,6 +958,7 @@ static int accept_from_sock(struct listen_connection *con)
lockdep_set_subclass(&othercon->sock_mutex, 1);
set_bit(CF_IS_OTHERCON, &othercon->flags);
newcon->othercon = othercon;
+ othercon->sendcon = newcon;
} else {
/* close other sock con if we have something new */
close_connection(othercon, false, true, false);
@@ -1504,7 +1523,7 @@ static void send_to_sock(struct connection *con)
cond_resched();
goto out;
} else if (ret < 0)
- goto send_error;
+ goto out;
}
/* Don't starve people filling buffers */
@@ -1521,14 +1540,6 @@ static void send_to_sock(struct connection *con)
mutex_unlock(&con->sock_mutex);
return;
-send_error:
- mutex_unlock(&con->sock_mutex);
- close_connection(con, false, false, true);
- /* Requeue the send work. When the work daemon runs again, it will try
- a new connection, then call this function again. */
- queue_work(send_workqueue, &con->swork);
- return;
-
out_connect:
mutex_unlock(&con->sock_mutex);
queue_work(send_workqueue, &con->swork);
@@ -1605,8 +1616,15 @@ static void process_send_sockets(struct work_struct *work)
WARN_ON(test_bit(CF_IS_OTHERCON, &con->flags));
clear_bit(CF_WRITE_PENDING, &con->flags);
- if (con->sock == NULL) /* not mutex protected so check it inside too */
+
+ if (test_and_clear_bit(CF_RECONNECT, &con->flags))
+ close_connection(con, false, false, true);
+
+ if (con->sock == NULL) { /* not mutex protected so check it inside too */
+ if (test_and_clear_bit(CF_DELAY_CONNECT, &con->flags))
+ msleep(1000);
con->connect_action(con);
+ }
if (!list_empty(&con->writequeue))
send_to_sock(con);
}
--
2.26.3
next prev parent reply other threads:[~2021-05-15 0:35 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-15 0:35 [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 00/15] fs: dlm: introduce dlm re-transmission layer Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 01/15] fs: dlm: add dlm macros for ratelimit log Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 02/15] fs: dlm: fix srcu read lock usage Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 03/15] fs: dlm: set is othercon flag Alexander Aring
2021-05-15 0:35 ` Alexander Aring [this message]
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 05/15] fs: dlm: fix connection tcp EOF handling Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 06/15] fs: dlm: public header in out utility Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 07/15] fs: dlm: add more midcomms hooks Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 08/15] fs: dlm: make buffer handling per msg Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 09/15] fs: dlm: add functionality to re-transmit a message Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 10/15] fs: dlm: move out some hash functionality Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 11/15] fs: dlm: add union in dlm header for lockspace id Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 12/15] fs: dlm: add force flush of writequeue function Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 13/15] fs: dlm: add reliable connection if reconnect Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 14/15] fs: dlm: add midcomms debugfs functionality Alexander Aring
2021-05-15 0:35 ` [Cluster-devel] [PATCHv5 v5.13-rc1 dlm/next 15/15] fs: dlm: don't allow half transmitted messages Alexander Aring
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210515003549.1118171-5-aahringo@redhat.com \
--to=aahringo@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).