* [PATCH 1/5] libceph: encapsulate connection backoff
2013-02-22 17:21 ` [PATCH 0/5, v2] libceph clean up con_work() Alex Elder
@ 2013-02-22 17:23 ` Alex Elder
2013-02-22 17:23 ` [PATCH 2/5] libceph: separate non-locked fault handling Alex Elder
` (4 subsequent siblings)
5 siblings, 0 replies; 13+ messages in thread
From: Alex Elder @ 2013-02-22 17:23 UTC (permalink / raw)
To: ceph-devel@vger.kernel.org >> ceph-devel
Collect the code that tests for and implements a backoff delay for a
ceph connection into a new function, ceph_backoff().
Make the debug output messages in that part of the code report
things consistently by reporting a message in the socket closed
case, and by making the one for PREOPEN state report the connection
pointer like the rest.
Signed-off-by: Alex Elder <elder@inktank.com>
---
v2: rebased
net/ceph/messenger.c | 37 ++++++++++++++++++++++++-------------
1 file changed, 24 insertions(+), 13 deletions(-)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index ed9e237..9a29d8a 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2345,6 +2345,24 @@ static bool con_sock_closed(struct
ceph_connection *con)
return true;
}
+static bool con_backoff(struct ceph_connection *con)
+{
+ int ret;
+
+ if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF))
+ return false;
+
+ ret = queue_con_delay(con, round_jiffies_relative(con->delay));
+ if (ret) {
+ dout("%s: con %p FAILED to back off %lu\n", __func__,
+ con, con->delay);
+ BUG_ON(ret == -ENOENT);
+ con_flag_set(con, CON_FLAG_BACKOFF);
+ }
+
+ return true;
+}
+
/*
* Do some work on a connection. Drop a connection ref when we're done.
*/
@@ -2356,21 +2374,14 @@ static void con_work(struct work_struct *work)
mutex_lock(&con->mutex);
restart:
- if (con_sock_closed(con))
+ if (con_sock_closed(con)) {
+ dout("%s: con %p SOCK_CLOSED\n", __func__, con);
goto fault;
-
- if (con_flag_test_and_clear(con, CON_FLAG_BACKOFF)) {
- dout("con_work %p backing off\n", con);
- ret = queue_con_delay(con, round_jiffies_relative(con->delay));
- if (ret) {
- dout("con_work %p FAILED to back off %lu\n", con,
- con->delay);
- BUG_ON(ret == -ENOENT);
- con_flag_set(con, CON_FLAG_BACKOFF);
- }
+ }
+ if (con_backoff(con)) {
+ dout("%s: con %p BACKOFF\n", __func__, con);
goto done;
}
-
if (con->state == CON_STATE_STANDBY) {
dout("con_work %p STANDBY\n", con);
goto done;
@@ -2381,7 +2392,7 @@ restart:
goto done;
}
if (con->state == CON_STATE_PREOPEN) {
- dout("con_work OPENING\n");
+ dout("%s: con %p OPENING\n", __func__, con);
BUG_ON(con->sock);
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 2/5] libceph: separate non-locked fault handling
2013-02-22 17:21 ` [PATCH 0/5, v2] libceph clean up con_work() Alex Elder
2013-02-22 17:23 ` [PATCH 1/5] libceph: encapsulate connection backoff Alex Elder
@ 2013-02-22 17:23 ` Alex Elder
2013-02-22 17:26 ` Alex Elder
` (3 subsequent siblings)
5 siblings, 0 replies; 13+ messages in thread
From: Alex Elder @ 2013-02-22 17:23 UTC (permalink / raw)
To: ceph-devel@vger.kernel.org >> ceph-devel
An error occurring on a ceph connection is treated as a fault,
causing the connection to be reset. The initial part of this fault
handling has to be done while holding the connection mutex, but
it must then be dropped for the last part.
Separate the part of this fault handling that executes without the
lock into its own function, con_fault_finish(). Move the call to
this new function, as well as call that drops the connection mutex,
into ceph_fault(). Rename that function con_fault() to reflect that
it's only handling the connection part of the fault handling.
The motivation for this was a warning from sparse about the locking
being done here. Rearranging things this way keeps all the mutex
manipulation within ceph_fault(), and this stops sparse from
complaining.
This partially resolves:
http://tracker.ceph.com/issues/4184
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Alex Elder <elder@inktank.com>
---
v2: rebased
net/ceph/messenger.c | 42 +++++++++++++++++++++++-------------------
1 file changed, 23 insertions(+), 19 deletions(-)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9a29d8a..c3b9060 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -166,7 +166,7 @@ static struct lock_class_key socket_class;
static void queue_con(struct ceph_connection *con);
static void con_work(struct work_struct *);
-static void ceph_fault(struct ceph_connection *con);
+static void con_fault(struct ceph_connection *con);
/*
* Nicely render a sockaddr as a string. An array of formatted
@@ -2363,6 +2363,23 @@ static bool con_backoff(struct ceph_connection *con)
return true;
}
+/* Finish fault handling; con->mutex must *not* be held here */
+
+static void con_fault_finish(struct ceph_connection *con)
+{
+ /*
+ * in case we faulted due to authentication, invalidate our
+ * current tickets so that we can get new ones.
+ */
+ if (con->auth_retry && con->ops->invalidate_authorizer) {
+ dout("calling invalidate_authorizer()\n");
+ con->ops->invalidate_authorizer(con);
+ }
+
+ if (con->ops->fault)
+ con->ops->fault(con);
+}
+
/*
* Do some work on a connection. Drop a connection ref when we're done.
*/
@@ -2419,7 +2436,9 @@ done_unlocked:
return;
fault:
- ceph_fault(con); /* error/fault path */
+ con_fault(con);
+ mutex_unlock(&con->mutex);
+ con_fault_finish(con);
goto done_unlocked;
}
@@ -2428,8 +2447,7 @@ fault:
* Generic error/fault handler. A retry mechanism is used with
* exponential backoff
*/
-static void ceph_fault(struct ceph_connection *con)
- __releases(con->mutex)
+static void con_fault(struct ceph_connection *con)
{
pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
@@ -2445,7 +2463,7 @@ static void ceph_fault(struct ceph_connection *con)
if (con_flag_test(con, CON_FLAG_LOSSYTX)) {
dout("fault on LOSSYTX channel, marking CLOSED\n");
con->state = CON_STATE_CLOSED;
- goto out_unlock;
+ return;
}
if (con->in_msg) {
@@ -2476,20 +2494,6 @@ static void ceph_fault(struct ceph_connection *con)
con_flag_set(con, CON_FLAG_BACKOFF);
queue_con(con);
}
-
-out_unlock:
- mutex_unlock(&con->mutex);
- /*
- * in case we faulted due to authentication, invalidate our
- * current tickets so that we can get new ones.
- */
- if (con->auth_retry && con->ops->invalidate_authorizer) {
- dout("calling invalidate_authorizer()\n");
- con->ops->invalidate_authorizer(con);
- }
-
- if (con->ops->fault)
- con->ops->fault(con);
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 2/5] libceph: separate non-locked fault handling
2013-02-22 17:21 ` [PATCH 0/5, v2] libceph clean up con_work() Alex Elder
2013-02-22 17:23 ` [PATCH 1/5] libceph: encapsulate connection backoff Alex Elder
2013-02-22 17:23 ` [PATCH 2/5] libceph: separate non-locked fault handling Alex Elder
@ 2013-02-22 17:26 ` Alex Elder
2013-02-22 17:32 ` Alex Elder
2013-02-22 17:26 ` [PATCH 3/5, v2] libceph: use a flag to indicate a fault has occurred Alex Elder
` (2 subsequent siblings)
5 siblings, 1 reply; 13+ messages in thread
From: Alex Elder @ 2013-02-22 17:26 UTC (permalink / raw)
To: ceph-devel@vger.kernel.org >> ceph-devel
An error occurring on a ceph connection is treated as a fault,
causing the connection to be reset. The initial part of this fault
handling has to be done while holding the connection mutex, but
it must then be dropped for the last part.
Separate the part of this fault handling that executes without the
lock into its own function, con_fault_finish(). Move the call to
this new function, as well as call that drops the connection mutex,
into ceph_fault(). Rename that function con_fault() to reflect that
it's only handling the connection part of the fault handling.
The motivation for this was a warning from sparse about the locking
being done here. Rearranging things this way keeps all the mutex
manipulation within ceph_fault(), and this stops sparse from
complaining.
This partially resolves:
http://tracker.ceph.com/issues/4184
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Alex Elder <elder@inktank.com>
---
v2: rebased
net/ceph/messenger.c | 42 +++++++++++++++++++++++-------------------
1 file changed, 23 insertions(+), 19 deletions(-)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9a29d8a..c3b9060 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -166,7 +166,7 @@ static struct lock_class_key socket_class;
static void queue_con(struct ceph_connection *con);
static void con_work(struct work_struct *);
-static void ceph_fault(struct ceph_connection *con);
+static void con_fault(struct ceph_connection *con);
/*
* Nicely render a sockaddr as a string. An array of formatted
@@ -2363,6 +2363,23 @@ static bool con_backoff(struct ceph_connection *con)
return true;
}
+/* Finish fault handling; con->mutex must *not* be held here */
+
+static void con_fault_finish(struct ceph_connection *con)
+{
+ /*
+ * in case we faulted due to authentication, invalidate our
+ * current tickets so that we can get new ones.
+ */
+ if (con->auth_retry && con->ops->invalidate_authorizer) {
+ dout("calling invalidate_authorizer()\n");
+ con->ops->invalidate_authorizer(con);
+ }
+
+ if (con->ops->fault)
+ con->ops->fault(con);
+}
+
/*
* Do some work on a connection. Drop a connection ref when we're done.
*/
@@ -2419,7 +2436,9 @@ done_unlocked:
return;
fault:
- ceph_fault(con); /* error/fault path */
+ con_fault(con);
+ mutex_unlock(&con->mutex);
+ con_fault_finish(con);
goto done_unlocked;
}
@@ -2428,8 +2447,7 @@ fault:
* Generic error/fault handler. A retry mechanism is used with
* exponential backoff
*/
-static void ceph_fault(struct ceph_connection *con)
- __releases(con->mutex)
+static void con_fault(struct ceph_connection *con)
{
pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
@@ -2445,7 +2463,7 @@ static void ceph_fault(struct ceph_connection *con)
if (con_flag_test(con, CON_FLAG_LOSSYTX)) {
dout("fault on LOSSYTX channel, marking CLOSED\n");
con->state = CON_STATE_CLOSED;
- goto out_unlock;
+ return;
}
if (con->in_msg) {
@@ -2476,20 +2494,6 @@ static void ceph_fault(struct ceph_connection *con)
con_flag_set(con, CON_FLAG_BACKOFF);
queue_con(con);
}
-
-out_unlock:
- mutex_unlock(&con->mutex);
- /*
- * in case we faulted due to authentication, invalidate our
- * current tickets so that we can get new ones.
- */
- if (con->auth_retry && con->ops->invalidate_authorizer) {
- dout("calling invalidate_authorizer()\n");
- con->ops->invalidate_authorizer(con);
- }
-
- if (con->ops->fault)
- con->ops->fault(con);
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 3/5, v2] libceph: use a flag to indicate a fault has occurred
2013-02-22 17:21 ` [PATCH 0/5, v2] libceph clean up con_work() Alex Elder
` (2 preceding siblings ...)
2013-02-22 17:26 ` Alex Elder
@ 2013-02-22 17:26 ` Alex Elder
2013-02-22 17:30 ` [PATCH 4/5, v2] libceph: use a do..while loop in con_work() Alex Elder
2013-02-22 17:30 ` [PATCH 5/5, v2] libceph: indent properly Alex Elder
5 siblings, 0 replies; 13+ messages in thread
From: Alex Elder @ 2013-02-22 17:26 UTC (permalink / raw)
To: ceph-devel@vger.kernel.org >> ceph-devel
This just rearranges the logic in con_work() a little bit so that a
flag is used to indicate a fault has occurred. This allows both the
fault and non-fault case to be handled the same way and avoids a
couple of nearly consecutive gotos.
Signed-off-by: Alex Elder <elder@inktank.com>
---
v2: rebased
net/ceph/messenger.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index c3b9060..18eb788 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2387,13 +2387,15 @@ static void con_work(struct work_struct *work)
{
struct ceph_connection *con = container_of(work, struct ceph_connection,
work.work);
+ bool fault = false;
int ret;
mutex_lock(&con->mutex);
restart:
if (con_sock_closed(con)) {
dout("%s: con %p SOCK_CLOSED\n", __func__, con);
- goto fault;
+ fault = true;
+ goto done;
}
if (con_backoff(con)) {
dout("%s: con %p BACKOFF\n", __func__, con);
@@ -2418,7 +2420,8 @@ restart:
goto restart;
if (ret < 0) {
con->error_msg = "socket error on read";
- goto fault;
+ fault = true;
+ goto done;
}
ret = try_write(con);
@@ -2426,20 +2429,17 @@ restart:
goto restart;
if (ret < 0) {
con->error_msg = "socket error on write";
- goto fault;
+ fault = true;
}
-
done:
+ if (fault)
+ con_fault(con);
mutex_unlock(&con->mutex);
-done_unlocked:
- con->ops->put(con);
- return;
-fault:
- con_fault(con);
- mutex_unlock(&con->mutex);
- con_fault_finish(con);
- goto done_unlocked;
+ if (fault)
+ con_fault_finish(con);
+
+ con->ops->put(con);
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 4/5, v2] libceph: use a do..while loop in con_work()
2013-02-22 17:21 ` [PATCH 0/5, v2] libceph clean up con_work() Alex Elder
` (3 preceding siblings ...)
2013-02-22 17:26 ` [PATCH 3/5, v2] libceph: use a flag to indicate a fault has occurred Alex Elder
@ 2013-02-22 17:30 ` Alex Elder
2013-02-22 17:30 ` [PATCH 5/5, v2] libceph: indent properly Alex Elder
5 siblings, 0 replies; 13+ messages in thread
From: Alex Elder @ 2013-02-22 17:30 UTC (permalink / raw)
To: ceph-devel@vger.kernel.org >> ceph-devel
This just converts a manually-implemented loop into a do..while loop
in con_work(). It also moves handling of EAGAIN inside the blocks
where it's already been determined an error code was returned.
Also update a few dout() calls near the affected code for
consistency.
NOTE:
This was done in two steps in order to facilitate review. The
This patch will be squashed into the next one before commit.
next patch simply indents the loop properly.
Signed-off-by: Alex Elder <elder@inktank.com>
---
v2: rebased
net/ceph/messenger.c | 39 ++++++++++++++++++++-------------------
1 file changed, 20 insertions(+), 19 deletions(-)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 18eb788..223406f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2387,51 +2387,53 @@ static void con_work(struct work_struct *work)
{
struct ceph_connection *con = container_of(work, struct ceph_connection,
work.work);
- bool fault = false;
- int ret;
+ bool fault;
mutex_lock(&con->mutex);
-restart:
- if (con_sock_closed(con)) {
+while (true) {
+ int ret;
+
+ if ((fault = con_sock_closed(con))) {
dout("%s: con %p SOCK_CLOSED\n", __func__, con);
- fault = true;
- goto done;
+ break;
}
if (con_backoff(con)) {
dout("%s: con %p BACKOFF\n", __func__, con);
- goto done;
+ break;
}
if (con->state == CON_STATE_STANDBY) {
- dout("con_work %p STANDBY\n", con);
- goto done;
+ dout("%s: con %p STANDBY\n", __func__, con);
+ break;
}
if (con->state == CON_STATE_CLOSED) {
- dout("con_work %p CLOSED\n", con);
+ dout("%s: con %p CLOSED\n", __func__, con);
BUG_ON(con->sock);
- goto done;
+ break;
}
if (con->state == CON_STATE_PREOPEN) {
- dout("%s: con %p OPENING\n", __func__, con);
+ dout("%s: con %p PREOPEN\n", __func__, con);
BUG_ON(con->sock);
}
ret = try_read(con);
- if (ret == -EAGAIN)
- goto restart;
if (ret < 0) {
+ if (ret == -EAGAIN)
+ continue;
con->error_msg = "socket error on read";
fault = true;
- goto done;
+ break;
}
ret = try_write(con);
- if (ret == -EAGAIN)
- goto restart;
if (ret < 0) {
+ if (ret == -EAGAIN)
+ continue;
con->error_msg = "socket error on write";
fault = true;
}
-done:
+
+ break; /* If we make it to here, we're done */
+}
if (fault)
con_fault(con);
mutex_unlock(&con->mutex);
@@ -2442,7 +2444,6 @@ done:
con->ops->put(con);
}
-
/*
* Generic error/fault handler. A retry mechanism is used with
* exponential backoff
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 5/5, v2] libceph: indent properly
2013-02-22 17:21 ` [PATCH 0/5, v2] libceph clean up con_work() Alex Elder
` (4 preceding siblings ...)
2013-02-22 17:30 ` [PATCH 4/5, v2] libceph: use a do..while loop in con_work() Alex Elder
@ 2013-02-22 17:30 ` Alex Elder
5 siblings, 0 replies; 13+ messages in thread
From: Alex Elder @ 2013-02-22 17:30 UTC (permalink / raw)
To: ceph-devel@vger.kernel.org >> ceph-devel
This is just the second part of a two-part patch. It simply
indents a block of code. This patch is going to be merged into
its predecessor following review.
Signed-off-by: Alex Elder <elder@inktank.com>
---
v2: rebased
net/ceph/messenger.c | 80
+++++++++++++++++++++++++-------------------------
1 file changed, 40 insertions(+), 40 deletions(-)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 223406f..2c0669f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2390,50 +2390,50 @@ static void con_work(struct work_struct *work)
bool fault;
mutex_lock(&con->mutex);
-while (true) {
- int ret;
+ while (true) {
+ int ret;
- if ((fault = con_sock_closed(con))) {
- dout("%s: con %p SOCK_CLOSED\n", __func__, con);
- break;
- }
- if (con_backoff(con)) {
- dout("%s: con %p BACKOFF\n", __func__, con);
- break;
- }
- if (con->state == CON_STATE_STANDBY) {
- dout("%s: con %p STANDBY\n", __func__, con);
- break;
- }
- if (con->state == CON_STATE_CLOSED) {
- dout("%s: con %p CLOSED\n", __func__, con);
- BUG_ON(con->sock);
- break;
- }
- if (con->state == CON_STATE_PREOPEN) {
- dout("%s: con %p PREOPEN\n", __func__, con);
- BUG_ON(con->sock);
- }
+ if ((fault = con_sock_closed(con))) {
+ dout("%s: con %p SOCK_CLOSED\n", __func__, con);
+ break;
+ }
+ if (con_backoff(con)) {
+ dout("%s: con %p BACKOFF\n", __func__, con);
+ break;
+ }
+ if (con->state == CON_STATE_STANDBY) {
+ dout("%s: con %p STANDBY\n", __func__, con);
+ break;
+ }
+ if (con->state == CON_STATE_CLOSED) {
+ dout("%s: con %p CLOSED\n", __func__, con);
+ BUG_ON(con->sock);
+ break;
+ }
+ if (con->state == CON_STATE_PREOPEN) {
+ dout("%s: con %p PREOPEN\n", __func__, con);
+ BUG_ON(con->sock);
+ }
- ret = try_read(con);
- if (ret < 0) {
- if (ret == -EAGAIN)
- continue;
- con->error_msg = "socket error on read";
- fault = true;
- break;
- }
+ ret = try_read(con);
+ if (ret < 0) {
+ if (ret == -EAGAIN)
+ continue;
+ con->error_msg = "socket error on read";
+ fault = true;
+ break;
+ }
- ret = try_write(con);
- if (ret < 0) {
- if (ret == -EAGAIN)
- continue;
- con->error_msg = "socket error on write";
- fault = true;
- }
+ ret = try_write(con);
+ if (ret < 0) {
+ if (ret == -EAGAIN)
+ continue;
+ con->error_msg = "socket error on write";
+ fault = true;
+ }
- break; /* If we make it to here, we're done */
-}
+ break; /* If we make it to here, we're done */
+ }
if (fault)
con_fault(con);
mutex_unlock(&con->mutex);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread