* [PATCH 1/4] libceph: avoid using freed osd in __kick_osd_requests()
2012-12-10 0:33 [PATCH 0/4] libceph: four bug fixes Alex Elder
@ 2012-12-10 0:35 ` Alex Elder
2012-12-10 0:35 ` [PATCH 2/4] libceph: register request before unregister linger Alex Elder
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Alex Elder @ 2012-12-10 0:35 UTC (permalink / raw)
To: ceph-devel@vger.kernel.org
If an osd has no requests and no linger requests, __reset_osd()
will just remove it with a call to __remove_osd(). That drops
a reference to the osd, and therefore the osd may have been freed
by the time __reset_osd() returns. That function offers no
indication this may have occurred, and as a result the osd will
continue to be used even when it's no longer valid.
Change __reset_osd() so it returns an error (ENODEV) when it
deletes the osd being reset. And change __kick_osd_requests() so it
returns immediately (before referencing osd again) if __reset_osd()
returns *any* error.
Signed-off-by: Alex Elder <elder@inktank.com>
---
net/ceph/osd_client.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d1177ec..79a071b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -507,7 +507,7 @@ static void __kick_osd_requests(struct
ceph_osd_client *osdc,
dout("__kick_osd_requests osd%d\n", osd->o_osd);
err = __reset_osd(osdc, osd);
- if (err == -EAGAIN)
+ if (err)
return;
list_for_each_entry(req, &osd->o_requests, r_osd_item) {
@@ -671,6 +671,7 @@ static int __reset_osd(struct ceph_osd_client *osdc,
struct ceph_osd *osd)
if (list_empty(&osd->o_requests) &&
list_empty(&osd->o_linger_requests)) {
__remove_osd(osdc, osd);
+ ret = -ENODEV;
} else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
&osd->o_con.peer_addr,
sizeof(osd->o_con.peer_addr)) == 0 &&
--
1.7.9.5
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 2/4] libceph: register request before unregister linger
2012-12-10 0:33 [PATCH 0/4] libceph: four bug fixes Alex Elder
2012-12-10 0:35 ` [PATCH 1/4] libceph: avoid using freed osd in __kick_osd_requests() Alex Elder
@ 2012-12-10 0:35 ` Alex Elder
2012-12-10 0:36 ` [PATCH 3/4] libceph: socket can close in any connection state Alex Elder
2012-12-10 0:36 ` [PATCH 4/4] libceph: init osd->o_node in create_osd() Alex Elder
3 siblings, 0 replies; 5+ messages in thread
From: Alex Elder @ 2012-12-10 0:35 UTC (permalink / raw)
To: ceph-devel@vger.kernel.org
In kick_requests(), we need to register the request before we
unregister the linger request. Otherwise the unregister will
reset the request's osd pointer to NULL.
Signed-off-by: Alex Elder <elder@inktank.com>
---
net/ceph/osd_client.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 79a071b..1a42701 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1253,8 +1253,8 @@ static void kick_requests(struct ceph_osd_client
*osdc, int force_resend)
dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid,
req->r_osd ? req->r_osd->o_osd : -1);
- __unregister_linger_request(osdc, req);
__register_request(osdc, req);
+ __unregister_linger_request(osdc, req);
}
mutex_unlock(&osdc->request_mutex);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 3/4] libceph: socket can close in any connection state
2012-12-10 0:33 [PATCH 0/4] libceph: four bug fixes Alex Elder
2012-12-10 0:35 ` [PATCH 1/4] libceph: avoid using freed osd in __kick_osd_requests() Alex Elder
2012-12-10 0:35 ` [PATCH 2/4] libceph: register request before unregister linger Alex Elder
@ 2012-12-10 0:36 ` Alex Elder
2012-12-10 0:36 ` [PATCH 4/4] libceph: init osd->o_node in create_osd() Alex Elder
3 siblings, 0 replies; 5+ messages in thread
From: Alex Elder @ 2012-12-10 0:36 UTC (permalink / raw)
To: ceph-devel@vger.kernel.org
A connection's socket can close for any reason, independent of the
state of the connection (and irrespective of the connection
mutex). As a result, the connection can be in pretty much any state
at the time its socket is closed.
Handle those other cases at the top of con_work(). Pull this whole
block of code into a separate function to reduce the clutter.
Signed-off-by: Alex Elder <elder@inktank.com>
---
net/ceph/messenger.c | 47 ++++++++++++++++++++++++++++++-----------------
1 file changed, 30 insertions(+), 17 deletions(-)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 1041114..4b04ccc 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2273,6 +2273,35 @@ static void queue_con(struct ceph_connection *con)
(void) queue_con_delay(con, 0);
}
+static bool con_sock_closed(struct ceph_connection *con)
+{
+ if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags))
+ return false;
+
+#define CASE(x) \
+ case CON_STATE_ ## x: \
+ con->error_msg = "socket closed (con state " #x ")"; \
+ break;
+
+ switch (con->state) {
+ CASE(CLOSED);
+ CASE(PREOPEN);
+ CASE(CONNECTING);
+ CASE(NEGOTIATING);
+ CASE(OPEN);
+ CASE(STANDBY);
+ default:
+ pr_warning("%s con %p unrecognized state %lu\n",
+ __func__, con, con->state);
+ con->error_msg = "unrecognized con state";
+ BUG();
+ break;
+ }
+#undef CASE
+
+ return true;
+}
+
/*
* Do some work on a connection. Drop a connection ref when we're done.
*/
@@ -2284,24 +2313,8 @@ static void con_work(struct work_struct *work)
mutex_lock(&con->mutex);
restart:
- if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) {
- switch (con->state) {
- case CON_STATE_CONNECTING:
- con->error_msg = "connection failed";
- break;
- case CON_STATE_NEGOTIATING:
- con->error_msg = "negotiation failed";
- break;
- case CON_STATE_OPEN:
- con->error_msg = "socket closed";
- break;
- default:
- dout("unrecognized con state %d\n", (int)con->state);
- con->error_msg = "unrecognized con state";
- BUG();
- }
+ if (con_sock_closed(con))
goto fault;
- }
if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) {
dout("con_work %p backing off\n", con);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 4/4] libceph: init osd->o_node in create_osd()
2012-12-10 0:33 [PATCH 0/4] libceph: four bug fixes Alex Elder
` (2 preceding siblings ...)
2012-12-10 0:36 ` [PATCH 3/4] libceph: socket can close in any connection state Alex Elder
@ 2012-12-10 0:36 ` Alex Elder
3 siblings, 0 replies; 5+ messages in thread
From: Alex Elder @ 2012-12-10 0:36 UTC (permalink / raw)
To: ceph-devel@vger.kernel.org
It turns out to be harmless but the red-black node o_node in the
ceph osd structure is not initialized in create_osd(). Add a
call to rb_init_node() to initialize it.
Signed-off-by: Alex Elder <elder@inktank.com>
---
net/ceph/osd_client.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 1a42701..c018e52 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -568,6 +568,7 @@ static struct ceph_osd *create_osd(struct
ceph_osd_client *osdc, int onum)
atomic_set(&osd->o_ref, 1);
osd->o_osdc = osdc;
osd->o_osd = onum;
+ rb_init_node(&osd->o_node);
INIT_LIST_HEAD(&osd->o_requests);
INIT_LIST_HEAD(&osd->o_linger_requests);
INIT_LIST_HEAD(&osd->o_osd_lru);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 5+ messages in thread