* [cld patch 1/1] fix crash in cldc_close due to array reallocations
@ 2010-05-15 19:32 Pete Zaitcev
2010-05-15 20:36 ` Jeff Garzik
0 siblings, 1 reply; 2+ messages in thread
From: Pete Zaitcev @ 2010-05-15 19:32 UTC (permalink / raw)
To: Jeff Garzik; +Cc: Project Hail List
A long-running tabled is virtually guaranteed to crash when it's shut
down, with the traceback like this:
Program terminated with signal 11, Segmentation fault.
#0 cldc_close (fh=0x10a71c8, copts=0x7fff2fdef780) at cldc.c:1049
1049 if (sess->expired) {
(gdb) where
#0 cldc_close (fh=0x10a71c8, copts=0x7fff2fdef780) at cldc.c:1049
#1 0x00007f8a878ed7c7 in ncld_close (fh=0x10a7790) at cldc.c:2215
#2 0x00007f8a878ed918 in ncld_sess_close (nsess=0x10a6f10) at cldc.c:2247
#3 0x0000000000406252 in cld_end () at cldu.c:701
#4 0x000000000040d6c4 in main (argc=<value optimized out>,
argv=<value optimized out>) at server.c:1980
(gdb) print *fh
$3 = {fh = 140232956496032, sess = 0x0, valid = 176}
Obviously the boolean value should not be 176.
The reason this happens is array reallocation. The API for libcldc
(used internally by ncld) returns a pointer to struct cldc_fh that
the user is expected to save. However, this structure is placed
inside a GArray. As array expands, it can be relocated in memory
and the file handle pointer is left hanging. Tabled is vulnerable
to this because 1) it keeps an open handle for the CLD file that
it locked and 2) it periodically opens and closes CLD files for
chunkservers, causing array to grow.
We observed that the indexes into the array are not the same
as file handles. Therefore, we never access elements by index
and thus it is not necessary to use an array structure.
This patch replaces array with a list, so that pointers to handle
structures can be stable.
Also, the patch frees the handles when they are closed, thus
ending the endless growth of the file handle table. This is
done in a compatible way, so handle is available to callers
of cldc_close (ncld does not need it, but it may be useful).
Although we change globally visible structures, we only replace
one ostensibly opaque pointer type (GArray) with another one
(GList), so applications need not be rebuilt.
Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
---
include/cldc.h | 2 -
lib/cldc.c | 56 +++++++++++++++++++++++++++++++----------------
2 files changed, 38 insertions(+), 20 deletions(-)
commit 93c91d8ce452f03a774055f156ada967d1b6b0a5
Author: Pete Zaitcev <zaitcev@yahoo.com>
Date: Sat May 15 12:21:43 2010 -0600
Switch from array to list. Fixes reallocation and unbounded growth.
diff --git a/include/cldc.h b/include/cldc.h
index 12acd32..c64eef9 100644
--- a/include/cldc.h
+++ b/include/cldc.h
@@ -110,7 +110,7 @@ struct cldc_session {
uint8_t addr[64]; /* server address */
size_t addr_len;
- GArray *fh; /* file handle table */
+ GList *cfh; /* cldc_fh table */
GList *out_msg;
time_t msg_scan_time;
diff --git a/lib/cldc.c b/lib/cldc.c
index 70e765f..72472f2 100644
--- a/lib/cldc.c
+++ b/lib/cldc.c
@@ -280,7 +280,7 @@ static int rxmsg_event(struct cldc_session *sess,
XDR xdrs;
struct cld_msg_event ev;
struct cldc_fh *fh = NULL;
- int i;
+ GList *tmp;
xdrmem_create(&xdrs, sess->msg_buf, sess->msg_buf_len, XDR_DECODE);
if (!xdr_cld_msg_event(&xdrs, &ev)) {
@@ -291,8 +291,8 @@ static int rxmsg_event(struct cldc_session *sess,
}
xdr_destroy(&xdrs);
- for (i = 0; i < sess->fh->len; i++) {
- fh = &g_array_index(sess->fh, struct cldc_fh, i);
+ for (tmp = sess->cfh; tmp; tmp = tmp->next) {
+ fh = tmp->data;
if (fh->fh == ev.fh)
break;
else
@@ -765,8 +765,11 @@ static void sess_free(struct cldc_session *sess)
if (!sess)
return;
- if (sess->fh)
- g_array_free(sess->fh, TRUE);
+ if (sess->cfh) {
+ for (tmp = sess->cfh; tmp; tmp = tmp->next)
+ free(tmp->data);
+ g_list_free(sess->cfh);
+ }
tmp = sess->out_msg;
while (tmp) {
@@ -775,7 +778,7 @@ static void sess_free(struct cldc_session *sess)
}
g_list_free(sess->out_msg);
- memset(sess, 0, sizeof(*sess));
+ memset(sess, 0x55, sizeof(*sess));
free(sess);
}
@@ -846,7 +849,6 @@ static int cldc_new_sess_log(const struct cldc_ops *ops,
sess->private = private;
sess->ops = ops;
sess->log = *log; /* save off caller's stack */
- sess->fh = g_array_sized_new(FALSE, TRUE, sizeof(struct cldc_fh), 16);
strcpy(sess->user, user);
strcpy(sess->secret_key, secret_key);
@@ -994,9 +996,8 @@ int cldc_open(struct cldc_session *sess,
{
struct cldc_msg *msg;
struct cld_msg_open open;
- struct cldc_fh fh, *fhtmp;
+ struct cldc_fh *fh;
size_t plen;
- int fh_idx;
*fh_out = NULL;
@@ -1020,22 +1021,38 @@ int cldc_open(struct cldc_session *sess,
if (!msg)
return -ENOMEM;
- /* add fh to fh table; get pointer to new fh */
- memset(&fh, 0, sizeof(fh));
- fh.sess = sess;
- fh_idx = sess->fh->len;
- g_array_append_val(sess->fh, fh);
-
- fhtmp = &g_array_index(sess->fh, struct cldc_fh, fh_idx);
+ fh = calloc(1, sizeof(*fh));
+ if (!fh) {
+ cldc_msg_free(msg);
+ return -ENOMEM;
+ }
+ fh->sess = sess;
+ sess->cfh = g_list_append(sess->cfh, fh); // kept open - to front
msg->cb = open_end_cb;
- msg->cb_private = fhtmp;
+ msg->cb_private = fh;
- *fh_out = fhtmp;
+ *fh_out = fh;
return sess_send(sess, msg);
}
+static ssize_t close_end_cb(struct cldc_msg *msg, const void *resp_p,
+ size_t resp_len, enum cle_err_codes resp_rc)
+{
+ struct cldc_fh *fh = msg->cb_private;
+ struct cldc_session *sess = fh->sess;
+
+ if (msg->copts.cb)
+ return msg->copts.cb(&msg->copts, resp_rc);
+
+ sess->cfh = g_list_remove(sess->cfh, fh);
+ memset(fh, 0x77, sizeof(*fh));
+ free(fh);
+
+ return 0;
+}
+
int cldc_close(struct cldc_fh *fh, const struct cldc_call_opts *copts)
{
struct cldc_session *sess;
@@ -1061,7 +1078,8 @@ int cldc_close(struct cldc_fh *fh, const struct cldc_call_opts *copts)
/* mark FH as invalid from this point forward */
fh->valid = false;
- msg->cb = generic_end_cb;
+ msg->cb = close_end_cb;
+ msg->cb_private = fh;
return sess_send(sess, msg);
}
^ permalink raw reply related [flat|nested] 2+ messages in thread* Re: [cld patch 1/1] fix crash in cldc_close due to array reallocations
2010-05-15 19:32 [cld patch 1/1] fix crash in cldc_close due to array reallocations Pete Zaitcev
@ 2010-05-15 20:36 ` Jeff Garzik
0 siblings, 0 replies; 2+ messages in thread
From: Jeff Garzik @ 2010-05-15 20:36 UTC (permalink / raw)
To: Pete Zaitcev; +Cc: Project Hail List
On 05/15/2010 03:32 PM, Pete Zaitcev wrote:
> A long-running tabled is virtually guaranteed to crash when it's shut
> down, with the traceback like this:
>
> Program terminated with signal 11, Segmentation fault.
> #0 cldc_close (fh=0x10a71c8, copts=0x7fff2fdef780) at cldc.c:1049
> 1049 if (sess->expired) {
> (gdb) where
> #0 cldc_close (fh=0x10a71c8, copts=0x7fff2fdef780) at cldc.c:1049
> #1 0x00007f8a878ed7c7 in ncld_close (fh=0x10a7790) at cldc.c:2215
> #2 0x00007f8a878ed918 in ncld_sess_close (nsess=0x10a6f10) at cldc.c:2247
> #3 0x0000000000406252 in cld_end () at cldu.c:701
> #4 0x000000000040d6c4 in main (argc=<value optimized out>,
> argv=<value optimized out>) at server.c:1980
> (gdb) print *fh
> $3 = {fh = 140232956496032, sess = 0x0, valid = 176}
>
> Obviously the boolean value should not be 176.
>
> The reason this happens is array reallocation. The API for libcldc
> (used internally by ncld) returns a pointer to struct cldc_fh that
> the user is expected to save. However, this structure is placed
> inside a GArray. As array expands, it can be relocated in memory
> and the file handle pointer is left hanging. Tabled is vulnerable
> to this because 1) it keeps an open handle for the CLD file that
> it locked and 2) it periodically opens and closes CLD files for
> chunkservers, causing array to grow.
>
> We observed that the indexes into the array are not the same
> as file handles. Therefore, we never access elements by index
> and thus it is not necessary to use an array structure.
>
> This patch replaces array with a list, so that pointers to handle
> structures can be stable.
>
> Also, the patch frees the handles when they are closed, thus
> ending the endless growth of the file handle table. This is
> done in a compatible way, so handle is available to callers
> of cldc_close (ncld does not need it, but it may be useful).
>
> Although we change globally visible structures, we only replace
> one ostensibly opaque pointer type (GArray) with another one
> (GList), so applications need not be rebuilt.
>
> Signed-off-by: Pete Zaitcev<zaitcev@redhat.com>
applied
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2010-05-15 20:36 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-15 19:32 [cld patch 1/1] fix crash in cldc_close due to array reallocations Pete Zaitcev
2010-05-15 20:36 ` Jeff Garzik
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.