All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pete Zaitcev <zaitcev@redhat.com>
To: Jeff Garzik <jeff@garzik.org>
Cc: Project Hail List <hail-devel@vger.kernel.org>,
	Jeff Darcy <jdarcy@redhat.com>
Subject: [Patch 2/7] tabled: fix the endless recusion when reading long objects
Date: Thu, 1 Apr 2010 19:51:16 -0600	[thread overview]
Message-ID: <20100401195116.32142697@redhat.com> (raw)

At certain network and disk speeds, tabled can blow its stack by
filling it with (essentially) endless recursion:

#2  0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp=
    0x7bb910, done=<value optimized out>) at server.c:397
#3  0x000000000040ca55 in cli_writable (cli=0x686e90) at server.c:525
#4  0x000000000040da65 in cli_write_start (cli=0x686e90) at server.c:561
#5  0x0000000000408ad5 in object_get_poke (cli=0x686e90) at object.c:1039
#6  0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp=
    0x7bb8d0, done=<value optimized out>) at server.c:397
#7  0x000000000040ca55 in cli_writable (cli=0x686e90) at server.c:525
#8  0x000000000040da65 in cli_write_start (cli=0x686e90) at server.c:561
#9  0x0000000000408ad5 in object_get_poke (cli=0x686e90) at object.c:1039
#10 0x000000000040c077 in cli_write_free (cli=<value optimized out>, tmp=
    0x7bb890, done=<value optimized out>) at server.c:397

The fix is to deliver callbacks only from the top level.

Callbacks must be delivered every time a send is completed,
which amounts to every call to is_writeable(). Since there
is a large number of callers to it, we found it advantageous
to run callbacks from every source of events. In other words,
every function that is passed to event_set must invoke
cli_write_run_compl. Mind that storage.c contains calls to
event_set.

Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>

---
 server/object.c |    4 +++
 server/server.c |   52 +++++++++++++++++++++++++++++++++++-----------
 server/tabled.h |    6 +++++
 3 files changed, 50 insertions(+), 12 deletions(-)

commit 88959fb8c4cc8232f92805618c5bcaeee9099390
Author: Pete Zaitcev <zaitcev@yahoo.com>
Date:   Thu Apr 1 19:04:13 2010 -0600

    Fix the endless recursion when reading long objects.

diff --git a/server/object.c b/server/object.c
index d61a446..6d32316 100644
--- a/server/object.c
+++ b/server/object.c
@@ -994,6 +994,9 @@ static bool object_get_poke(struct client *cli)
 	char *buf;
 	ssize_t bytes;
 
+	/* XXX flow control - what treshold? */
+	/*  if (cli_wqueued(cli) >= 1000000000) return false; */
+
 	buf = malloc(CLI_DATA_BUF_SZ);
 	if (!buf)
 		return false;
@@ -1052,6 +1055,7 @@ static bool object_get_more(struct client *cli, void *cb_data, bool done)
 static void object_get_event(struct open_chunk *ochunk)
 {
 	object_get_poke(ochunk->cli);
+	cli_write_run_compl(ochunk->cli);
 }
 
 bool object_get_body(struct client *cli, const char *user, const char *bucket,
diff --git a/server/server.c b/server/server.c
index 72db151..600c8de 100644
--- a/server/server.c
+++ b/server/server.c
@@ -380,6 +380,8 @@ static void stats_dump(void)
 	       tabled_srv.stats.event,
 	       tabled_srv.stats.tcp_accept,
 	       tabled_srv.stats.opt_write);
+	applog(LOG_INFO, "DEBUG: max_write_buf %lu",
+	       tabled_srv.stats.max_write_buf);
 	stor_stats();
 	rep_stats();
 }
@@ -405,16 +407,24 @@ bool stat_status(struct client *cli, GList *content)
 	content = g_list_append(content, str);
 	if (asprintf(&str,
 		     "<p>Stats: "
-		     "poll %lu event %lu tcp_accept %lu opt_write %lu</p>\r\n",
+		     "poll %lu event %lu tcp_accept %lu opt_write %lu</p>\r\n"
+		     "<p>Debug: max_write_buf %lu</p>\r\n",
 		     tabled_srv.stats.poll,
 		     tabled_srv.stats.event,
 		     tabled_srv.stats.tcp_accept,
-		     tabled_srv.stats.opt_write) < 0)
+		     tabled_srv.stats.opt_write,
+		     tabled_srv.stats.max_write_buf) < 0)
 		return false;
 	content = g_list_append(content, str);
 	return true;
 }
 
+static void cli_write_complete(struct client *cli, struct client_write *tmp)
+{
+	list_del(&tmp->node);
+	list_add_tail(&tmp->node, &cli->write_compl_q);
+}
+
 static bool cli_write_free(struct client *cli, struct client_write *tmp,
 			   bool done)
 {
@@ -433,11 +443,28 @@ static void cli_write_free_all(struct client *cli)
 {
 	struct client_write *wr, *tmp;
 
+	list_for_each_entry_safe(wr, tmp, &cli->write_compl_q, node) {
+		cli_write_free(cli, wr, true);
+	}
 	list_for_each_entry_safe(wr, tmp, &cli->write_q, node) {
 		cli_write_free(cli, wr, false);
 	}
 }
 
+bool cli_write_run_compl(struct client *cli)
+{
+	struct client_write *wr;
+	bool do_loop;
+
+	do_loop = false;
+	while (!list_empty(&cli->write_compl_q)) {
+		wr = list_entry(cli->write_compl_q.next, struct client_write,
+				node);
+		do_loop |= cli_write_free(cli, wr, true);
+	}
+	return do_loop;
+}
+
 static void cli_free(struct client *cli)
 {
 	cli_write_free_all(cli);
@@ -455,6 +482,9 @@ static void cli_free(struct client *cli)
 
 	req_free(&cli->req);
 
+	if (cli->write_cnt_max > tabled_srv.stats.max_write_buf)
+		tabled_srv.stats.max_write_buf = cli->write_cnt_max;
+
 	if (debugging)
 		applog(LOG_INFO, "client %s ended", cli->addr_host);
 
@@ -505,10 +535,6 @@ static void cli_writable(struct client *cli)
 	struct client_write *tmp;
 	ssize_t rc;
 	struct iovec iov[CLI_MAX_WR_IOV];
-	bool more_work;
-
-restart:
-	more_work = false;
 
 	/* accumulate pending writes into iovec */
 	n_iov = 0;
@@ -548,16 +574,13 @@ do_write:
 		rc -= sz;
 
 		/* if tmp->len reaches zero, write is complete,
-		 * call callback and clean up
+		 * so schedule it for clean up (cannot call callback
+		 * right away or an endless recursion will result)
 		 */
 		if (tmp->togo == 0)
-			if (cli_write_free(cli, tmp, true))
-				more_work = true;
+			cli_write_complete(cli, tmp);
 	}
 
-	if (more_work)
-		goto restart;
-
 	/* if we emptied the queue, clear write notification */
 	if (list_empty(&cli->write_q)) {
 		cli->writing = false;
@@ -622,6 +645,8 @@ int cli_writeq(struct client *cli, const void *buf, unsigned int buflen,
 	wr->cb_data = cb_data;
 	list_add_tail(&wr->node, &cli->write_q);
 	cli->write_cnt += buflen;
+	if (cli->write_cnt > cli->write_cnt_max)
+		cli->write_cnt_max = cli->write_cnt;
 
 	return 0;
 }
@@ -1275,6 +1300,7 @@ static struct client *cli_alloc(bool is_status)
 	cli->state = evt_read_req;
 	cli->evt_table = is_status? evt_funcs_status: evt_funcs_server;
 	INIT_LIST_HEAD(&cli->write_q);
+	INIT_LIST_HEAD(&cli->write_compl_q);
 	INIT_LIST_HEAD(&cli->out_ch);
 	cli->req_ptr = cli->req_buf;
 	memset(&cli->req, 0, sizeof(cli->req) - sizeof(cli->req.hdr));
@@ -1287,6 +1313,7 @@ static void tcp_cli_wr_event(int fd, short events, void *userdata)
 	struct client *cli = userdata;
 
 	cli_writable(cli);
+	cli_write_run_compl(cli);
 }
 
 static void tcp_cli_event(int fd, short events, void *userdata)
@@ -1296,6 +1323,7 @@ static void tcp_cli_event(int fd, short events, void *userdata)
 
 	do {
 		loop = cli->evt_table[cli->state](cli, events);
+		loop |= cli_write_run_compl(cli);
 	} while (loop);
 }
 
diff --git a/server/tabled.h b/server/tabled.h
index b4f51ed..3ef4a49 100644
--- a/server/tabled.h
+++ b/server/tabled.h
@@ -164,8 +164,11 @@ struct client {
 	struct event		write_ev;
 
 	struct list_head	write_q;	/* list of async writes */
+	struct list_head	write_compl_q;	/* list of done writes */
 	size_t			write_cnt;	/* water level */
 	bool			writing;
+	/* some debugging stats */
+	size_t			write_cnt_max;
 
 	unsigned int		req_used;	/* amount of req_buf in use */
 	char			*req_ptr;	/* start of unexamined data */
@@ -212,6 +215,8 @@ struct server_stats {
 	unsigned long		event;		/* events dispatched */
 	unsigned long		tcp_accept;	/* TCP accepted cxns */
 	unsigned long		opt_write;	/* optimistic writes */
+
+	unsigned long		max_write_buf;
 };
 
 struct listen_cfg {
@@ -325,6 +330,7 @@ extern int cli_writeq(struct client *cli, const void *buf, unsigned int buflen,
 extern size_t cli_wqueued(struct client *cli);
 extern bool cli_cb_free(struct client *cli, void *cb_data, bool done);
 extern bool cli_write_start(struct client *cli);
+extern bool cli_write_run_compl(struct client *cli);
 extern int cli_req_avail(struct client *cli);
 extern void applog(int prio, const char *fmt, ...);
 extern int stor_update_cb(void);

             reply	other threads:[~2010-04-02  1:51 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-02  1:51 Pete Zaitcev [this message]
2010-04-06 17:02 ` [Patch 2/7] tabled: fix the endless recusion when reading long objects Jeff Garzik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100401195116.32142697@redhat.com \
    --to=zaitcev@redhat.com \
    --cc=hail-devel@vger.kernel.org \
    --cc=jdarcy@redhat.com \
    --cc=jeff@garzik.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.