xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Wei Liu <wei.liu2@citrix.com>
To: xen-devel@lists.xen.org
Cc: Wei Liu <wei.liu2@citrix.com>, ian.jackson@eu.citrix.com
Subject: [PATCH 2/2] Switch to poll() in cxenstored's IO loop
Date: Mon, 25 Mar 2013 11:17:31 +0000	[thread overview]
Message-ID: <1364210251-12626-3-git-send-email-wei.liu2@citrix.com> (raw)
In-Reply-To: <1364210251-12626-1-git-send-email-wei.liu2@citrix.com>

Poll() can support more file descriptors than select(). We've done this for
xenconsoled, now do this for cxenstored as well.

The code is taken from xenconsoled and modified to adapt to cxenstored.

Note that poll()'s semantics are a bit different from select()'s. In Linux, if a
fd is set in the IN/OUT fd_set and an error occurs inside select(), the fd is still
considered readable / writable, and it is set in the returned IN/OUT fd_set.
So in the subsequent handle_input / handle_output, the connection will eventually
be talloc_free'd. After switching to poll(), we take care of any error
right away, making the code clearer.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 tools/xenstore/xenstored_core.c |  191 +++++++++++++++++++++++++++------------
 tools/xenstore/xenstored_core.h |    2 +
 2 files changed, 133 insertions(+), 60 deletions(-)

diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index bd44645..a2cf2a6 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -19,7 +19,7 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <sys/select.h>
+#include <poll.h>
 #ifndef NO_SOCKETS
 #include <sys/socket.h>
 #include <sys/un.h>
@@ -55,6 +55,12 @@
 #include "hashtable.h"
 
 extern xc_evtchn *xce_handle; /* in xenstored_domain.c */
+static int xce_pollfd_idx = -1;
+static struct pollfd *fds;
+static unsigned int current_array_size;
+static unsigned int nr_fds;
+
+#define ROUNDUP(_x, _w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
 
 static bool verbose = false;
 LIST_HEAD(connections);
@@ -62,6 +68,7 @@ static int tracefd = -1;
 static bool recovery = true;
 static bool remove_local = true;
 static int reopen_log_pipe[2];
+static int reopen_log_pipe0_pollfd_idx = -1;
 static char *tracefile = NULL;
 static TDB_CONTEXT *tdb_ctx = NULL;
 
@@ -199,7 +206,7 @@ void trace_destroy(const void *data, const char *type)
 /**
  * Signal handler for SIGHUP, which requests that the trace log is reopened
  * (in the main loop).  A single byte is written to reopen_log_pipe, to awaken
- * the select() in the main loop.
+ * the poll() in the main loop.
  */
 static void trigger_reopen_log(int signal __attribute__((unused)))
 {
@@ -279,15 +286,12 @@ static int destroy_conn(void *_conn)
 
 	/* Flush outgoing if possible, but don't block. */
 	if (!conn->domain) {
-		fd_set set;
-		struct timeval none;
-
-		FD_ZERO(&set);
-		FD_SET(conn->fd, &set);
-		none.tv_sec = none.tv_usec = 0;
+		struct pollfd pfd;
+		pfd.fd = conn->fd;
+		pfd.events = POLLOUT;
 
 		while (!list_empty(&conn->out_list)
-		       && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
+		       && poll(&pfd, 1, 0) == 1)
 			if (!write_messages(conn))
 				break;
 		close(conn->fd);
@@ -299,53 +303,77 @@ static int destroy_conn(void *_conn)
 	return 0;
 }
 
-
-static void set_fd(int fd, fd_set *set, int *max)
+/* This function returns the index inside the array on success, -1 on failure */
+static int set_fd(int fd, short events)
 {
-	if (fd < 0)
-		return;
-	FD_SET(fd, set);
-	if (fd > *max)
-		*max = fd;
-}
+	int ret;
+	if (current_array_size < nr_fds + 1) {
+		struct pollfd *new_fds = NULL;
+		unsigned long newsize;
+
+		/* Round up to 2^8 boundary, in practice this just
+	 * makes newsize larger than current_array_size.
+		 */
+		newsize = ROUNDUP(nr_fds + 1, 8);
+
+		new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
+		if (!new_fds)
+			goto fail;
+		fds = new_fds;
+
+		memset(&fds[0] + current_array_size, 0,
+		       sizeof(struct pollfd ) * (newsize-current_array_size));
+		current_array_size = newsize;
+	}
 
+	fds[nr_fds].fd = fd;
+	fds[nr_fds].events = events;
+	ret = nr_fds;
+	nr_fds++;
 
-static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
-			  struct timeval **ptimeout)
+	return ret;
+fail:
+	syslog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
+	return -1;
+}
+
+static void initialize_fds(int sock, int *p_sock_pollfd_idx,
+			   int ro_sock, int *p_ro_sock_pollfd_idx,
+			   int *ptimeout)
 {
-	static struct timeval zero_timeout = { 0 };
 	struct connection *conn;
-	int max = -1;
 
-	*ptimeout = NULL;
+	if (fds)
+		memset(fds, 0, sizeof(struct pollfd) * current_array_size);
+	nr_fds = 0;
 
-	FD_ZERO(inset);
-	FD_ZERO(outset);
+	*ptimeout = -1;
 
 	if (sock != -1)
-		set_fd(sock, inset, &max);
+		*p_sock_pollfd_idx = set_fd(sock, POLLIN|POLLPRI);
 	if (ro_sock != -1)
-		set_fd(ro_sock, inset, &max);
+		*p_ro_sock_pollfd_idx = set_fd(ro_sock, POLLIN|POLLPRI);
 	if (reopen_log_pipe[0] != -1)
-		set_fd(reopen_log_pipe[0], inset, &max);
+		reopen_log_pipe0_pollfd_idx =
+			set_fd(reopen_log_pipe[0], POLLIN|POLLPRI);
 
 	if (xce_handle != NULL)
-		set_fd(xc_evtchn_fd(xce_handle), inset, &max);
+		xce_pollfd_idx = set_fd(xc_evtchn_fd(xce_handle),
+					POLLIN|POLLPRI);
 
 	list_for_each_entry(conn, &connections, list) {
 		if (conn->domain) {
 			if (domain_can_read(conn) ||
 			    (domain_can_write(conn) &&
 			     !list_empty(&conn->out_list)))
-				*ptimeout = &zero_timeout;
+				*ptimeout = 0;
 		} else {
-			set_fd(conn->fd, inset, &max);
+			short events = POLLIN|POLLPRI;
 			if (!list_empty(&conn->out_list))
-				FD_SET(conn->fd, outset);
+				events |= POLLOUT;
+			conn->pollfd_idx = set_fd(conn->fd, events);
 		}
 	}
-
-	return max;
 }
 
 /* Is child a subnode of parent, or equal? */
@@ -1330,6 +1358,7 @@ struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
 		return NULL;
 
 	new->fd = -1;
+	new->pollfd_idx = -1;
 	new->write = write;
 	new->read = read;
 	new->can_write = true;
@@ -1770,14 +1799,13 @@ int priv_domid = 0;
 
 int main(int argc, char *argv[])
 {
-	int opt, *sock, *ro_sock, max;
-	fd_set inset, outset;
+	int opt, *sock, *ro_sock;
+	int sock_pollfd_idx = -1, ro_sock_pollfd_idx = -1;
 	bool dofork = true;
 	bool outputpid = false;
 	bool no_domain_init = false;
 	const char *pidfile = NULL;
-	int evtchn_fd = -1;
-	struct timeval *timeout;
+	int timeout;
 
 	while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options,
 				  NULL)) != -1) {
@@ -1880,11 +1908,9 @@ int main(int argc, char *argv[])
 
 	signal(SIGHUP, trigger_reopen_log);
 
-	if (xce_handle != NULL)
-		evtchn_fd = xc_evtchn_fd(xce_handle);
-
 	/* Get ready to listen to the tools. */
-	max = initialize_set(&inset, &outset, *sock, *ro_sock, &timeout);
+	initialize_fds(*sock, &sock_pollfd_idx, *ro_sock, &ro_sock_pollfd_idx,
+		       &timeout);
 
 	/* Tell the kernel we're up and running. */
 	xenbus_notify_running();
@@ -1893,27 +1919,57 @@ int main(int argc, char *argv[])
 	for (;;) {
 		struct connection *conn, *next;
 
-		if (select(max+1, &inset, &outset, NULL, timeout) < 0) {
+		if (poll(fds, nr_fds, timeout) < 0) {
 			if (errno == EINTR)
 				continue;
-			barf_perror("Select failed");
+			barf_perror("Poll failed");
 		}
 
-		if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) {
-			char c;
-			if (read(reopen_log_pipe[0], &c, 1) != 1)
-				barf_perror("read failed");
-			reopen_log();
+		if (reopen_log_pipe0_pollfd_idx != -1) {
+			if (fds[reopen_log_pipe0_pollfd_idx].revents
+			    & ~POLLIN) {
+				close(reopen_log_pipe[0]);
+				close(reopen_log_pipe[1]);
+				init_pipe(reopen_log_pipe);
+			} else if (fds[reopen_log_pipe0_pollfd_idx].revents
+				   & POLLIN) {
+				char c;
+				if (read(reopen_log_pipe[0], &c, 1) != 1)
+					barf_perror("read failed");
+				reopen_log();
+			}
+			reopen_log_pipe0_pollfd_idx = -1;
 		}
 
-		if (*sock != -1 && FD_ISSET(*sock, &inset))
-			accept_connection(*sock, true);
+		if (sock_pollfd_idx != -1) {
+			if (fds[sock_pollfd_idx].revents & ~POLLIN) {
+				barf_perror("sock poll failed");
+				break;
+			} else if (fds[sock_pollfd_idx].revents & POLLIN) {
+				accept_connection(*sock, true);
+				sock_pollfd_idx = -1;
+			}
+		}
 
-		if (*ro_sock != -1 && FD_ISSET(*ro_sock, &inset))
-			accept_connection(*ro_sock, false);
+		if (ro_sock_pollfd_idx != -1) {
+			if (fds[ro_sock_pollfd_idx].revents & ~POLLIN) {
+				barf_perror("ro sock poll failed");
+				break;
+			} else if (fds[ro_sock_pollfd_idx].revents & POLLIN) {
+				accept_connection(*ro_sock, false);
+				ro_sock_pollfd_idx = -1;
+			}
+		}
 
-		if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
-			handle_event();
+		if (xce_pollfd_idx != -1) {
+			if (fds[xce_pollfd_idx].revents & ~POLLIN) {
+				barf_perror("xce_handle poll failed");
+				break;
+			} else if (fds[xce_pollfd_idx].revents & POLLIN) {
+				handle_event();
+				xce_pollfd_idx = -1;
+			}
+		}
 
 		next = list_entry(connections.next, typeof(*conn), list);
 		if (&next->list != &connections)
@@ -1939,21 +1995,36 @@ int main(int argc, char *argv[])
 				if (talloc_free(conn) == 0)
 					continue;
 			} else {
-				if (FD_ISSET(conn->fd, &inset))
-					handle_input(conn);
+				if (conn->pollfd_idx != -1) {
+					if (fds[conn->pollfd_idx].revents
+					    & ~(POLLIN|POLLOUT))
+						talloc_free(conn);
+					else if (fds[conn->pollfd_idx].revents
+						 & POLLIN)
+						handle_input(conn);
+				}
 				if (talloc_free(conn) == 0)
 					continue;
 
 				talloc_increase_ref_count(conn);
-				if (FD_ISSET(conn->fd, &outset))
-					handle_output(conn);
+
+				if (conn->pollfd_idx != -1) {
+					if (fds[conn->pollfd_idx].revents
+					    & ~(POLLIN|POLLOUT))
+						talloc_free(conn);
+					else if (fds[conn->pollfd_idx].revents
+						 & POLLOUT)
+						handle_output(conn);
+				}
 				if (talloc_free(conn) == 0)
 					continue;
+
+				conn->pollfd_idx = -1;
 			}
 		}
 
-		max = initialize_set(&inset, &outset, *sock, *ro_sock,
-				     &timeout);
+		initialize_fds(*sock, &sock_pollfd_idx, *ro_sock,
+			       &ro_sock_pollfd_idx, &timeout);
 	}
 }
 
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index 492ca0d..cfbcf6f 100644
--- a/tools/xenstore/xenstored_core.h
+++ b/tools/xenstore/xenstored_core.h
@@ -60,6 +60,8 @@ struct connection
 
 	/* The file descriptor we came in on. */
 	int fd;
+	/* The index of pollfd in global pollfd array */
+	int pollfd_idx;
 
 	/* Who am I? 0 for socket connections. */
 	unsigned int id;
-- 
1.7.10.4

  parent reply	other threads:[~2013-03-25 11:17 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-25 11:17 [PATCH 0/2 V3] Switch to poll() in cxenstored's IO loop Wei Liu
2013-03-25 11:17 ` [PATCH 1/2] mini-os: implement poll(2) Wei Liu
2013-03-25 11:17 ` Wei Liu [this message]
2013-04-11 14:54 ` [PATCH 0/2 V3] Switch to poll() in cxenstored's IO loop Ian Campbell
  -- strict thread matches above, loose matches on Subject: below --
2013-02-20 11:05 [PATCH 0/2] switch " Wei Liu
2013-02-20 11:05 ` [PATCH 2/2] Switch " Wei Liu
2013-03-12 15:27   ` Ian Campbell
2013-03-12 15:34     ` Wei Liu
2013-03-12 15:37       ` Ian Campbell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1364210251-12626-3-git-send-email-wei.liu2@citrix.com \
    --to=wei.liu2@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).