All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Switch from select() to poll() in xenconsoled's IO loop
@ 2013-01-08 11:50 Wei Liu
  2013-01-08 12:52 ` Wei Liu
  0 siblings, 1 reply; 4+ messages in thread
From: Wei Liu @ 2013-01-08 11:50 UTC (permalink / raw)
  To: xen-devel; +Cc: ian.jackson, Wei Liu, ian.campbell, mats.petersson

In Linux select() typically supports up to 1024 file descriptors. This can be
a problem when user tries to boot up many guests. Switching to poll() has
minimum impact on existing code and has better scalibility.

pollfd array is dynamically allocated / reallocated. If the array fails to
expand, we just ignore the incoming fd.

Change from V2:
  * remove unnecessary malloc in initialize_pollfd_arrays
  * use ROUND_UP to get new size of arrays

Change from V3:
  * remove initialize and destroy function for array
  * embedded tracking structure in struct domain, eliminate fd_to_pollfd

Change from V4:
  * make xs_pollfd local to io.c
  * add back the 5 ms fuzz
  * handle POLLERR and POLLHUP
  * treat POLLPRI as error if set in revents
  * abort if xenconsoled's own fds get screwed
  * handle broken tty if tty's fds get screwed

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 tools/console/daemon/io.c |  189 +++++++++++++++++++++++++++++++--------------
 1 file changed, 131 insertions(+), 58 deletions(-)

diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
index 48fe151..8d16cac 100644
--- a/tools/console/daemon/io.c
+++ b/tools/console/daemon/io.c
@@ -28,7 +28,7 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <string.h>
-#include <sys/select.h>
+#include <poll.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <termios.h>
@@ -69,6 +69,7 @@ static int log_hv_fd = -1;
 static evtchn_port_or_error_t log_hv_evtchn = -1;
 static xc_interface *xch; /* why does xenconsoled have two xc handles ? */
 static xc_evtchn *xce_handle = NULL;
+static struct pollfd *xce_pollfd = NULL;
 
 struct buffer {
 	char *data;
@@ -81,7 +82,9 @@ struct buffer {
 struct domain {
 	int domid;
 	int master_fd;
+	struct pollfd *master_pollfd;
 	int slave_fd;
+	struct pollfd *slave_pollfd;
 	int log_fd;
 	bool is_dead;
 	unsigned last_seen;
@@ -92,6 +95,7 @@ struct domain {
 	evtchn_port_or_error_t local_port;
 	evtchn_port_or_error_t remote_port;
 	xc_evtchn *xce_handle;
+	struct pollfd *xce_pollfd;
 	struct xencons_interface *interface;
 	int event_count;
 	long long next_period;
@@ -769,6 +773,17 @@ static int ring_free_bytes(struct domain *dom)
 	return (sizeof(intf->in) - space);
 }
 
+static void domain_handle_broken_tty(struct domain *dom, int recreate)
+{
+	domain_close_tty(dom);
+
+	if (recreate) {
+		domain_create_tty(dom);
+	} else {
+		shutdown_domain(dom);
+	}
+}
+
 static void handle_tty_read(struct domain *dom)
 {
 	ssize_t len = 0;
@@ -794,13 +809,7 @@ static void handle_tty_read(struct domain *dom)
 	 * keep the slave open for the duration.
 	 */
 	if (len < 0) {
-		domain_close_tty(dom);
-
-		if (domain_is_valid(dom->domid)) {
-			domain_create_tty(dom);
-		} else {
-			shutdown_domain(dom);
-		}
+		domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
 	} else if (domain_is_valid(dom->domid)) {
 		prod = intf->in_prod;
 		for (i = 0; i < len; i++) {
@@ -828,14 +837,7 @@ static void handle_tty_write(struct domain *dom)
  	if (len < 1) {
 		dolog(LOG_DEBUG, "Write failed on domain %d: %zd, %d\n",
 		      dom->domid, len, errno);
-
-		domain_close_tty(dom);
-
-		if (domain_is_valid(dom->domid)) {
-			domain_create_tty(dom);
-		} else {
-			shutdown_domain(dom);
-		}
+		domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
 	} else {
 		buffer_advance(&dom->buffer, len);
 	}
@@ -928,9 +930,53 @@ static void handle_log_reload(void)
 	}
 }
 
+struct pollfd *xs_pollfd;
+static struct pollfd  *fds;
+static unsigned int current_array_size;
+static unsigned int nr_fds;
+
+#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
+static struct pollfd *set_fds(int fd, short events)
+{
+	struct pollfd *ret;
+	if (current_array_size < nr_fds + 1) {
+		struct pollfd  *new_fds = NULL;
+		unsigned long newsize;
+
+		/* Round up to 2^8 boundary, in practice this just
+		 * make newsize larger than current_array_size.
+		 */
+		newsize = ROUNDUP(nr_fds + 1, 8);
+
+		new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
+		if (!new_fds)
+			goto fail;
+		fds = new_fds;
+
+		memset(&fds[0] + current_array_size, 0,
+		       sizeof(struct pollfd) * (newsize-current_array_size));
+		current_array_size = newsize;
+	}
+
+	fds[nr_fds].fd = fd;
+	fds[nr_fds].events = events;
+	ret = &fds[nr_fds];
+	nr_fds++;
+
+	return ret;
+fail:
+	dolog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
+	return NULL;
+}
+
+static void reset_fds(void)
+{
+	nr_fds = 0;
+	memset(fds, 0, sizeof(struct pollfd) * current_array_size);
+}
+
 void handle_io(void)
 {
-	fd_set readfds, writefds;
 	int ret;
 
 	if (log_hv) {
@@ -959,21 +1005,17 @@ void handle_io(void)
 
 	for (;;) {
 		struct domain *d, *n;
-		int max_fd = -1;
-		struct timeval timeout;
+		int poll_timeout; /* timeout in milliseconds */
 		struct timespec ts;
 		long long now, next_timeout = 0;
 
-		FD_ZERO(&readfds);
-		FD_ZERO(&writefds);
+		reset_fds();
 
-		FD_SET(xs_fileno(xs), &readfds);
-		max_fd = MAX(xs_fileno(xs), max_fd);
+		xs_pollfd = set_fds(xs_fileno(xs), POLLIN|POLLPRI);
 
-		if (log_hv) {
-			FD_SET(xc_evtchn_fd(xce_handle), &readfds);
-			max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd);
-		}
+		if (log_hv)
+			xce_pollfd = set_fds(xc_evtchn_fd(xce_handle),
+					     POLLIN|POLLPRI);
 
 		if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
 			return;
@@ -982,10 +1024,12 @@ void handle_io(void)
 		/* Re-calculate any event counter allowances & unblock
 		   domains with new allowance */
 		for (d = dom_head; d; d = d->next) {
-			/* Add 5ms of fuzz since select() often returns
-			   a couple of ms sooner than requested. Without
-			   the fuzz we typically do an extra spin in select()
-			   with a 1/2 ms timeout every other iteration */
+			/* CS 16257:955ee4fa1345 introduces a 5ms fuzz
+			 * for select(), it is not clear poll() has
+			 * similar behavior (returning a couple of ms
+			 * sooner than requested) as well. Just leave
+			 * the fuzz here. Remove it with a separate
+			 * patch if necessary */
 			if ((now+5) > d->next_period) {
 				d->next_period = now + RATE_LIMIT_PERIOD;
 				if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
@@ -1006,75 +1050,101 @@ void handle_io(void)
 				    !d->buffer.max_capacity ||
 				    d->buffer.size < d->buffer.max_capacity) {
 					int evtchn_fd = xc_evtchn_fd(d->xce_handle);
-					FD_SET(evtchn_fd, &readfds);
-					max_fd = MAX(evtchn_fd, max_fd);
+					d->xce_pollfd = set_fds(evtchn_fd,
+								POLLIN|POLLPRI);
 				}
 			}
 
 			if (d->master_fd != -1) {
+				short events = 0;
 				if (!d->is_dead && ring_free_bytes(d))
-					FD_SET(d->master_fd, &readfds);
+					events |= POLLIN;
 
 				if (!buffer_empty(&d->buffer))
-					FD_SET(d->master_fd, &writefds);
-				max_fd = MAX(d->master_fd, max_fd);
+					events |= POLLOUT;
+
+				if (events)
+					d->master_pollfd =
+						set_fds(d->master_fd,
+							events|POLLPRI);
 			}
 		}
 
 		/* If any domain has been rate limited, we need to work
-		   out what timeout to supply to select */
+		   out what timeout to supply to poll */
 		if (next_timeout) {
 			long long duration = (next_timeout - now);
 			if (duration <= 0) /* sanity check */
 				duration = 1;
-			timeout.tv_sec = duration / 1000;
-			timeout.tv_usec = ((duration - (timeout.tv_sec * 1000))
-					   * 1000);
+			poll_timeout = (int)duration;
 		}
 
-		ret = select(max_fd + 1, &readfds, &writefds, 0,
-			     next_timeout ? &timeout : NULL);
+		ret = poll(fds, nr_fds, next_timeout ? poll_timeout : -1);
 
 		if (log_reload) {
 			handle_log_reload();
 			log_reload = 0;
 		}
 
-		/* Abort if select failed, except for EINTR cases
+		/* Abort if poll failed, except for EINTR cases
 		   which indicate a possible log reload */
 		if (ret == -1) {
 			if (errno == EINTR)
 				continue;
-			dolog(LOG_ERR, "Failure in select: %d (%s)",
+			dolog(LOG_ERR, "Failure in poll: %d (%s)",
 			      errno, strerror(errno));
 			break;
 		}
 
-		if (log_hv && FD_ISSET(xc_evtchn_fd(xce_handle), &readfds))
-			handle_hv_logs();
+		if (log_hv && xce_pollfd) {
+			if (xce_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
+				dolog(LOG_ERR,
+				      "Failure in poll xce_handle: %d (%s)",
+				      errno, strerror(errno));
+				break;
+			} else if (xce_pollfd->revents & POLLIN)
+				handle_hv_logs();
+		}
 
 		if (ret <= 0)
 			continue;
 
-		if (FD_ISSET(xs_fileno(xs), &readfds))
-			handle_xs();
+		if (xs_pollfd) {
+			if (xs_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
+				dolog(LOG_ERR,
+				      "Failure in poll xs_handle: %d (%s)",
+				      errno, strerror(errno));
+				break;
+			} else if (xs_pollfd->revents & POLLIN)
+				handle_xs();
+		}
 
 		for (d = dom_head; d; d = n) {
 			n = d->next;
 			if (d->event_count < RATE_LIMIT_ALLOWANCE) {
 				if (d->xce_handle != NULL &&
-				    FD_ISSET(xc_evtchn_fd(d->xce_handle),
-					     &readfds))
-					handle_ring_read(d);
+				    d->xce_pollfd &&
+				    !(d->xce_pollfd->revents &
+				      ~(POLLIN|POLLOUT|POLLPRI)) &&
+				      (d->xce_pollfd->revents &
+				       POLLIN))
+				    handle_ring_read(d);
 			}
 
-			if (d->master_fd != -1 && FD_ISSET(d->master_fd,
-							   &readfds))
-				handle_tty_read(d);
-
-			if (d->master_fd != -1 && FD_ISSET(d->master_fd,
-							   &writefds))
-				handle_tty_write(d);
+			if (d->master_fd != -1 && d->master_pollfd) {
+				if (d->master_pollfd->revents &
+				    ~(POLLIN|POLLOUT|POLLPRI))
+					domain_handle_broken_tty(d,
+						   domain_is_valid(d->domid));
+				else {
+					if (d->master_pollfd->revents &
+					    POLLIN)
+						handle_tty_read(d);
+					if (d->master_pollfd->revents &
+					    POLLOUT)
+						handle_tty_write(d);
+				}
+			}
 
 			if (d->last_seen != enum_pass)
 				shutdown_domain(d);
@@ -1084,6 +1154,9 @@ void handle_io(void)
 		}
 	}
 
+	free(fds);
+	current_array_size = 0;
+
  out:
 	if (log_hv_fd != -1) {
 		close(log_hv_fd);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] Switch from select() to poll() in xenconsoled's IO loop
  2013-01-08 11:50 [PATCH] Switch from select() to poll() in xenconsoled's IO loop Wei Liu
@ 2013-01-08 12:52 ` Wei Liu
  2013-01-08 14:01   ` Mats Petersson
  0 siblings, 1 reply; 4+ messages in thread
From: Wei Liu @ 2013-01-08 12:52 UTC (permalink / raw)
  To: xen-devel@lists.xen.org
  Cc: Ian Jackson, wei.liu2, Ian Campbell, Mats Petersson

Just to be clear, this is version 5 of the patch.

Git send-email mysteriously dropped my subject prefix.


Wei. 

On Tue, 2013-01-08 at 11:50 +0000, Wei Liu wrote:
> In Linux select() typically supports up to 1024 file descriptors. This can be
> a problem when user tries to boot up many guests. Switching to poll() has
> minimum impact on existing code and has better scalibility.
> 
> pollfd array is dynamically allocated / reallocated. If the array fails to
> expand, we just ignore the incoming fd.
> 
> Change from V2:
>   * remove unnecessary malloc in initialize_pollfd_arrays
>   * use ROUND_UP to get new size of arrays
> 
> Change from V3:
>   * remove initialize and destroy function for array
>   * embedded tracking structure in struct domain, eliminate fd_to_pollfd
> 
> Change from V4:
>   * make xs_pollfd local to io.c
>   * add back the 5 ms fuzz
>   * handle POLLERR and POLLHUP
>   * treat POLLPRI as error if set in revents
>   * abort if xenconsoled's own fds get screwed
>   * handle broken tty if tty's fds get screwed
> 
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> ---
>  tools/console/daemon/io.c |  189 +++++++++++++++++++++++++++++++--------------
>  1 file changed, 131 insertions(+), 58 deletions(-)
> 
> diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
> index 48fe151..8d16cac 100644
> --- a/tools/console/daemon/io.c
> +++ b/tools/console/daemon/io.c
> @@ -28,7 +28,7 @@
>  #include <stdlib.h>
>  #include <errno.h>
>  #include <string.h>
> -#include <sys/select.h>
> +#include <poll.h>
>  #include <fcntl.h>
>  #include <unistd.h>
>  #include <termios.h>
> @@ -69,6 +69,7 @@ static int log_hv_fd = -1;
>  static evtchn_port_or_error_t log_hv_evtchn = -1;
>  static xc_interface *xch; /* why does xenconsoled have two xc handles ? */
>  static xc_evtchn *xce_handle = NULL;
> +static struct pollfd *xce_pollfd = NULL;
>  
>  struct buffer {
>  	char *data;
> @@ -81,7 +82,9 @@ struct buffer {
>  struct domain {
>  	int domid;
>  	int master_fd;
> +	struct pollfd *master_pollfd;
>  	int slave_fd;
> +	struct pollfd *slave_pollfd;
>  	int log_fd;
>  	bool is_dead;
>  	unsigned last_seen;
> @@ -92,6 +95,7 @@ struct domain {
>  	evtchn_port_or_error_t local_port;
>  	evtchn_port_or_error_t remote_port;
>  	xc_evtchn *xce_handle;
> +	struct pollfd *xce_pollfd;
>  	struct xencons_interface *interface;
>  	int event_count;
>  	long long next_period;
> @@ -769,6 +773,17 @@ static int ring_free_bytes(struct domain *dom)
>  	return (sizeof(intf->in) - space);
>  }
>  
> +static void domain_handle_broken_tty(struct domain *dom, int recreate)
> +{
> +	domain_close_tty(dom);
> +
> +	if (recreate) {
> +		domain_create_tty(dom);
> +	} else {
> +		shutdown_domain(dom);
> +	}
> +}
> +
>  static void handle_tty_read(struct domain *dom)
>  {
>  	ssize_t len = 0;
> @@ -794,13 +809,7 @@ static void handle_tty_read(struct domain *dom)
>  	 * keep the slave open for the duration.
>  	 */
>  	if (len < 0) {
> -		domain_close_tty(dom);
> -
> -		if (domain_is_valid(dom->domid)) {
> -			domain_create_tty(dom);
> -		} else {
> -			shutdown_domain(dom);
> -		}
> +		domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
>  	} else if (domain_is_valid(dom->domid)) {
>  		prod = intf->in_prod;
>  		for (i = 0; i < len; i++) {
> @@ -828,14 +837,7 @@ static void handle_tty_write(struct domain *dom)
>   	if (len < 1) {
>  		dolog(LOG_DEBUG, "Write failed on domain %d: %zd, %d\n",
>  		      dom->domid, len, errno);
> -
> -		domain_close_tty(dom);
> -
> -		if (domain_is_valid(dom->domid)) {
> -			domain_create_tty(dom);
> -		} else {
> -			shutdown_domain(dom);
> -		}
> +		domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
>  	} else {
>  		buffer_advance(&dom->buffer, len);
>  	}
> @@ -928,9 +930,53 @@ static void handle_log_reload(void)
>  	}
>  }
>  
> +struct pollfd *xs_pollfd;
> +static struct pollfd  *fds;
> +static unsigned int current_array_size;
> +static unsigned int nr_fds;
> +
> +#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
> +static struct pollfd *set_fds(int fd, short events)
> +{
> +	struct pollfd *ret;
> +	if (current_array_size < nr_fds + 1) {
> +		struct pollfd  *new_fds = NULL;
> +		unsigned long newsize;
> +
> +		/* Round up to 2^8 boundary, in practice this just
> +		 * make newsize larger than current_array_size.
> +		 */
> +		newsize = ROUNDUP(nr_fds + 1, 8);
> +
> +		new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
> +		if (!new_fds)
> +			goto fail;
> +		fds = new_fds;
> +
> +		memset(&fds[0] + current_array_size, 0,
> +		       sizeof(struct pollfd) * (newsize-current_array_size));
> +		current_array_size = newsize;
> +	}
> +
> +	fds[nr_fds].fd = fd;
> +	fds[nr_fds].events = events;
> +	ret = &fds[nr_fds];
> +	nr_fds++;
> +
> +	return ret;
> +fail:
> +	dolog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
> +	return NULL;
> +}
> +
> +static void reset_fds(void)
> +{
> +	nr_fds = 0;
> +	memset(fds, 0, sizeof(struct pollfd) * current_array_size);
> +}
> +
>  void handle_io(void)
>  {
> -	fd_set readfds, writefds;
>  	int ret;
>  
>  	if (log_hv) {
> @@ -959,21 +1005,17 @@ void handle_io(void)
>  
>  	for (;;) {
>  		struct domain *d, *n;
> -		int max_fd = -1;
> -		struct timeval timeout;
> +		int poll_timeout; /* timeout in milliseconds */
>  		struct timespec ts;
>  		long long now, next_timeout = 0;
>  
> -		FD_ZERO(&readfds);
> -		FD_ZERO(&writefds);
> +		reset_fds();
>  
> -		FD_SET(xs_fileno(xs), &readfds);
> -		max_fd = MAX(xs_fileno(xs), max_fd);
> +		xs_pollfd = set_fds(xs_fileno(xs), POLLIN|POLLPRI);
>  
> -		if (log_hv) {
> -			FD_SET(xc_evtchn_fd(xce_handle), &readfds);
> -			max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd);
> -		}
> +		if (log_hv)
> +			xce_pollfd = set_fds(xc_evtchn_fd(xce_handle),
> +					     POLLIN|POLLPRI);
>  
>  		if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
>  			return;
> @@ -982,10 +1024,12 @@ void handle_io(void)
>  		/* Re-calculate any event counter allowances & unblock
>  		   domains with new allowance */
>  		for (d = dom_head; d; d = d->next) {
> -			/* Add 5ms of fuzz since select() often returns
> -			   a couple of ms sooner than requested. Without
> -			   the fuzz we typically do an extra spin in select()
> -			   with a 1/2 ms timeout every other iteration */
> +			/* CS 16257:955ee4fa1345 introduces a 5ms fuzz
> +			 * for select(), it is not clear poll() has
> +			 * similar behavior (returning a couple of ms
> +			 * sooner than requested) as well. Just leave
> +			 * the fuzz here. Remove it with a separate
> +			 * patch if necessary */
>  			if ((now+5) > d->next_period) {
>  				d->next_period = now + RATE_LIMIT_PERIOD;
>  				if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
> @@ -1006,75 +1050,101 @@ void handle_io(void)
>  				    !d->buffer.max_capacity ||
>  				    d->buffer.size < d->buffer.max_capacity) {
>  					int evtchn_fd = xc_evtchn_fd(d->xce_handle);
> -					FD_SET(evtchn_fd, &readfds);
> -					max_fd = MAX(evtchn_fd, max_fd);
> +					d->xce_pollfd = set_fds(evtchn_fd,
> +								POLLIN|POLLPRI);
>  				}
>  			}
>  
>  			if (d->master_fd != -1) {
> +				short events = 0;
>  				if (!d->is_dead && ring_free_bytes(d))
> -					FD_SET(d->master_fd, &readfds);
> +					events |= POLLIN;
>  
>  				if (!buffer_empty(&d->buffer))
> -					FD_SET(d->master_fd, &writefds);
> -				max_fd = MAX(d->master_fd, max_fd);
> +					events |= POLLOUT;
> +
> +				if (events)
> +					d->master_pollfd =
> +						set_fds(d->master_fd,
> +							events|POLLPRI);
>  			}
>  		}
>  
>  		/* If any domain has been rate limited, we need to work
> -		   out what timeout to supply to select */
> +		   out what timeout to supply to poll */
>  		if (next_timeout) {
>  			long long duration = (next_timeout - now);
>  			if (duration <= 0) /* sanity check */
>  				duration = 1;
> -			timeout.tv_sec = duration / 1000;
> -			timeout.tv_usec = ((duration - (timeout.tv_sec * 1000))
> -					   * 1000);
> +			poll_timeout = (int)duration;
>  		}
>  
> -		ret = select(max_fd + 1, &readfds, &writefds, 0,
> -			     next_timeout ? &timeout : NULL);
> +		ret = poll(fds, nr_fds, next_timeout ? poll_timeout : -1);
>  
>  		if (log_reload) {
>  			handle_log_reload();
>  			log_reload = 0;
>  		}
>  
> -		/* Abort if select failed, except for EINTR cases
> +		/* Abort if poll failed, except for EINTR cases
>  		   which indicate a possible log reload */
>  		if (ret == -1) {
>  			if (errno == EINTR)
>  				continue;
> -			dolog(LOG_ERR, "Failure in select: %d (%s)",
> +			dolog(LOG_ERR, "Failure in poll: %d (%s)",
>  			      errno, strerror(errno));
>  			break;
>  		}
>  
> -		if (log_hv && FD_ISSET(xc_evtchn_fd(xce_handle), &readfds))
> -			handle_hv_logs();
> +		if (log_hv && xce_pollfd) {
> +			if (xce_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> +				dolog(LOG_ERR,
> +				      "Failure in poll xce_handle: %d (%s)",
> +				      errno, strerror(errno));
> +				break;
> +			} else if (xce_pollfd->revents & POLLIN)
> +				handle_hv_logs();
> +		}
>  
>  		if (ret <= 0)
>  			continue;
>  
> -		if (FD_ISSET(xs_fileno(xs), &readfds))
> -			handle_xs();
> +		if (xs_pollfd) {
> +			if (xs_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> +				dolog(LOG_ERR,
> +				      "Failure in poll xs_handle: %d (%s)",
> +				      errno, strerror(errno));
> +				break;
> +			} else if (xs_pollfd->revents & POLLIN)
> +				handle_xs();
> +		}
>  
>  		for (d = dom_head; d; d = n) {
>  			n = d->next;
>  			if (d->event_count < RATE_LIMIT_ALLOWANCE) {
>  				if (d->xce_handle != NULL &&
> -				    FD_ISSET(xc_evtchn_fd(d->xce_handle),
> -					     &readfds))
> -					handle_ring_read(d);
> +				    d->xce_pollfd &&
> +				    !(d->xce_pollfd->revents &
> +				      ~(POLLIN|POLLOUT|POLLPRI)) &&
> +				      (d->xce_pollfd->revents &
> +				       POLLIN))
> +				    handle_ring_read(d);
>  			}
>  
> -			if (d->master_fd != -1 && FD_ISSET(d->master_fd,
> -							   &readfds))
> -				handle_tty_read(d);
> -
> -			if (d->master_fd != -1 && FD_ISSET(d->master_fd,
> -							   &writefds))
> -				handle_tty_write(d);
> +			if (d->master_fd != -1 && d->master_pollfd) {
> +				if (d->master_pollfd->revents &
> +				    ~(POLLIN|POLLOUT|POLLPRI))
> +					domain_handle_broken_tty(d,
> +						   domain_is_valid(d->domid));
> +				else {
> +					if (d->master_pollfd->revents &
> +					    POLLIN)
> +						handle_tty_read(d);
> +					if (d->master_pollfd->revents &
> +					    POLLOUT)
> +						handle_tty_write(d);
> +				}
> +			}
>  
>  			if (d->last_seen != enum_pass)
>  				shutdown_domain(d);
> @@ -1084,6 +1154,9 @@ void handle_io(void)
>  		}
>  	}
>  
> +	free(fds);
> +	current_array_size = 0;
> +
>   out:
>  	if (log_hv_fd != -1) {
>  		close(log_hv_fd);

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Switch from select() to poll() in xenconsoled's IO loop
  2013-01-08 12:52 ` Wei Liu
@ 2013-01-08 14:01   ` Mats Petersson
  2013-01-08 14:07     ` Ian Campbell
  0 siblings, 1 reply; 4+ messages in thread
From: Mats Petersson @ 2013-01-08 14:01 UTC (permalink / raw)
  To: Wei Liu; +Cc: Ian Jackson, Ian Campbell, xen-devel@lists.xen.org

On 08/01/13 12:52, Wei Liu wrote:
> Just to be clear, this is version 5 of the patch.
>
> Git send-email mysteriously dropped my subject prefix.
>
>
> Wei.
>
> On Tue, 2013-01-08 at 11:50 +0000, Wei Liu wrote:
>> In Linux select() typically supports up to 1024 file descriptors. This can be
>> a problem when user tries to boot up many guests. Switching to poll() has
>> minimum impact on existing code and has better scalibility.
>>
>> pollfd array is dynamically allocated / reallocated. If the array fails to
>> expand, we just ignore the incoming fd.
>>
>> Change from V2:
>>    * remove unnecessary malloc in initialize_pollfd_arrays
>>    * use ROUND_UP to get new size of arrays
>>
>> Change from V3:
>>    * remove initialize and destroy function for array
>>    * embedded tracking structure in struct domain, eliminate fd_to_pollfd
>>
>> Change from V4:
>>    * make xs_pollfd local to io.c
>>    * add back the 5 ms fuzz
>>    * handle POLLERR and POLLHUP
>>    * treat POLLPRI as error if set in revents
>>    * abort if xenconsoled's own fds get screwed
>>    * handle broken tty if tty's fds get screwed
>>
>> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
>> ---
>>   tools/console/daemon/io.c |  189 +++++++++++++++++++++++++++++++--------------
>>   1 file changed, 131 insertions(+), 58 deletions(-)
>>
>> diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
>> index 48fe151..8d16cac 100644
>> --- a/tools/console/daemon/io.c
>> +++ b/tools/console/daemon/io.c
>> @@ -28,7 +28,7 @@
>>   #include <stdlib.h>
>>   #include <errno.h>
>>   #include <string.h>
>> -#include <sys/select.h>
>> +#include <poll.h>
>>   #include <fcntl.h>
>>   #include <unistd.h>
>>   #include <termios.h>
>> @@ -69,6 +69,7 @@ static int log_hv_fd = -1;
>>   static evtchn_port_or_error_t log_hv_evtchn = -1;
>>   static xc_interface *xch; /* why does xenconsoled have two xc handles ? */
>>   static xc_evtchn *xce_handle = NULL;
>> +static struct pollfd *xce_pollfd = NULL;
>>   
>>   struct buffer {
>>   	char *data;
>> @@ -81,7 +82,9 @@ struct buffer {
>>   struct domain {
>>   	int domid;
>>   	int master_fd;
>> +	struct pollfd *master_pollfd;
>>   	int slave_fd;
>> +	struct pollfd *slave_pollfd;
>>   	int log_fd;
>>   	bool is_dead;
>>   	unsigned last_seen;
>> @@ -92,6 +95,7 @@ struct domain {
>>   	evtchn_port_or_error_t local_port;
>>   	evtchn_port_or_error_t remote_port;
>>   	xc_evtchn *xce_handle;
>> +	struct pollfd *xce_pollfd;
>>   	struct xencons_interface *interface;
>>   	int event_count;
>>   	long long next_period;
>> @@ -769,6 +773,17 @@ static int ring_free_bytes(struct domain *dom)
>>   	return (sizeof(intf->in) - space);
>>   }
>>   
>> +static void domain_handle_broken_tty(struct domain *dom, int recreate)
>> +{
>> +	domain_close_tty(dom);
>> +
>> +	if (recreate) {
>> +		domain_create_tty(dom);
>> +	} else {
>> +		shutdown_domain(dom);
>> +	}
>> +}
>> +
>>   static void handle_tty_read(struct domain *dom)
>>   {
>>   	ssize_t len = 0;
>> @@ -794,13 +809,7 @@ static void handle_tty_read(struct domain *dom)
>>   	 * keep the slave open for the duration.
>>   	 */
>>   	if (len < 0) {
>> -		domain_close_tty(dom);
>> -
>> -		if (domain_is_valid(dom->domid)) {
>> -			domain_create_tty(dom);
>> -		} else {
>> -			shutdown_domain(dom);
>> -		}
>> +		domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
>>   	} else if (domain_is_valid(dom->domid)) {
>>   		prod = intf->in_prod;
>>   		for (i = 0; i < len; i++) {
>> @@ -828,14 +837,7 @@ static void handle_tty_write(struct domain *dom)
>>    	if (len < 1) {
>>   		dolog(LOG_DEBUG, "Write failed on domain %d: %zd, %d\n",
>>   		      dom->domid, len, errno);
>> -
>> -		domain_close_tty(dom);
>> -
>> -		if (domain_is_valid(dom->domid)) {
>> -			domain_create_tty(dom);
>> -		} else {
>> -			shutdown_domain(dom);
>> -		}
>> +		domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
>>   	} else {
>>   		buffer_advance(&dom->buffer, len);
>>   	}
>> @@ -928,9 +930,53 @@ static void handle_log_reload(void)
>>   	}
>>   }
>>   
>> +struct pollfd *xs_pollfd;
Why is this not "static"?
>> +static struct pollfd  *fds;
>> +static unsigned int current_array_size;
>> +static unsigned int nr_fds;
There seems to be no particular reason why these variables are here, and 
the other ones up the top of the file. For example xce_handle is not 
used "above" here, but it's declared at the top of the file. I think 
keeping all variables together makes more sense than scattering them around.

Otherwise, looks good to me.

--
Mats

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Switch from select() to poll() in xenconsoled's IO loop
  2013-01-08 14:01   ` Mats Petersson
@ 2013-01-08 14:07     ` Ian Campbell
  0 siblings, 0 replies; 4+ messages in thread
From: Ian Campbell @ 2013-01-08 14:07 UTC (permalink / raw)
  To: Mats Petersson; +Cc: Ian Jackson, Wei Liu, xen-devel@lists.xen.org

On Tue, 2013-01-08 at 14:01 +0000, Mats Petersson wrote:
> >> +struct pollfd *xs_pollfd;
> Why is this not "static"?
> >> +static struct pollfd  *fds;
> >> +static unsigned int current_array_size;
> >> +static unsigned int nr_fds;
> There seems to be no particular reason why these variables are here, and 
> the other ones up the top of the file. For example xce_handle is not 
> used "above" here, but it's declared at the top of the file. I think 
> keeping all variables together makes more sense than scattering them around.

I'd prefer if those which can be were local to the handle_io function
and not global at all.  I think this includes both xs_pollfd and
xce_pollfd.

I think it also includes the existing xce_handle and log_hv_evtchn.

Ian.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-01-08 14:07 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-01-08 11:50 [PATCH] Switch from select() to poll() in xenconsoled's IO loop Wei Liu
2013-01-08 12:52 ` Wei Liu
2013-01-08 14:01   ` Mats Petersson
2013-01-08 14:07     ` Ian Campbell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.