All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wei Liu <Wei.Liu2@citrix.com>
To: "xen-devel@lists.xen.org" <xen-devel@lists.xen.org>
Cc: Ian Jackson <Ian.Jackson@eu.citrix.com>,
	wei.liu2@citrix.com, Ian Campbell <Ian.Campbell@citrix.com>,
	Mats Petersson <mats.petersson@citrix.com>
Subject: Re: [PATCH] Switch from select() to poll() in xenconsoled's IO loop
Date: Tue, 8 Jan 2013 12:52:43 +0000	[thread overview]
Message-ID: <1357649563.13581.19.camel@iceland> (raw)
In-Reply-To: <1357645812-25072-1-git-send-email-wei.liu2@citrix.com>

Just to be clear, this is version 5 of the patch.

Git send-email mysteriously dropped my subject prefix.


Wei. 

On Tue, 2013-01-08 at 11:50 +0000, Wei Liu wrote:
> In Linux select() typically supports up to 1024 file descriptors. This can be
> a problem when user tries to boot up many guests. Switching to poll() has
> minimum impact on existing code and has better scalibility.
> 
> pollfd array is dynamically allocated / reallocated. If the array fails to
> expand, we just ignore the incoming fd.
> 
> Change from V2:
>   * remove unnecessary malloc in initialize_pollfd_arrays
>   * use ROUND_UP to get new size of arrays
> 
> Change from V3:
>   * remove initialize and destroy function for array
>   * embedded tracking structure in struct domain, eliminate fd_to_pollfd
> 
> Change from V4:
>   * make xs_pollfd local to io.c
>   * add back the 5 ms fuzz
>   * handle POLLERR and POLLHUP
>   * treat POLLPRI as error if set in revents
>   * abort if xenconsoled's own fds get screwed
>   * handle broken tty if tty's fds get screwed
> 
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> ---
>  tools/console/daemon/io.c |  189 +++++++++++++++++++++++++++++++--------------
>  1 file changed, 131 insertions(+), 58 deletions(-)
> 
> diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
> index 48fe151..8d16cac 100644
> --- a/tools/console/daemon/io.c
> +++ b/tools/console/daemon/io.c
> @@ -28,7 +28,7 @@
>  #include <stdlib.h>
>  #include <errno.h>
>  #include <string.h>
> -#include <sys/select.h>
> +#include <poll.h>
>  #include <fcntl.h>
>  #include <unistd.h>
>  #include <termios.h>
> @@ -69,6 +69,7 @@ static int log_hv_fd = -1;
>  static evtchn_port_or_error_t log_hv_evtchn = -1;
>  static xc_interface *xch; /* why does xenconsoled have two xc handles ? */
>  static xc_evtchn *xce_handle = NULL;
> +static struct pollfd *xce_pollfd = NULL;
>  
>  struct buffer {
>  	char *data;
> @@ -81,7 +82,9 @@ struct buffer {
>  struct domain {
>  	int domid;
>  	int master_fd;
> +	struct pollfd *master_pollfd;
>  	int slave_fd;
> +	struct pollfd *slave_pollfd;
>  	int log_fd;
>  	bool is_dead;
>  	unsigned last_seen;
> @@ -92,6 +95,7 @@ struct domain {
>  	evtchn_port_or_error_t local_port;
>  	evtchn_port_or_error_t remote_port;
>  	xc_evtchn *xce_handle;
> +	struct pollfd *xce_pollfd;
>  	struct xencons_interface *interface;
>  	int event_count;
>  	long long next_period;
> @@ -769,6 +773,17 @@ static int ring_free_bytes(struct domain *dom)
>  	return (sizeof(intf->in) - space);
>  }
>  
> +static void domain_handle_broken_tty(struct domain *dom, int recreate)
> +{
> +	domain_close_tty(dom);
> +
> +	if (recreate) {
> +		domain_create_tty(dom);
> +	} else {
> +		shutdown_domain(dom);
> +	}
> +}
> +
>  static void handle_tty_read(struct domain *dom)
>  {
>  	ssize_t len = 0;
> @@ -794,13 +809,7 @@ static void handle_tty_read(struct domain *dom)
>  	 * keep the slave open for the duration.
>  	 */
>  	if (len < 0) {
> -		domain_close_tty(dom);
> -
> -		if (domain_is_valid(dom->domid)) {
> -			domain_create_tty(dom);
> -		} else {
> -			shutdown_domain(dom);
> -		}
> +		domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
>  	} else if (domain_is_valid(dom->domid)) {
>  		prod = intf->in_prod;
>  		for (i = 0; i < len; i++) {
> @@ -828,14 +837,7 @@ static void handle_tty_write(struct domain *dom)
>   	if (len < 1) {
>  		dolog(LOG_DEBUG, "Write failed on domain %d: %zd, %d\n",
>  		      dom->domid, len, errno);
> -
> -		domain_close_tty(dom);
> -
> -		if (domain_is_valid(dom->domid)) {
> -			domain_create_tty(dom);
> -		} else {
> -			shutdown_domain(dom);
> -		}
> +		domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
>  	} else {
>  		buffer_advance(&dom->buffer, len);
>  	}
> @@ -928,9 +930,53 @@ static void handle_log_reload(void)
>  	}
>  }
>  
> +struct pollfd *xs_pollfd;
> +static struct pollfd  *fds;
> +static unsigned int current_array_size;
> +static unsigned int nr_fds;
> +
> +#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
> +static struct pollfd *set_fds(int fd, short events)
> +{
> +	struct pollfd *ret;
> +	if (current_array_size < nr_fds + 1) {
> +		struct pollfd  *new_fds = NULL;
> +		unsigned long newsize;
> +
> +		/* Round up to 2^8 boundary, in practice this just
> +		 * make newsize larger than current_array_size.
> +		 */
> +		newsize = ROUNDUP(nr_fds + 1, 8);
> +
> +		new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
> +		if (!new_fds)
> +			goto fail;
> +		fds = new_fds;
> +
> +		memset(&fds[0] + current_array_size, 0,
> +		       sizeof(struct pollfd) * (newsize-current_array_size));
> +		current_array_size = newsize;
> +	}
> +
> +	fds[nr_fds].fd = fd;
> +	fds[nr_fds].events = events;
> +	ret = &fds[nr_fds];
> +	nr_fds++;
> +
> +	return ret;
> +fail:
> +	dolog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
> +	return NULL;
> +}
> +
> +static void reset_fds(void)
> +{
> +	nr_fds = 0;
> +	memset(fds, 0, sizeof(struct pollfd) * current_array_size);
> +}
> +
>  void handle_io(void)
>  {
> -	fd_set readfds, writefds;
>  	int ret;
>  
>  	if (log_hv) {
> @@ -959,21 +1005,17 @@ void handle_io(void)
>  
>  	for (;;) {
>  		struct domain *d, *n;
> -		int max_fd = -1;
> -		struct timeval timeout;
> +		int poll_timeout; /* timeout in milliseconds */
>  		struct timespec ts;
>  		long long now, next_timeout = 0;
>  
> -		FD_ZERO(&readfds);
> -		FD_ZERO(&writefds);
> +		reset_fds();
>  
> -		FD_SET(xs_fileno(xs), &readfds);
> -		max_fd = MAX(xs_fileno(xs), max_fd);
> +		xs_pollfd = set_fds(xs_fileno(xs), POLLIN|POLLPRI);
>  
> -		if (log_hv) {
> -			FD_SET(xc_evtchn_fd(xce_handle), &readfds);
> -			max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd);
> -		}
> +		if (log_hv)
> +			xce_pollfd = set_fds(xc_evtchn_fd(xce_handle),
> +					     POLLIN|POLLPRI);
>  
>  		if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
>  			return;
> @@ -982,10 +1024,12 @@ void handle_io(void)
>  		/* Re-calculate any event counter allowances & unblock
>  		   domains with new allowance */
>  		for (d = dom_head; d; d = d->next) {
> -			/* Add 5ms of fuzz since select() often returns
> -			   a couple of ms sooner than requested. Without
> -			   the fuzz we typically do an extra spin in select()
> -			   with a 1/2 ms timeout every other iteration */
> +			/* CS 16257:955ee4fa1345 introduces a 5ms fuzz
> +			 * for select(), it is not clear poll() has
> +			 * similar behavior (returning a couple of ms
> +			 * sooner than requested) as well. Just leave
> +			 * the fuzz here. Remove it with a separate
> +			 * patch if necessary */
>  			if ((now+5) > d->next_period) {
>  				d->next_period = now + RATE_LIMIT_PERIOD;
>  				if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
> @@ -1006,75 +1050,101 @@ void handle_io(void)
>  				    !d->buffer.max_capacity ||
>  				    d->buffer.size < d->buffer.max_capacity) {
>  					int evtchn_fd = xc_evtchn_fd(d->xce_handle);
> -					FD_SET(evtchn_fd, &readfds);
> -					max_fd = MAX(evtchn_fd, max_fd);
> +					d->xce_pollfd = set_fds(evtchn_fd,
> +								POLLIN|POLLPRI);
>  				}
>  			}
>  
>  			if (d->master_fd != -1) {
> +				short events = 0;
>  				if (!d->is_dead && ring_free_bytes(d))
> -					FD_SET(d->master_fd, &readfds);
> +					events |= POLLIN;
>  
>  				if (!buffer_empty(&d->buffer))
> -					FD_SET(d->master_fd, &writefds);
> -				max_fd = MAX(d->master_fd, max_fd);
> +					events |= POLLOUT;
> +
> +				if (events)
> +					d->master_pollfd =
> +						set_fds(d->master_fd,
> +							events|POLLPRI);
>  			}
>  		}
>  
>  		/* If any domain has been rate limited, we need to work
> -		   out what timeout to supply to select */
> +		   out what timeout to supply to poll */
>  		if (next_timeout) {
>  			long long duration = (next_timeout - now);
>  			if (duration <= 0) /* sanity check */
>  				duration = 1;
> -			timeout.tv_sec = duration / 1000;
> -			timeout.tv_usec = ((duration - (timeout.tv_sec * 1000))
> -					   * 1000);
> +			poll_timeout = (int)duration;
>  		}
>  
> -		ret = select(max_fd + 1, &readfds, &writefds, 0,
> -			     next_timeout ? &timeout : NULL);
> +		ret = poll(fds, nr_fds, next_timeout ? poll_timeout : -1);
>  
>  		if (log_reload) {
>  			handle_log_reload();
>  			log_reload = 0;
>  		}
>  
> -		/* Abort if select failed, except for EINTR cases
> +		/* Abort if poll failed, except for EINTR cases
>  		   which indicate a possible log reload */
>  		if (ret == -1) {
>  			if (errno == EINTR)
>  				continue;
> -			dolog(LOG_ERR, "Failure in select: %d (%s)",
> +			dolog(LOG_ERR, "Failure in poll: %d (%s)",
>  			      errno, strerror(errno));
>  			break;
>  		}
>  
> -		if (log_hv && FD_ISSET(xc_evtchn_fd(xce_handle), &readfds))
> -			handle_hv_logs();
> +		if (log_hv && xce_pollfd) {
> +			if (xce_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> +				dolog(LOG_ERR,
> +				      "Failure in poll xce_handle: %d (%s)",
> +				      errno, strerror(errno));
> +				break;
> +			} else if (xce_pollfd->revents & POLLIN)
> +				handle_hv_logs();
> +		}
>  
>  		if (ret <= 0)
>  			continue;
>  
> -		if (FD_ISSET(xs_fileno(xs), &readfds))
> -			handle_xs();
> +		if (xs_pollfd) {
> +			if (xs_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> +				dolog(LOG_ERR,
> +				      "Failure in poll xs_handle: %d (%s)",
> +				      errno, strerror(errno));
> +				break;
> +			} else if (xs_pollfd->revents & POLLIN)
> +				handle_xs();
> +		}
>  
>  		for (d = dom_head; d; d = n) {
>  			n = d->next;
>  			if (d->event_count < RATE_LIMIT_ALLOWANCE) {
>  				if (d->xce_handle != NULL &&
> -				    FD_ISSET(xc_evtchn_fd(d->xce_handle),
> -					     &readfds))
> -					handle_ring_read(d);
> +				    d->xce_pollfd &&
> +				    !(d->xce_pollfd->revents &
> +				      ~(POLLIN|POLLOUT|POLLPRI)) &&
> +				      (d->xce_pollfd->revents &
> +				       POLLIN))
> +				    handle_ring_read(d);
>  			}
>  
> -			if (d->master_fd != -1 && FD_ISSET(d->master_fd,
> -							   &readfds))
> -				handle_tty_read(d);
> -
> -			if (d->master_fd != -1 && FD_ISSET(d->master_fd,
> -							   &writefds))
> -				handle_tty_write(d);
> +			if (d->master_fd != -1 && d->master_pollfd) {
> +				if (d->master_pollfd->revents &
> +				    ~(POLLIN|POLLOUT|POLLPRI))
> +					domain_handle_broken_tty(d,
> +						   domain_is_valid(d->domid));
> +				else {
> +					if (d->master_pollfd->revents &
> +					    POLLIN)
> +						handle_tty_read(d);
> +					if (d->master_pollfd->revents &
> +					    POLLOUT)
> +						handle_tty_write(d);
> +				}
> +			}
>  
>  			if (d->last_seen != enum_pass)
>  				shutdown_domain(d);
> @@ -1084,6 +1154,9 @@ void handle_io(void)
>  		}
>  	}
>  
> +	free(fds);
> +	current_array_size = 0;
> +
>   out:
>  	if (log_hv_fd != -1) {
>  		close(log_hv_fd);

  reply	other threads:[~2013-01-08 12:52 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-08 11:50 [PATCH] Switch from select() to poll() in xenconsoled's IO loop Wei Liu
2013-01-08 12:52 ` Wei Liu [this message]
2013-01-08 14:01   ` Mats Petersson
2013-01-08 14:07     ` Ian Campbell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1357649563.13581.19.camel@iceland \
    --to=wei.liu2@citrix.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=mats.petersson@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.