From: Mats Petersson <mats.petersson@citrix.com>
To: xen-devel@lists.xen.org
Subject: Re: [PATCH V4] Switch from select() to poll() in xenconsoled's IO loop
Date: Mon, 7 Jan 2013 14:41:31 +0000 [thread overview]
Message-ID: <50EADE9B.5090109@citrix.com> (raw)
In-Reply-To: <1357568895.11865.1.camel@iceland>
On 07/01/13 14:28, Wei Liu wrote:
> In Linux select() typically supports up to 1024 file descriptors. This can be
> a problem when user tries to boot up many guests. Switching to poll() has
> minimum impact on existing code and has better scalibility.
>
> pollfd array is dynamically allocated / reallocated. If the array fails to
> expand, we just ignore the incoming fd.
>
> Change from V2:
> * remove unnecessary malloc in initialize_pollfd_arrays
> * use ROUND_UP to get new size of arrays
>
> Change from V3:
> * remove initialize and destroy function for array
> * embedded tracking structure in struct domain, eliminate fd_to_pollfd
>
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> ---
> tools/console/daemon/io.c | 119 ++++++++++++++++++++++++++++--------------
> tools/console/daemon/utils.c | 2 +
> tools/console/daemon/utils.h | 2 +
> 3 files changed, 85 insertions(+), 38 deletions(-)
>
> diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
> index 48fe151..80f7144 100644
> --- a/tools/console/daemon/io.c
> +++ b/tools/console/daemon/io.c
> @@ -28,7 +28,7 @@
> #include <stdlib.h>
> #include <errno.h>
> #include <string.h>
> -#include <sys/select.h>
> +#include <poll.h>
> #include <fcntl.h>
> #include <unistd.h>
> #include <termios.h>
> @@ -69,6 +69,7 @@ static int log_hv_fd = -1;
> static evtchn_port_or_error_t log_hv_evtchn = -1;
> static xc_interface *xch; /* why does xenconsoled have two xc handles ? */
> static xc_evtchn *xce_handle = NULL;
> +static struct pollfd *xce_pollfd = NULL;
>
> struct buffer {
> char *data;
> @@ -81,7 +82,9 @@ struct buffer {
> struct domain {
> int domid;
> int master_fd;
> + struct pollfd *master_pollfd;
> int slave_fd;
> + struct pollfd *slave_pollfd;
> int log_fd;
> bool is_dead;
> unsigned last_seen;
> @@ -92,6 +95,7 @@ struct domain {
> evtchn_port_or_error_t local_port;
> evtchn_port_or_error_t remote_port;
> xc_evtchn *xce_handle;
> + struct pollfd *xce_pollfd;
> struct xencons_interface *interface;
> int event_count;
> long long next_period;
> @@ -928,9 +932,52 @@ static void handle_log_reload(void)
> }
> }
>
> +static struct pollfd *fds;
> +static unsigned int current_array_size;
> +static unsigned int nr_fds;
> +
> +#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
> +static struct pollfd *set_fds(int fd, short events)
> +{
> + struct pollfd *ret;
> + if (current_array_size < nr_fds + 1) {
> + struct pollfd *new_fds = NULL;
> + unsigned long newsize;
> +
> + /* Round up to 2^8 boundary, in practice this just
> + * make newsize larger than current_array_size.
> + */
> + newsize = ROUNDUP(nr_fds + 1, 8);
> +
> + new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
> + if (!new_fds)
> + goto fail;
> + fds = new_fds;
> +
> + memset(&fds[0] + current_array_size, 0,
> + sizeof(struct pollfd) * (newsize-current_array_size));
> + current_array_size = newsize;
> + }
> +
> + fds[nr_fds].fd = fd;
> + fds[nr_fds].events = events;
> + ret = &fds[nr_fds];
> + nr_fds++;
> +
> + return ret;
> +fail:
> + dolog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
> + return NULL;
> +}
> +
> +static void reset_fds(void)
> +{
> + nr_fds = 0;
> + memset(fds, 0, sizeof(struct pollfd) * current_array_size);
> +}
> +
> void handle_io(void)
> {
> - fd_set readfds, writefds;
> int ret;
>
> if (log_hv) {
> @@ -959,21 +1006,16 @@ void handle_io(void)
>
> for (;;) {
> struct domain *d, *n;
> - int max_fd = -1;
> - struct timeval timeout;
> + int poll_timeout; /* timeout in milliseconds */
> struct timespec ts;
> long long now, next_timeout = 0;
>
> - FD_ZERO(&readfds);
> - FD_ZERO(&writefds);
> + reset_fds();
>
> - FD_SET(xs_fileno(xs), &readfds);
> - max_fd = MAX(xs_fileno(xs), max_fd);
> + xs_pollfd = set_fds(xs_fileno(xs), POLLIN);
>
> - if (log_hv) {
> - FD_SET(xc_evtchn_fd(xce_handle), &readfds);
> - max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd);
> - }
> + if (log_hv)
> + xce_pollfd = set_fds(xc_evtchn_fd(xce_handle), POLLIN);
>
> if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
> return;
> @@ -982,11 +1024,7 @@ void handle_io(void)
> /* Re-calculate any event counter allowances & unblock
> domains with new allowance */
> for (d = dom_head; d; d = d->next) {
> - /* Add 5ms of fuzz since select() often returns
> - a couple of ms sooner than requested. Without
> - the fuzz we typically do an extra spin in select()
> - with a 1/2 ms timeout every other iteration */
> - if ((now+5) > d->next_period) {
> + if (now > d->next_period) {
Is poll more accurate than select? I would have thought that they were
based on the same timing, and thus equally "fuzzy"?
> d->next_period = now + RATE_LIMIT_PERIOD;
> if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
> (void)xc_evtchn_unmask(d->xce_handle, d->local_port);
> @@ -1006,74 +1044,76 @@ void handle_io(void)
> !d->buffer.max_capacity ||
> d->buffer.size < d->buffer.max_capacity) {
> int evtchn_fd = xc_evtchn_fd(d->xce_handle);
> - FD_SET(evtchn_fd, &readfds);
> - max_fd = MAX(evtchn_fd, max_fd);
> + d->xce_pollfd = set_fds(evtchn_fd, POLLIN);
> }
> }
>
> if (d->master_fd != -1) {
> + short events = 0;
> if (!d->is_dead && ring_free_bytes(d))
> - FD_SET(d->master_fd, &readfds);
> + events |= POLLIN;
>
> if (!buffer_empty(&d->buffer))
> - FD_SET(d->master_fd, &writefds);
> - max_fd = MAX(d->master_fd, max_fd);
> + events |= POLLOUT;
> +
> + if (events)
> + d->master_pollfd =
> + set_fds(d->master_fd, events);
> }
> }
>
> /* If any domain has been rate limited, we need to work
> - out what timeout to supply to select */
> + out what timeout to supply to poll */
> if (next_timeout) {
> long long duration = (next_timeout - now);
> if (duration <= 0) /* sanity check */
> duration = 1;
> - timeout.tv_sec = duration / 1000;
> - timeout.tv_usec = ((duration - (timeout.tv_sec * 1000))
> - * 1000);
> + poll_timeout = (int)duration;
> }
>
> - ret = select(max_fd + 1, &readfds, &writefds, 0,
> - next_timeout ? &timeout : NULL);
> + ret = poll(fds, nr_fds, next_timeout ? poll_timeout : -1);
>
> if (log_reload) {
> handle_log_reload();
> log_reload = 0;
> }
>
> - /* Abort if select failed, except for EINTR cases
> + /* Abort if poll failed, except for EINTR cases
> which indicate a possible log reload */
> if (ret == -1) {
> if (errno == EINTR)
> continue;
> - dolog(LOG_ERR, "Failure in select: %d (%s)",
> + dolog(LOG_ERR, "Failure in poll: %d (%s)",
> errno, strerror(errno));
> break;
> }
>
> - if (log_hv && FD_ISSET(xc_evtchn_fd(xce_handle), &readfds))
> + if (log_hv && xce_pollfd && xce_pollfd->revents & POLLIN)
> handle_hv_logs();
>
> if (ret <= 0)
> continue;
>
> - if (FD_ISSET(xs_fileno(xs), &readfds))
> + if (xs_pollfd && xs_pollfd->revents & POLLIN)
> handle_xs();
>
> for (d = dom_head; d; d = n) {
> n = d->next;
> if (d->event_count < RATE_LIMIT_ALLOWANCE) {
> if (d->xce_handle != NULL &&
> - FD_ISSET(xc_evtchn_fd(d->xce_handle),
> - &readfds))
> + d->xce_pollfd != NULL &&
> + d->xce_pollfd->revents & POLLIN)
> handle_ring_read(d);
> }
>
> - if (d->master_fd != -1 && FD_ISSET(d->master_fd,
> - &readfds))
> + if (d->master_fd != -1 &&
> + d->master_pollfd &&
> + d->master_pollfd->revents & POLLIN)
> handle_tty_read(d);
>
> - if (d->master_fd != -1 && FD_ISSET(d->master_fd,
> - &writefds))
> + if (d->master_fd != -1 &&
> + d->master_pollfd &&
> + d->master_pollfd->revents & POLLOUT)
> handle_tty_write(d);
>
> if (d->last_seen != enum_pass)
> @@ -1084,6 +1124,9 @@ void handle_io(void)
> }
> }
>
> + free(fds);
> + current_array_size = 0;
> +
> out:
> if (log_hv_fd != -1) {
> close(log_hv_fd);
> diff --git a/tools/console/daemon/utils.c b/tools/console/daemon/utils.c
> index aab6f42..d0b1b00 100644
> --- a/tools/console/daemon/utils.c
> +++ b/tools/console/daemon/utils.c
> @@ -33,11 +33,13 @@
> #include <sys/un.h>
> #include <string.h>
> #include <signal.h>
> +#include <poll.h>
>
> #include "xenctrl.h"
> #include "utils.h"
>
> struct xs_handle *xs;
> +struct pollfd *xs_pollfd;
I don't see this used outside of io.c - am I missing something?
Adding more dependencies between different source files seems
unnecessary...
--
Mats
> xc_interface *xc;
>
> static void child_exit(int sig)
> diff --git a/tools/console/daemon/utils.h b/tools/console/daemon/utils.h
> index 8725dcd..8e72dab 100644
> --- a/tools/console/daemon/utils.h
> +++ b/tools/console/daemon/utils.h
> @@ -24,6 +24,7 @@
> #include <stdbool.h>
> #include <syslog.h>
> #include <stdio.h>
> +#include <poll.h>
> #include <xenctrl.h>
>
> #include <xenstore.h>
> @@ -32,6 +33,7 @@ void daemonize(const char *pidfile);
> bool xen_setup(void);
>
> extern struct xs_handle *xs;
> +extern struct pollfd *xs_pollfd;
> extern xc_interface *xc;
>
> #if 1
> --
> 1.7.10.4
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
>
>
next prev parent reply other threads:[~2013-01-07 14:41 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-01-03 17:14 [PATCH] Switch to poll in xenconsoled's io loop Wei Liu
2013-01-03 18:22 ` Mats Petersson
2013-01-04 12:30 ` Wei Liu
2013-01-04 15:58 ` [PATCH V2] Switch from select() to poll() in xenconsoled's IO loop Wei Liu
2013-01-04 16:08 ` Ian Campbell
2013-01-04 16:38 ` Wei Liu
2013-01-04 16:51 ` Mats Petersson
2013-01-04 17:17 ` [PATCH V3] " Wei Liu
2013-01-07 10:20 ` Ian Campbell
2013-01-07 12:12 ` Wei Liu
2013-01-07 12:16 ` Ian Campbell
2013-01-07 14:28 ` [PATCH V4] " Wei Liu
2013-01-07 14:39 ` Ian Campbell
2013-01-07 14:44 ` Wei Liu
2013-01-07 14:52 ` Ian Jackson
2013-01-07 14:41 ` Mats Petersson [this message]
2013-01-07 15:01 ` Wei Liu
2013-01-07 15:06 ` Mats Petersson
2013-01-07 15:17 ` Ian Campbell
2013-01-07 15:16 ` Ian Campbell
2013-01-07 15:24 ` Wei Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=50EADE9B.5090109@citrix.com \
--to=mats.petersson@citrix.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.