xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Mats Petersson <mats.petersson@citrix.com>
To: xen-devel@lists.xen.org
Subject: Re: [PATCH V4] Switch from select() to poll() in xenconsoled's IO loop
Date: Mon, 7 Jan 2013 14:41:31 +0000	[thread overview]
Message-ID: <50EADE9B.5090109@citrix.com> (raw)
In-Reply-To: <1357568895.11865.1.camel@iceland>

On 07/01/13 14:28, Wei Liu wrote:
> In Linux select() typically supports up to 1024 file descriptors. This can be
> a problem when user tries to boot up many guests. Switching to poll() has
> minimum impact on existing code and has better scalibility.
>
> pollfd array is dynamically allocated / reallocated. If the array fails to
> expand, we just ignore the incoming fd.
>
> Change from V2:
>    * remove unnecessary malloc in initialize_pollfd_arrays
>    * use ROUND_UP to get new size of arrays
>
> Change from V3:
>    * remove initialize and destroy function for array
>    * embedded tracking structure in struct domain, eliminate fd_to_pollfd
>
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> ---
>   tools/console/daemon/io.c    |  119 ++++++++++++++++++++++++++++--------------
>   tools/console/daemon/utils.c |    2 +
>   tools/console/daemon/utils.h |    2 +
>   3 files changed, 85 insertions(+), 38 deletions(-)
>
> diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
> index 48fe151..80f7144 100644
> --- a/tools/console/daemon/io.c
> +++ b/tools/console/daemon/io.c
> @@ -28,7 +28,7 @@
>   #include <stdlib.h>
>   #include <errno.h>
>   #include <string.h>
> -#include <sys/select.h>
> +#include <poll.h>
>   #include <fcntl.h>
>   #include <unistd.h>
>   #include <termios.h>
> @@ -69,6 +69,7 @@ static int log_hv_fd = -1;
>   static evtchn_port_or_error_t log_hv_evtchn = -1;
>   static xc_interface *xch; /* why does xenconsoled have two xc handles ? */
>   static xc_evtchn *xce_handle = NULL;
> +static struct pollfd *xce_pollfd = NULL;
>
>   struct buffer {
>          char *data;
> @@ -81,7 +82,9 @@ struct buffer {
>   struct domain {
>          int domid;
>          int master_fd;
> +       struct pollfd *master_pollfd;
>          int slave_fd;
> +       struct pollfd *slave_pollfd;
>          int log_fd;
>          bool is_dead;
>          unsigned last_seen;
> @@ -92,6 +95,7 @@ struct domain {
>          evtchn_port_or_error_t local_port;
>          evtchn_port_or_error_t remote_port;
>          xc_evtchn *xce_handle;
> +       struct pollfd *xce_pollfd;
>          struct xencons_interface *interface;
>          int event_count;
>          long long next_period;
> @@ -928,9 +932,52 @@ static void handle_log_reload(void)
>          }
>   }
>
> +static struct pollfd  *fds;
> +static unsigned int current_array_size;
> +static unsigned int nr_fds;
> +
> +#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
> +static struct pollfd *set_fds(int fd, short events)
> +{
> +       struct pollfd *ret;
> +       if (current_array_size < nr_fds + 1) {
> +               struct pollfd  *new_fds = NULL;
> +               unsigned long newsize;
> +
> +               /* Round up to 2^8 boundary, in practice this just
> +                * make newsize larger than current_array_size.
> +                */
> +               newsize = ROUNDUP(nr_fds + 1, 8);
> +
> +               new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
> +               if (!new_fds)
> +                       goto fail;
> +               fds = new_fds;
> +
> +               memset(&fds[0] + current_array_size, 0,
> +                      sizeof(struct pollfd) * (newsize-current_array_size));
> +               current_array_size = newsize;
> +       }
> +
> +       fds[nr_fds].fd = fd;
> +       fds[nr_fds].events = events;
> +       ret = &fds[nr_fds];
> +       nr_fds++;
> +
> +       return ret;
> +fail:
> +       dolog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
> +       return NULL;
> +}
> +
> +static void reset_fds(void)
> +{
> +       nr_fds = 0;
> +       memset(fds, 0, sizeof(struct pollfd) * current_array_size);
> +}
> +
>   void handle_io(void)
>   {
> -       fd_set readfds, writefds;
>          int ret;
>
>          if (log_hv) {
> @@ -959,21 +1006,16 @@ void handle_io(void)
>
>          for (;;) {
>                  struct domain *d, *n;
> -               int max_fd = -1;
> -               struct timeval timeout;
> +               int poll_timeout; /* timeout in milliseconds */
>                  struct timespec ts;
>                  long long now, next_timeout = 0;
>
> -               FD_ZERO(&readfds);
> -               FD_ZERO(&writefds);
> +               reset_fds();
>
> -               FD_SET(xs_fileno(xs), &readfds);
> -               max_fd = MAX(xs_fileno(xs), max_fd);
> +               xs_pollfd = set_fds(xs_fileno(xs), POLLIN);
>
> -               if (log_hv) {
> -                       FD_SET(xc_evtchn_fd(xce_handle), &readfds);
> -                       max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd);
> -               }
> +               if (log_hv)
> +                       xce_pollfd = set_fds(xc_evtchn_fd(xce_handle), POLLIN);
>
>                  if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
>                          return;
> @@ -982,11 +1024,7 @@ void handle_io(void)
>                  /* Re-calculate any event counter allowances & unblock
>                     domains with new allowance */
>                  for (d = dom_head; d; d = d->next) {
> -                       /* Add 5ms of fuzz since select() often returns
> -                          a couple of ms sooner than requested. Without
> -                          the fuzz we typically do an extra spin in select()
> -                          with a 1/2 ms timeout every other iteration */
> -                       if ((now+5) > d->next_period) {
> +                       if (now > d->next_period) {
Is poll more accurate than select? I would have thought that they were 
based on the same timing, and thus equally "fuzzy"?
>                                  d->next_period = now + RATE_LIMIT_PERIOD;
>                                  if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
>                                          (void)xc_evtchn_unmask(d->xce_handle, d->local_port);
> @@ -1006,74 +1044,76 @@ void handle_io(void)
>                                      !d->buffer.max_capacity ||
>                                      d->buffer.size < d->buffer.max_capacity) {
>                                          int evtchn_fd = xc_evtchn_fd(d->xce_handle);
> -                                       FD_SET(evtchn_fd, &readfds);
> -                                       max_fd = MAX(evtchn_fd, max_fd);
> +                                       d->xce_pollfd = set_fds(evtchn_fd, POLLIN);
>                                  }
>                          }
>
>                          if (d->master_fd != -1) {
> +                               short events = 0;
>                                  if (!d->is_dead && ring_free_bytes(d))
> -                                       FD_SET(d->master_fd, &readfds);
> +                                       events |= POLLIN;
>
>                                  if (!buffer_empty(&d->buffer))
> -                                       FD_SET(d->master_fd, &writefds);
> -                               max_fd = MAX(d->master_fd, max_fd);
> +                                       events |= POLLOUT;
> +
> +                               if (events)
> +                                       d->master_pollfd =
> +                                               set_fds(d->master_fd, events);
>                          }
>                  }
>
>                  /* If any domain has been rate limited, we need to work
> -                  out what timeout to supply to select */
> +                  out what timeout to supply to poll */
>                  if (next_timeout) {
>                          long long duration = (next_timeout - now);
>                          if (duration <= 0) /* sanity check */
>                                  duration = 1;
> -                       timeout.tv_sec = duration / 1000;
> -                       timeout.tv_usec = ((duration - (timeout.tv_sec * 1000))
> -                                          * 1000);
> +                       poll_timeout = (int)duration;
>                  }
>
> -               ret = select(max_fd + 1, &readfds, &writefds, 0,
> -                            next_timeout ? &timeout : NULL);
> +               ret = poll(fds, nr_fds, next_timeout ? poll_timeout : -1);
>
>                  if (log_reload) {
>                          handle_log_reload();
>                          log_reload = 0;
>                  }
>
> -               /* Abort if select failed, except for EINTR cases
> +               /* Abort if poll failed, except for EINTR cases
>                     which indicate a possible log reload */
>                  if (ret == -1) {
>                          if (errno == EINTR)
>                                  continue;
> -                       dolog(LOG_ERR, "Failure in select: %d (%s)",
> +                       dolog(LOG_ERR, "Failure in poll: %d (%s)",
>                                errno, strerror(errno));
>                          break;
>                  }
>
> -               if (log_hv && FD_ISSET(xc_evtchn_fd(xce_handle), &readfds))
> +               if (log_hv && xce_pollfd && xce_pollfd->revents & POLLIN)
>                          handle_hv_logs();
>
>                  if (ret <= 0)
>                          continue;
>
> -               if (FD_ISSET(xs_fileno(xs), &readfds))
> +               if (xs_pollfd && xs_pollfd->revents & POLLIN)
>                          handle_xs();
>
>                  for (d = dom_head; d; d = n) {
>                          n = d->next;
>                          if (d->event_count < RATE_LIMIT_ALLOWANCE) {
>                                  if (d->xce_handle != NULL &&
> -                                   FD_ISSET(xc_evtchn_fd(d->xce_handle),
> -                                            &readfds))
> +                                   d->xce_pollfd != NULL &&
> +                                   d->xce_pollfd->revents & POLLIN)
>                                          handle_ring_read(d);
>                          }
>
> -                       if (d->master_fd != -1 && FD_ISSET(d->master_fd,
> -                                                          &readfds))
> +                       if (d->master_fd != -1 &&
> +                           d->master_pollfd &&
> +                           d->master_pollfd->revents & POLLIN)
>                                  handle_tty_read(d);
>
> -                       if (d->master_fd != -1 && FD_ISSET(d->master_fd,
> -                                                          &writefds))
> +                       if (d->master_fd != -1 &&
> +                           d->master_pollfd &&
> +                           d->master_pollfd->revents & POLLOUT)
>                                  handle_tty_write(d);
>
>                          if (d->last_seen != enum_pass)
> @@ -1084,6 +1124,9 @@ void handle_io(void)
>                  }
>          }
>
> +       free(fds);
> +       current_array_size = 0;
> +
>    out:
>          if (log_hv_fd != -1) {
>                  close(log_hv_fd);
> diff --git a/tools/console/daemon/utils.c b/tools/console/daemon/utils.c
> index aab6f42..d0b1b00 100644
> --- a/tools/console/daemon/utils.c
> +++ b/tools/console/daemon/utils.c
> @@ -33,11 +33,13 @@
>   #include <sys/un.h>
>   #include <string.h>
>   #include <signal.h>
> +#include <poll.h>
>
>   #include "xenctrl.h"
>   #include "utils.h"
>
>   struct xs_handle *xs;
> +struct pollfd *xs_pollfd;
I don't see this used outside of io.c  - am I missing something?

Adding more dependencies between different source files seems 
unnecessary...

--
Mats
>   xc_interface *xc;
>
>   static void child_exit(int sig)
> diff --git a/tools/console/daemon/utils.h b/tools/console/daemon/utils.h
> index 8725dcd..8e72dab 100644
> --- a/tools/console/daemon/utils.h
> +++ b/tools/console/daemon/utils.h
> @@ -24,6 +24,7 @@
>   #include <stdbool.h>
>   #include <syslog.h>
>   #include <stdio.h>
> +#include <poll.h>
>   #include <xenctrl.h>
>
>   #include <xenstore.h>
> @@ -32,6 +33,7 @@ void daemonize(const char *pidfile);
>   bool xen_setup(void);
>
>   extern struct xs_handle *xs;
> +extern struct pollfd *xs_pollfd;
>   extern xc_interface *xc;
>
>   #if 1
> --
> 1.7.10.4
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
>
>

  parent reply	other threads:[~2013-01-07 14:41 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-03 17:14 [PATCH] Switch to poll in xenconsoled's io loop Wei Liu
2013-01-03 18:22 ` Mats Petersson
2013-01-04 12:30   ` Wei Liu
2013-01-04 15:58 ` [PATCH V2] Switch from select() to poll() in xenconsoled's IO loop Wei Liu
2013-01-04 16:08   ` Ian Campbell
2013-01-04 16:38     ` Wei Liu
2013-01-04 16:51       ` Mats Petersson
2013-01-04 17:17 ` [PATCH V3] " Wei Liu
2013-01-07 10:20   ` Ian Campbell
2013-01-07 12:12     ` Wei Liu
2013-01-07 12:16       ` Ian Campbell
2013-01-07 14:28 ` [PATCH V4] " Wei Liu
2013-01-07 14:39   ` Ian Campbell
2013-01-07 14:44     ` Wei Liu
2013-01-07 14:52     ` Ian Jackson
2013-01-07 14:41   ` Mats Petersson [this message]
2013-01-07 15:01     ` Wei Liu
2013-01-07 15:06       ` Mats Petersson
2013-01-07 15:17         ` Ian Campbell
2013-01-07 15:16       ` Ian Campbell
2013-01-07 15:24         ` Wei Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=50EADE9B.5090109@citrix.com \
    --to=mats.petersson@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).