From mboxrd@z Thu Jan 1 00:00:00 1970 From: Mats Petersson Subject: Re: [PATCH V4] Switch from select() to poll() in xenconsoled's IO loop Date: Mon, 7 Jan 2013 14:41:31 +0000 Message-ID: <50EADE9B.5090109@citrix.com> References: <1357233257-13918-1-git-send-email-wei.liu2@citrix.com> <1357568895.11865.1.camel@iceland> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1357568895.11865.1.camel@iceland> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen-devel@lists.xen.org List-Id: xen-devel@lists.xenproject.org On 07/01/13 14:28, Wei Liu wrote: > In Linux select() typically supports up to 1024 file descriptors. This can be > a problem when user tries to boot up many guests. Switching to poll() has > minimum impact on existing code and has better scalibility. > > pollfd array is dynamically allocated / reallocated. If the array fails to > expand, we just ignore the incoming fd. > > Change from V2: > * remove unnecessary malloc in initialize_pollfd_arrays > * use ROUND_UP to get new size of arrays > > Change from V3: > * remove initialize and destroy function for array > * embedded tracking structure in struct domain, eliminate fd_to_pollfd > > Signed-off-by: Wei Liu > --- > tools/console/daemon/io.c | 119 ++++++++++++++++++++++++++++-------------- > tools/console/daemon/utils.c | 2 + > tools/console/daemon/utils.h | 2 + > 3 files changed, 85 insertions(+), 38 deletions(-) > > diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c > index 48fe151..80f7144 100644 > --- a/tools/console/daemon/io.c > +++ b/tools/console/daemon/io.c > @@ -28,7 +28,7 @@ > #include > #include > #include > -#include > +#include > #include > #include > #include > @@ -69,6 +69,7 @@ static int log_hv_fd = -1; > static evtchn_port_or_error_t log_hv_evtchn = -1; > static xc_interface *xch; /* why does xenconsoled have two xc handles ? */ > static xc_evtchn *xce_handle = NULL; > +static struct pollfd *xce_pollfd = NULL; > > struct buffer { > char *data; > @@ -81,7 +82,9 @@ struct buffer { > struct domain { > int domid; > int master_fd; > + struct pollfd *master_pollfd; > int slave_fd; > + struct pollfd *slave_pollfd; > int log_fd; > bool is_dead; > unsigned last_seen; > @@ -92,6 +95,7 @@ struct domain { > evtchn_port_or_error_t local_port; > evtchn_port_or_error_t remote_port; > xc_evtchn *xce_handle; > + struct pollfd *xce_pollfd; > struct xencons_interface *interface; > int event_count; > long long next_period; > @@ -928,9 +932,52 @@ static void handle_log_reload(void) > } > } > > +static struct pollfd *fds; > +static unsigned int current_array_size; > +static unsigned int nr_fds; > + > +#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1)) > +static struct pollfd *set_fds(int fd, short events) > +{ > + struct pollfd *ret; > + if (current_array_size < nr_fds + 1) { > + struct pollfd *new_fds = NULL; > + unsigned long newsize; > + > + /* Round up to 2^8 boundary, in practice this just > + * make newsize larger than current_array_size. > + */ > + newsize = ROUNDUP(nr_fds + 1, 8); > + > + new_fds = realloc(fds, sizeof(struct pollfd)*newsize); > + if (!new_fds) > + goto fail; > + fds = new_fds; > + > + memset(&fds[0] + current_array_size, 0, > + sizeof(struct pollfd) * (newsize-current_array_size)); > + current_array_size = newsize; > + } > + > + fds[nr_fds].fd = fd; > + fds[nr_fds].events = events; > + ret = &fds[nr_fds]; > + nr_fds++; > + > + return ret; > +fail: > + dolog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd); > + return NULL; > +} > + > +static void reset_fds(void) > +{ > + nr_fds = 0; > + memset(fds, 0, sizeof(struct pollfd) * current_array_size); > +} > + > void handle_io(void) > { > - fd_set readfds, writefds; > int ret; > > if (log_hv) { > @@ -959,21 +1006,16 @@ void handle_io(void) > > for (;;) { > struct domain *d, *n; > - int max_fd = -1; > - struct timeval timeout; > + int poll_timeout; /* timeout in milliseconds */ > struct timespec ts; > long long now, next_timeout = 0; > > - FD_ZERO(&readfds); > - FD_ZERO(&writefds); > + reset_fds(); > > - FD_SET(xs_fileno(xs), &readfds); > - max_fd = MAX(xs_fileno(xs), max_fd); > + xs_pollfd = set_fds(xs_fileno(xs), POLLIN); > > - if (log_hv) { > - FD_SET(xc_evtchn_fd(xce_handle), &readfds); > - max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd); > - } > + if (log_hv) > + xce_pollfd = set_fds(xc_evtchn_fd(xce_handle), POLLIN); > > if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) > return; > @@ -982,11 +1024,7 @@ void handle_io(void) > /* Re-calculate any event counter allowances & unblock > domains with new allowance */ > for (d = dom_head; d; d = d->next) { > - /* Add 5ms of fuzz since select() often returns > - a couple of ms sooner than requested. Without > - the fuzz we typically do an extra spin in select() > - with a 1/2 ms timeout every other iteration */ > - if ((now+5) > d->next_period) { > + if (now > d->next_period) { Is poll more accurate than select? I would have thought that they were based on the same timing, and thus equally "fuzzy"? > d->next_period = now + RATE_LIMIT_PERIOD; > if (d->event_count >= RATE_LIMIT_ALLOWANCE) { > (void)xc_evtchn_unmask(d->xce_handle, d->local_port); > @@ -1006,74 +1044,76 @@ void handle_io(void) > !d->buffer.max_capacity || > d->buffer.size < d->buffer.max_capacity) { > int evtchn_fd = xc_evtchn_fd(d->xce_handle); > - FD_SET(evtchn_fd, &readfds); > - max_fd = MAX(evtchn_fd, max_fd); > + d->xce_pollfd = set_fds(evtchn_fd, POLLIN); > } > } > > if (d->master_fd != -1) { > + short events = 0; > if (!d->is_dead && ring_free_bytes(d)) > - FD_SET(d->master_fd, &readfds); > + events |= POLLIN; > > if (!buffer_empty(&d->buffer)) > - FD_SET(d->master_fd, &writefds); > - max_fd = MAX(d->master_fd, max_fd); > + events |= POLLOUT; > + > + if (events) > + d->master_pollfd = > + set_fds(d->master_fd, events); > } > } > > /* If any domain has been rate limited, we need to work > - out what timeout to supply to select */ > + out what timeout to supply to poll */ > if (next_timeout) { > long long duration = (next_timeout - now); > if (duration <= 0) /* sanity check */ > duration = 1; > - timeout.tv_sec = duration / 1000; > - timeout.tv_usec = ((duration - (timeout.tv_sec * 1000)) > - * 1000); > + poll_timeout = (int)duration; > } > > - ret = select(max_fd + 1, &readfds, &writefds, 0, > - next_timeout ? &timeout : NULL); > + ret = poll(fds, nr_fds, next_timeout ? poll_timeout : -1); > > if (log_reload) { > handle_log_reload(); > log_reload = 0; > } > > - /* Abort if select failed, except for EINTR cases > + /* Abort if poll failed, except for EINTR cases > which indicate a possible log reload */ > if (ret == -1) { > if (errno == EINTR) > continue; > - dolog(LOG_ERR, "Failure in select: %d (%s)", > + dolog(LOG_ERR, "Failure in poll: %d (%s)", > errno, strerror(errno)); > break; > } > > - if (log_hv && FD_ISSET(xc_evtchn_fd(xce_handle), &readfds)) > + if (log_hv && xce_pollfd && xce_pollfd->revents & POLLIN) > handle_hv_logs(); > > if (ret <= 0) > continue; > > - if (FD_ISSET(xs_fileno(xs), &readfds)) > + if (xs_pollfd && xs_pollfd->revents & POLLIN) > handle_xs(); > > for (d = dom_head; d; d = n) { > n = d->next; > if (d->event_count < RATE_LIMIT_ALLOWANCE) { > if (d->xce_handle != NULL && > - FD_ISSET(xc_evtchn_fd(d->xce_handle), > - &readfds)) > + d->xce_pollfd != NULL && > + d->xce_pollfd->revents & POLLIN) > handle_ring_read(d); > } > > - if (d->master_fd != -1 && FD_ISSET(d->master_fd, > - &readfds)) > + if (d->master_fd != -1 && > + d->master_pollfd && > + d->master_pollfd->revents & POLLIN) > handle_tty_read(d); > > - if (d->master_fd != -1 && FD_ISSET(d->master_fd, > - &writefds)) > + if (d->master_fd != -1 && > + d->master_pollfd && > + d->master_pollfd->revents & POLLOUT) > handle_tty_write(d); > > if (d->last_seen != enum_pass) > @@ -1084,6 +1124,9 @@ void handle_io(void) > } > } > > + free(fds); > + current_array_size = 0; > + > out: > if (log_hv_fd != -1) { > close(log_hv_fd); > diff --git a/tools/console/daemon/utils.c b/tools/console/daemon/utils.c > index aab6f42..d0b1b00 100644 > --- a/tools/console/daemon/utils.c > +++ b/tools/console/daemon/utils.c > @@ -33,11 +33,13 @@ > #include > #include > #include > +#include > > #include "xenctrl.h" > #include "utils.h" > > struct xs_handle *xs; > +struct pollfd *xs_pollfd; I don't see this used outside of io.c - am I missing something? Adding more dependencies between different source files seems unnecessary... -- Mats > xc_interface *xc; > > static void child_exit(int sig) > diff --git a/tools/console/daemon/utils.h b/tools/console/daemon/utils.h > index 8725dcd..8e72dab 100644 > --- a/tools/console/daemon/utils.h > +++ b/tools/console/daemon/utils.h > @@ -24,6 +24,7 @@ > #include > #include > #include > +#include > #include > > #include > @@ -32,6 +33,7 @@ void daemonize(const char *pidfile); > bool xen_setup(void); > > extern struct xs_handle *xs; > +extern struct pollfd *xs_pollfd; > extern xc_interface *xc; > > #if 1 > -- > 1.7.10.4 > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel > >