From: Wei Liu <Wei.Liu2@citrix.com>
To: "xen-devel@lists.xen.org" <xen-devel@lists.xen.org>
Cc: wei.liu2@citrix.com
Subject: [PATCH V3] Switch from select() to poll() in xenconsoled's IO loop.
Date: Fri, 4 Jan 2013 17:17:39 +0000 [thread overview]
Message-ID: <1357319859.18503.41.camel@iceland> (raw)
In-Reply-To: <1357233257-13918-1-git-send-email-wei.liu2@citrix.com>
On Linux, select() typically supports only up to 1024 file descriptors. This
can be a problem when a user tries to boot many guests. Switching to poll() has
minimal impact on the existing code and scales better.
Tracking arrays are dynamically allocated / reallocated. If the tracking
arrays fail to expand, we just ignore the incoming fd.
Changes from V2:
* remove unnecessary malloc in initialize_pollfd_arrays
* use ROUNDUP to compute the new size of the arrays
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
tools/console/daemon/io.c | 147 +++++++++++++++++++++++++++++++++------------
1 file changed, 109 insertions(+), 38 deletions(-)
diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
index 48fe151..0c53d30 100644
--- a/tools/console/daemon/io.c
+++ b/tools/console/daemon/io.c
@@ -28,7 +28,7 @@
#include <stdlib.h>
#include <errno.h>
#include <string.h>
-#include <sys/select.h>
+#include <poll.h>
#include <fcntl.h>
#include <unistd.h>
#include <termios.h>
@@ -928,9 +928,87 @@ static void handle_log_reload(void)
}
}
+
+/* Should have at least max_fd + 1 elements */
+static struct pollfd *fds;
+static struct pollfd **fd_to_pollfd;
+static unsigned int current_array_size;
+static unsigned int nr_fds;
+
+static void initialize_pollfd_arrays(void)
+{
+ fds = NULL;
+ fd_to_pollfd = NULL;
+ current_array_size = 0;
+}
+
+static void destroy_pollfd_arrays(void)
+{
+ free(fds);
+ free(fd_to_pollfd);
+ current_array_size = 0;
+}
+
+#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
+static void set_fds(int fd, short events)
+{
+ if (current_array_size < fd+1) {
+ struct pollfd *p1 = NULL;
+ struct pollfd **p2 = NULL;
+ unsigned long newsize;
+
+ /* Round up to 2^8 boundary, in practice this just
+ * make newsize larger than current_array_size.
+ */
+ newsize = ROUNDUP(fd+1, 8);
+
+ p1 = realloc(fds, sizeof(struct pollfd)*newsize);
+ if (!p1)
+ goto fail;
+ fds = p1;
+
+ p2 = realloc(fd_to_pollfd, sizeof(struct pollfd *)*newsize);
+ if (!p2)
+ goto fail;
+ fd_to_pollfd = p2;
+
+ memset(&fds[0] + current_array_size, 0,
+ sizeof(struct pollfd) * (newsize-current_array_size));
+ memset(&fd_to_pollfd[0] + current_array_size, 0,
+ sizeof(struct pollfd *) * (newsize-current_array_size));
+ current_array_size = newsize;
+ }
+
+ fds[nr_fds].fd = fd;
+ fds[nr_fds].events = events;
+ fd_to_pollfd[fd] = &fds[nr_fds];
+ nr_fds++;
+
+ return;
+fail:
+ dolog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
+ return;
+}
+
+static short fd_revents(int fd)
+{
+ if (fd >= current_array_size)
+ return 0;
+ if (fd_to_pollfd[fd] == NULL)
+ return 0;
+ return fd_to_pollfd[fd]->revents;
+}
+
+static void reset_fds(void)
+{
+ nr_fds = 0;
+ memset(fds, 0, sizeof(struct pollfd) * current_array_size);
+ memset(fd_to_pollfd, 0,
+ sizeof(struct pollfd *) * current_array_size);
+}
+
void handle_io(void)
{
- fd_set readfds, writefds;
int ret;
if (log_hv) {
@@ -957,23 +1035,20 @@ void handle_io(void)
}
}
+ initialize_pollfd_arrays();
+
for (;;) {
struct domain *d, *n;
- int max_fd = -1;
- struct timeval timeout;
+ int poll_timeout; /* timeout in milliseconds */
struct timespec ts;
long long now, next_timeout = 0;
- FD_ZERO(&readfds);
- FD_ZERO(&writefds);
+ reset_fds();
- FD_SET(xs_fileno(xs), &readfds);
- max_fd = MAX(xs_fileno(xs), max_fd);
+ set_fds(xs_fileno(xs), POLLIN);
- if (log_hv) {
- FD_SET(xc_evtchn_fd(xce_handle), &readfds);
- max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd);
- }
+ if (log_hv)
+ set_fds(xc_evtchn_fd(xce_handle), POLLIN);
if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
return;
@@ -982,11 +1057,7 @@ void handle_io(void)
/* Re-calculate any event counter allowances & unblock
domains with new allowance */
for (d = dom_head; d; d = d->next) {
- /* Add 5ms of fuzz since select() often returns
- a couple of ms sooner than requested. Without
- the fuzz we typically do an extra spin in select()
- with a 1/2 ms timeout every other iteration */
- if ((now+5) > d->next_period) {
+ if (now > d->next_period) {
d->next_period = now + RATE_LIMIT_PERIOD;
if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
(void)xc_evtchn_unmask(d->xce_handle, d->local_port);
@@ -1006,74 +1077,73 @@ void handle_io(void)
!d->buffer.max_capacity ||
d->buffer.size < d->buffer.max_capacity) {
int evtchn_fd = xc_evtchn_fd(d->xce_handle);
- FD_SET(evtchn_fd, &readfds);
- max_fd = MAX(evtchn_fd, max_fd);
+ set_fds(evtchn_fd, POLLIN);
}
}
if (d->master_fd != -1) {
+ short events = 0;
if (!d->is_dead && ring_free_bytes(d))
- FD_SET(d->master_fd, &readfds);
+ events |= POLLIN;
if (!buffer_empty(&d->buffer))
- FD_SET(d->master_fd, &writefds);
- max_fd = MAX(d->master_fd, max_fd);
+ events |= POLLOUT;
+
+ if (events)
+ set_fds(d->master_fd, events);
}
}
/* If any domain has been rate limited, we need to work
- out what timeout to supply to select */
+ out what timeout to supply to poll */
if (next_timeout) {
long long duration = (next_timeout - now);
if (duration <= 0) /* sanity check */
duration = 1;
- timeout.tv_sec = duration / 1000;
- timeout.tv_usec = ((duration - (timeout.tv_sec * 1000))
- * 1000);
+ poll_timeout = (int)duration;
}
- ret = select(max_fd + 1, &readfds, &writefds, 0,
- next_timeout ? &timeout : NULL);
+ ret = poll(fds, nr_fds, next_timeout ? poll_timeout : -1);
if (log_reload) {
handle_log_reload();
log_reload = 0;
}
- /* Abort if select failed, except for EINTR cases
+ /* Abort if poll failed, except for EINTR cases
which indicate a possible log reload */
if (ret == -1) {
if (errno == EINTR)
continue;
- dolog(LOG_ERR, "Failure in select: %d (%s)",
+ dolog(LOG_ERR, "Failure in poll: %d (%s)",
errno, strerror(errno));
break;
}
- if (log_hv && FD_ISSET(xc_evtchn_fd(xce_handle), &readfds))
+ if (log_hv && fd_revents(xc_evtchn_fd(xce_handle)) & POLLIN)
handle_hv_logs();
if (ret <= 0)
continue;
- if (FD_ISSET(xs_fileno(xs), &readfds))
+ if (fd_revents(xs_fileno(xs)) & POLLIN)
handle_xs();
for (d = dom_head; d; d = n) {
n = d->next;
if (d->event_count < RATE_LIMIT_ALLOWANCE) {
if (d->xce_handle != NULL &&
- FD_ISSET(xc_evtchn_fd(d->xce_handle),
- &readfds))
+ fd_revents(xc_evtchn_fd(d->xce_handle)) &
+ POLLIN)
handle_ring_read(d);
}
- if (d->master_fd != -1 && FD_ISSET(d->master_fd,
- &readfds))
+ if (d->master_fd != -1 &&
+ fd_revents(d->master_fd) & POLLIN)
handle_tty_read(d);
- if (d->master_fd != -1 && FD_ISSET(d->master_fd,
- &writefds))
+ if (d->master_fd != -1 &&
+ fd_revents(d->master_fd) & POLLOUT)
handle_tty_write(d);
if (d->last_seen != enum_pass)
@@ -1084,6 +1154,7 @@ void handle_io(void)
}
}
+ destroy_pollfd_arrays();
out:
if (log_hv_fd != -1) {
close(log_hv_fd);
--
1.7.10.4
next prev parent reply other threads:[~2013-01-04 17:17 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-01-03 17:14 [PATCH] Switch to poll in xenconsoled's io loop Wei Liu
2013-01-03 18:22 ` Mats Petersson
2013-01-04 12:30 ` Wei Liu
2013-01-04 15:58 ` [PATCH V2] Switch from select() to poll() in xenconsoled's IO loop Wei Liu
2013-01-04 16:08 ` Ian Campbell
2013-01-04 16:38 ` Wei Liu
2013-01-04 16:51 ` Mats Petersson
2013-01-04 17:17 ` Wei Liu [this message]
2013-01-07 10:20 ` [PATCH V3] " Ian Campbell
2013-01-07 12:12 ` Wei Liu
2013-01-07 12:16 ` Ian Campbell
2013-01-07 14:28 ` [PATCH V4] " Wei Liu
2013-01-07 14:39 ` Ian Campbell
2013-01-07 14:44 ` Wei Liu
2013-01-07 14:52 ` Ian Jackson
2013-01-07 14:41 ` Mats Petersson
2013-01-07 15:01 ` Wei Liu
2013-01-07 15:06 ` Mats Petersson
2013-01-07 15:17 ` Ian Campbell
2013-01-07 15:16 ` Ian Campbell
2013-01-07 15:24 ` Wei Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1357319859.18503.41.camel@iceland \
--to=wei.liu2@citrix.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).