* [PATCH 0/2] switch to poll() in cxenstored's IO loop
@ 2013-02-20 11:05 Wei Liu
2013-02-20 11:05 ` [PATCH 1/2] mini-os: implement poll(2) Wei Liu
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Wei Liu @ 2013-02-20 11:05 UTC (permalink / raw)
To: xen-devel
This patch series contains:
* poll(2) implementation for mini-os
* switch to poll() in cxenstored's IO loop
The second patch alone breaks xenstore-stubdom build because mini-os does not
have poll(2) originally.
I've tested this patch series with simple xenstore-stubdom setup. The
xenstore-stubdom can send / receive events without any problems.
Wei.
^ permalink raw reply [flat|nested] 8+ messages in thread* [PATCH 1/2] mini-os: implement poll(2) 2013-02-20 11:05 [PATCH 0/2] switch to poll() in cxenstored's IO loop Wei Liu @ 2013-02-20 11:05 ` Wei Liu 2013-02-20 11:05 ` [PATCH 2/2] Switch to poll() in cxenstored's IO loop Wei Liu 2013-03-04 11:15 ` [PATCH 0/2] switch " Wei Liu 2 siblings, 0 replies; 8+ messages in thread From: Wei Liu @ 2013-02-20 11:05 UTC (permalink / raw) To: xen-devel; +Cc: Wei Liu It is just a wrapper around select(2). This implementation mimics Linux's do_poll. Signed-off-by: Wei Liu <wei.liu2@citrix.com> Acked-by: Samuel Thibault <samuel.thibault@ens-lyon.org> --- extras/mini-os/include/posix/poll.h | 1 + extras/mini-os/lib/sys.c | 117 ++++++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 extras/mini-os/include/posix/poll.h diff --git a/extras/mini-os/include/posix/poll.h b/extras/mini-os/include/posix/poll.h new file mode 100644 index 0000000..06fb41a --- /dev/null +++ b/extras/mini-os/include/posix/poll.h @@ -0,0 +1 @@ +#include <sys/poll.h> diff --git a/extras/mini-os/lib/sys.c b/extras/mini-os/lib/sys.c index 3cc3340..cfbdc90 100644 --- a/extras/mini-os/lib/sys.c +++ b/extras/mini-os/lib/sys.c @@ -31,6 +31,7 @@ #include <tpm_tis.h> #include <xenbus.h> #include <xenstore.h> +#include <poll.h> #include <sys/types.h> #include <sys/unistd.h> @@ -678,6 +679,29 @@ static void dump_set(int nfds, fd_set *readfds, fd_set *writefds, fd_set *except #define dump_set(nfds, readfds, writefds, exceptfds, timeout) #endif +#ifdef LIBC_DEBUG +static void dump_pollfds(struct pollfd *pfd, int nfds, int timeout) +{ + int i, comma, fd; + + printk("["); + comma = 0; + for (i = 0; i < nfds; i++) { + fd = pfd[i].fd; + if (comma) + printk(", "); + printk("%d(%c)/%02x", fd, file_types[files[fd].type], + pfd[i].events); + comma = 1; + } + printk("]"); + + printk(", %d, %d", nfds, timeout); +} +#else +#define dump_pollfds(pfds, nfds, timeout) +#endif + /* Just poll without blocking */ static int select_poll(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds) { @@ -983,6 +1007,98 @@ out: return ret; } +/* Wrap around select */ +int poll(struct pollfd _pfd[], nfds_t _nfds, int _timeout) +{ + int n, ret; + int i, fd; + struct timeval _timeo, *timeo = NULL; + fd_set rfds, wfds, efds; + int max_fd = -1; + + DEBUG("poll("); + dump_pollfds(_pfd, _nfds, _timeout); + DEBUG(")\n"); + + FD_ZERO(&rfds); + FD_ZERO(&wfds); + FD_ZERO(&efds); + + n = 0; + + for (i = 0; i < _nfds; i++) { + fd = _pfd[i].fd; + _pfd[i].revents = 0; + + /* fd < 0, revents = 0, which is already set */ + if (fd < 0) continue; + + /* fd is invalid, revents = POLLNVAL, increment counter */ + if (fd >= NOFILE || files[fd].type == FTYPE_NONE) { + n++; + _pfd[i].revents |= POLLNVAL; + continue; + } + + /* normal case, map POLL* into readfds and writefds: + * POLLIN -> readfds + * POLLOUT -> writefds + * POLL* -> none + */ + if (_pfd[i].events & POLLIN) + FD_SET(fd, &rfds); + if (_pfd[i].events & POLLOUT) + FD_SET(fd, &wfds); + /* always set exceptfds */ + FD_SET(fd, &efds); + if (fd > max_fd) + max_fd = fd; + } + + /* should never sleep when we already have events */ + if (n) { + _timeo.tv_sec = 0; + _timeo.tv_usec = 0; + timeo = &_timeo; + } else if (_timeout >= 0) { + /* normal case, construct _timeout, might sleep */ + _timeo.tv_sec = _timeout / 1000; + _timeo.tv_usec = (_timeout % 1000) * 1000; + timeo = &_timeo; + } else { + /* _timeout < 0, block forever */ + timeo = NULL; + } + + + ret = select(max_fd+1, &rfds, &wfds, &efds, timeo); + /* error in select, just return, errno is set by select() */ + if (ret < 0) + return ret; + + for (i = 0; i < _nfds; i++) { + fd = _pfd[i].fd; + + /* the revents has already been set for all error case */ + if (fd < 0 || fd >= NOFILE || files[fd].type == FTYPE_NONE) + continue; + + if (FD_ISSET(fd, &rfds) || FD_ISSET(fd, &wfds) || FD_ISSET(fd, &efds)) + n++; + if (FD_ISSET(fd, &efds)) { + /* anything bad happens we set POLLERR */ + _pfd[i].revents |= POLLERR; + continue; + } + if (FD_ISSET(fd, &rfds)) + _pfd[i].revents |= POLLIN; + if (FD_ISSET(fd, &wfds)) + _pfd[i].revents |= POLLOUT; + } + + return n; +} + #ifdef HAVE_LWIP int socket(int domain, int type, int protocol) { @@ -1360,7 +1476,6 @@ unsupported_function(int, tcgetattr, 0); unsupported_function(int, grantpt, -1); unsupported_function(int, unlockpt, -1); unsupported_function(char *, ptsname, NULL); -unsupported_function(int, poll, -1); /* net/if.h */ unsupported_function_log(unsigned int, if_nametoindex, -1); -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] Switch to poll() in cxenstored's IO loop 2013-02-20 11:05 [PATCH 0/2] switch to poll() in cxenstored's IO loop Wei Liu 2013-02-20 11:05 ` [PATCH 1/2] mini-os: implement poll(2) Wei Liu @ 2013-02-20 11:05 ` Wei Liu 2013-03-12 15:27 ` Ian Campbell 2013-03-04 11:15 ` [PATCH 0/2] switch " Wei Liu 2 siblings, 1 reply; 8+ messages in thread From: Wei Liu @ 2013-02-20 11:05 UTC (permalink / raw) To: xen-devel; +Cc: Wei Liu Poll() can support more file descriptors than select(). We've done this for xenconsoled, now do this for cxenstored as well. The code is taken from xenconsoled and modified to adapt to cxenstored. Updated: * reopen pipe if polling on reopen_log_pipe fails. * exit if polling on some critical fds fails. * clean up POLL*. Signed-off-by: Wei Liu <wei.liu2@citrix.com> --- tools/xenstore/xenstored_core.c | 171 ++++++++++++++++++++++++++------------- tools/xenstore/xenstored_core.h | 2 + 2 files changed, 116 insertions(+), 57 deletions(-) diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c index bd44645..839e51d 100644 --- a/tools/xenstore/xenstored_core.c +++ b/tools/xenstore/xenstored_core.c @@ -19,7 +19,7 @@ #include <sys/types.h> #include <sys/stat.h> -#include <sys/select.h> +#include <poll.h> #ifndef NO_SOCKETS #include <sys/socket.h> #include <sys/un.h> @@ -55,6 +55,12 @@ #include "hashtable.h" extern xc_evtchn *xce_handle; /* in xenstored_domain.c */ +static struct pollfd *xce_pollfd; +static struct pollfd *fds; +static unsigned int current_array_size; +static unsigned int nr_fds; + +#define ROUNDUP(_x, _w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1)) static bool verbose = false; LIST_HEAD(connections); @@ -62,6 +68,7 @@ static int tracefd = -1; static bool recovery = true; static bool remove_local = true; static int reopen_log_pipe[2]; +static struct pollfd *reopen_log_pipe0_pollfd; static char *tracefile = NULL; static TDB_CONTEXT *tdb_ctx = NULL; @@ -199,7 +206,7 @@ void trace_destroy(const void *data, const char *type) /** * Signal handler for SIGHUP, which requests that the trace log is reopened * (in the main loop). A single byte is written to reopen_log_pipe, to awaken - * the select() in the main loop. + * the poll() in the main loop. */ static void trigger_reopen_log(int signal __attribute__((unused))) { @@ -279,15 +286,12 @@ static int destroy_conn(void *_conn) /* Flush outgoing if possible, but don't block. */ if (!conn->domain) { - fd_set set; - struct timeval none; - - FD_ZERO(&set); - FD_SET(conn->fd, &set); - none.tv_sec = none.tv_usec = 0; + struct pollfd pfd; + pfd.fd = conn->fd; + pfd.events = POLLOUT; while (!list_empty(&conn->out_list) - && select(conn->fd+1, NULL, &set, NULL, &none) == 1) + && poll(&pfd, 1, 0) == 1) if (!write_messages(conn)) break; close(conn->fd); @@ -300,52 +304,74 @@ static int destroy_conn(void *_conn) } -static void set_fd(int fd, fd_set *set, int *max) +static struct pollfd *set_fd(int fd, short events) { - if (fd < 0) - return; - FD_SET(fd, set); - if (fd > *max) - *max = fd; -} + struct pollfd *ret; + if (current_array_size < nr_fds + 1) { + struct pollfd *new_fds = NULL; + unsigned long newsize; + + /* Round up to 2^8 boundary, in practice this just + * make newsize larger than current_array_size. + */ + newsize = ROUNDUP(nr_fds + 1, 8); + new_fds = realloc(fds, sizeof(struct pollfd)*newsize); + if (!new_fds) + goto fail; + fds = new_fds; -static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock, - struct timeval **ptimeout) + memset(&fds[0] + current_array_size, 0, + sizeof(struct pollfd ) * (newsize-current_array_size)); + current_array_size = newsize; + } + + fds[nr_fds].fd = fd; + fds[nr_fds].events = events; + ret = &fds[nr_fds]; + nr_fds++; + + return ret; +fail: + syslog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd); + return NULL; +} + +static void initialize_fds(int sock, struct pollfd **p_sock_pollfd, + int ro_sock, struct pollfd **p_ro_sock_pollfd, + int *ptimeout) { - static struct timeval zero_timeout = { 0 }; struct connection *conn; - int max = -1; - *ptimeout = NULL; + memset(fds, 0, sizeof(struct pollfd) * current_array_size); + nr_fds = 0; - FD_ZERO(inset); - FD_ZERO(outset); + *ptimeout = -1; if (sock != -1) - set_fd(sock, inset, &max); + *p_sock_pollfd = set_fd(sock, POLLIN|POLLPRI); if (ro_sock != -1) - set_fd(ro_sock, inset, &max); + *p_ro_sock_pollfd = set_fd(ro_sock, POLLIN|POLLPRI); if (reopen_log_pipe[0] != -1) - set_fd(reopen_log_pipe[0], inset, &max); + reopen_log_pipe0_pollfd = + set_fd(reopen_log_pipe[0], POLLIN|POLLPRI); if (xce_handle != NULL) - set_fd(xc_evtchn_fd(xce_handle), inset, &max); + xce_pollfd = set_fd(xc_evtchn_fd(xce_handle), POLLIN|POLLPRI); list_for_each_entry(conn, &connections, list) { if (conn->domain) { if (domain_can_read(conn) || (domain_can_write(conn) && !list_empty(&conn->out_list))) - *ptimeout = &zero_timeout; + *ptimeout = 0; } else { - set_fd(conn->fd, inset, &max); + short events = POLLIN|POLLPRI; if (!list_empty(&conn->out_list)) - FD_SET(conn->fd, outset); + events |= POLLOUT; + conn->pollfd = set_fd(conn->fd, events); } } - - return max; } /* Is child a subnode of parent, or equal? */ @@ -1770,14 +1796,13 @@ int priv_domid = 0; int main(int argc, char *argv[]) { - int opt, *sock, *ro_sock, max; - fd_set inset, outset; + int opt, *sock, *ro_sock; + struct pollfd *sock_pollfd = NULL, *ro_sock_pollfd = NULL; bool dofork = true; bool outputpid = false; bool no_domain_init = false; const char *pidfile = NULL; - int evtchn_fd = -1; - struct timeval *timeout; + int timeout; while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options, NULL)) != -1) { @@ -1880,11 +1905,9 @@ int main(int argc, char *argv[]) signal(SIGHUP, trigger_reopen_log); - if (xce_handle != NULL) - evtchn_fd = xc_evtchn_fd(xce_handle); - /* Get ready to listen to the tools. */ - max = initialize_set(&inset, &outset, *sock, *ro_sock, &timeout); + initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd, + &timeout); /* Tell the kernel we're up and running. */ xenbus_notify_running(); @@ -1893,27 +1916,55 @@ int main(int argc, char *argv[]) for (;;) { struct connection *conn, *next; - if (select(max+1, &inset, &outset, NULL, timeout) < 0) { + if (poll(fds, nr_fds, timeout) < 0) { if (errno == EINTR) continue; - barf_perror("Select failed"); + barf_perror("Poll failed"); } - if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) { - char c; - if (read(reopen_log_pipe[0], &c, 1) != 1) - barf_perror("read failed"); - reopen_log(); + if (reopen_log_pipe0_pollfd) { + if (reopen_log_pipe0_pollfd->revents & ~POLLIN) { + close(reopen_log_pipe[0]); + close(reopen_log_pipe[1]); + init_pipe(reopen_log_pipe); + } else if (reopen_log_pipe0_pollfd->revents & POLLIN) { + char c; + if (read(reopen_log_pipe[0], &c, 1) != 1) + barf_perror("read failed"); + reopen_log(); + } + reopen_log_pipe0_pollfd = NULL; } - if (*sock != -1 && FD_ISSET(*sock, &inset)) - accept_connection(*sock, true); + if (sock_pollfd) { + if (sock_pollfd->revents & ~POLLIN) { + barf_perror("sock poll failed"); + break; + } else if (sock_pollfd->revents & POLLIN) { + accept_connection(*sock, true); + sock_pollfd = NULL; + } + } - if (*ro_sock != -1 && FD_ISSET(*ro_sock, &inset)) - accept_connection(*ro_sock, false); + if (ro_sock_pollfd) { + if (ro_sock_pollfd->revents & ~POLLIN) { + barf_perror("ro sock poll failed"); + break; + } else if (ro_sock_pollfd->revents & POLLIN) { + accept_connection(*ro_sock, false); + ro_sock_pollfd = NULL; + } + } - if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset)) - handle_event(); + if (xce_pollfd) { + if (xce_pollfd->revents & ~POLLIN) { + barf_perror("xce_handle poll failed"); + break; + } else if (xce_pollfd->revents & POLLIN) { + handle_event(); + xce_pollfd = NULL; + } + } next = list_entry(connections.next, typeof(*conn), list); if (&next->list != &connections) @@ -1939,21 +1990,27 @@ int main(int argc, char *argv[]) if (talloc_free(conn) == 0) continue; } else { - if (FD_ISSET(conn->fd, &inset)) + if (conn->pollfd && + !(conn->pollfd->revents & ~(POLLIN|POLLOUT)) && + (conn->pollfd->revents & POLLIN)) handle_input(conn); if (talloc_free(conn) == 0) continue; talloc_increase_ref_count(conn); - if (FD_ISSET(conn->fd, &outset)) + if (conn->pollfd && + !(conn->pollfd->revents & ~(POLLIN|POLLOUT)) && + (conn->pollfd->revents & POLLOUT)) handle_output(conn); if (talloc_free(conn) == 0) continue; + + conn->pollfd = NULL; } } - max = initialize_set(&inset, &outset, *sock, *ro_sock, - &timeout); + initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd, + &timeout); } } diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h index 492ca0d..f330a87 100644 --- a/tools/xenstore/xenstored_core.h +++ b/tools/xenstore/xenstored_core.h @@ -60,6 +60,8 @@ struct connection /* The file descriptor we came in on. */ int fd; + /* The pollfd corresponding to fd. */ + struct pollfd *pollfd; /* Who am I? 0 for socket connections. */ unsigned int id; -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] Switch to poll() in cxenstored's IO loop 2013-02-20 11:05 ` [PATCH 2/2] Switch to poll() in cxenstored's IO loop Wei Liu @ 2013-03-12 15:27 ` Ian Campbell 2013-03-12 15:34 ` Wei Liu 0 siblings, 1 reply; 8+ messages in thread From: Ian Campbell @ 2013-03-12 15:27 UTC (permalink / raw) To: Wei Liu; +Cc: xen-devel@lists.xen.org On Wed, 2013-02-20 at 11:05 +0000, Wei Liu wrote: > Poll() can support more file descriptors than select(). We've done this for > xenconsoled, now do this for cxenstored as well. Have you checked that the issue(s) Andrew Cooper found in the consoled patch haven't been carried over? Ian. ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] Switch to poll() in cxenstored's IO loop 2013-03-12 15:27 ` Ian Campbell @ 2013-03-12 15:34 ` Wei Liu 2013-03-12 15:37 ` Ian Campbell 0 siblings, 1 reply; 8+ messages in thread From: Wei Liu @ 2013-03-12 15:34 UTC (permalink / raw) To: Ian Campbell; +Cc: wei.liu2, xen-devel@lists.xen.org On Tue, 2013-03-12 at 15:27 +0000, Ian Campbell wrote: > On Wed, 2013-02-20 at 11:05 +0000, Wei Liu wrote: > > Poll() can support more file descriptors than select(). We've done this for > > xenconsoled, now do this for cxenstored as well. > > Have you checked that the issue(s) Andrew Cooper found in the consoled > patch haven't been carried over? > Yes I have checked, this patch is OK because it zero out all fields when allocating structure. But please do not apply this patch, I need to tuning the event handling logic because I found out if a fd goes wrong it could make the event loop active all the time, consuming 100% cpu. Wei. > Ian. > > ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] Switch to poll() in cxenstored's IO loop 2013-03-12 15:34 ` Wei Liu @ 2013-03-12 15:37 ` Ian Campbell 0 siblings, 0 replies; 8+ messages in thread From: Ian Campbell @ 2013-03-12 15:37 UTC (permalink / raw) To: Wei Liu; +Cc: xen-devel@lists.xen.org On Tue, 2013-03-12 at 15:34 +0000, Wei Liu wrote: > On Tue, 2013-03-12 at 15:27 +0000, Ian Campbell wrote: > > On Wed, 2013-02-20 at 11:05 +0000, Wei Liu wrote: > > > Poll() can support more file descriptors than select(). We've done this for > > > xenconsoled, now do this for cxenstored as well. > > > > Have you checked that the issue(s) Andrew Cooper found in the consoled > > patch haven't been carried over? > > > > Yes I have checked, this patch is OK because it zero out all fields when > allocating structure. > > But please do not apply this patch, I need to tuning the event handling > logic because I found out if a fd goes wrong it could make the event > loop active all the time, consuming 100% cpu. OK, I'll wait! Ian. ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 0/2] switch to poll() in cxenstored's IO loop 2013-02-20 11:05 [PATCH 0/2] switch to poll() in cxenstored's IO loop Wei Liu 2013-02-20 11:05 ` [PATCH 1/2] mini-os: implement poll(2) Wei Liu 2013-02-20 11:05 ` [PATCH 2/2] Switch to poll() in cxenstored's IO loop Wei Liu @ 2013-03-04 11:15 ` Wei Liu 2 siblings, 0 replies; 8+ messages in thread From: Wei Liu @ 2013-03-04 11:15 UTC (permalink / raw) To: xen-devel@lists.xen.org On Wed, Feb 20, 2013 at 11:05:07AM +0000, Wei Liu wrote: > This patch series contains: > * poll(2) implementation for mini-os > * switch to poll() in cxenstored's IO loop > > The second patch alone breaks xenstore-stubdom build because mini-os does not > have poll(2) originally. > > I've tested this patch series with simple xenstore-stubdom setup. The > xenstore-stubdom can send / receive events without any problems. > Any comments on this? Wei. > > Wei. > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel ^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 0/2 V3] Switch to poll() in cxenstored's IO loop @ 2013-03-25 11:17 Wei Liu 2013-03-25 11:17 ` [PATCH 2/2] " Wei Liu 0 siblings, 1 reply; 8+ messages in thread From: Wei Liu @ 2013-03-25 11:17 UTC (permalink / raw) To: xen-devel; +Cc: ian.jackson This is the third version of this patch series. All issues discovered in the switching of xenconsoled have been fixed. ^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/2] Switch to poll() in cxenstored's IO loop 2013-03-25 11:17 [PATCH 0/2 V3] Switch " Wei Liu @ 2013-03-25 11:17 ` Wei Liu 0 siblings, 0 replies; 8+ messages in thread From: Wei Liu @ 2013-03-25 11:17 UTC (permalink / raw) To: xen-devel; +Cc: Wei Liu, ian.jackson Poll() can support more file descriptors than select(). We've done this for xenconsoled, now do this for cxenstored as well. The code is taken from xenconsoled and modified to adapt to cxenstored. Note that poll() semantic is a bit different from select(). In Linux, if a fd is set in IN/OUT fd_set and error occurs inside select(), this fd is still considered readable / writable, and it is set in the returned IN/OUT fd_set. So in later handle_input / handle_output, the connection will eventually be talloc_free'ed(). After switching to poll(), we should take care of any error right away, making the code clearer. Signed-off-by: Wei Liu <wei.liu2@citrix.com> --- tools/xenstore/xenstored_core.c | 191 +++++++++++++++++++++++++++------------ tools/xenstore/xenstored_core.h | 2 + 2 files changed, 133 insertions(+), 60 deletions(-) diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c index bd44645..a2cf2a6 100644 --- a/tools/xenstore/xenstored_core.c +++ b/tools/xenstore/xenstored_core.c @@ -19,7 +19,7 @@ #include <sys/types.h> #include <sys/stat.h> -#include <sys/select.h> +#include <poll.h> #ifndef NO_SOCKETS #include <sys/socket.h> #include <sys/un.h> @@ -55,6 +55,12 @@ #include "hashtable.h" extern xc_evtchn *xce_handle; /* in xenstored_domain.c */ +static int xce_pollfd_idx = -1; +static struct pollfd *fds; +static unsigned int current_array_size; +static unsigned int nr_fds; + +#define ROUNDUP(_x, _w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1)) static bool verbose = false; LIST_HEAD(connections); @@ -62,6 +68,7 @@ static int tracefd = -1; static bool recovery = true; static bool remove_local = true; static int reopen_log_pipe[2]; +static int reopen_log_pipe0_pollfd_idx = -1; static char *tracefile = NULL; static TDB_CONTEXT *tdb_ctx = NULL; @@ -199,7 +206,7 @@ void trace_destroy(const void *data, const char *type) /** * Signal handler for SIGHUP, which requests that the trace log is reopened * (in the main loop). A single byte is written to reopen_log_pipe, to awaken - * the select() in the main loop. + * the poll() in the main loop. */ static void trigger_reopen_log(int signal __attribute__((unused))) { @@ -279,15 +286,12 @@ static int destroy_conn(void *_conn) /* Flush outgoing if possible, but don't block. */ if (!conn->domain) { - fd_set set; - struct timeval none; - - FD_ZERO(&set); - FD_SET(conn->fd, &set); - none.tv_sec = none.tv_usec = 0; + struct pollfd pfd; + pfd.fd = conn->fd; + pfd.events = POLLOUT; while (!list_empty(&conn->out_list) - && select(conn->fd+1, NULL, &set, NULL, &none) == 1) + && poll(&pfd, 1, 0) == 1) if (!write_messages(conn)) break; close(conn->fd); @@ -299,53 +303,77 @@ static int destroy_conn(void *_conn) return 0; } - -static void set_fd(int fd, fd_set *set, int *max) +/* This function returns index inside the array if succeed, -1 if fail */ +static int set_fd(int fd, short events) { - if (fd < 0) - return; - FD_SET(fd, set); - if (fd > *max) - *max = fd; -} + int ret; + if (current_array_size < nr_fds + 1) { + struct pollfd *new_fds = NULL; + unsigned long newsize; + + /* Round up to 2^8 boundary, in practice this just + * make newsize larger than current_array_size. + */ + newsize = ROUNDUP(nr_fds + 1, 8); + + new_fds = realloc(fds, sizeof(struct pollfd)*newsize); + if (!new_fds) + goto fail; + fds = new_fds; + + memset(&fds[0] + current_array_size, 0, + sizeof(struct pollfd ) * (newsize-current_array_size)); + current_array_size = newsize; + } + fds[nr_fds].fd = fd; + fds[nr_fds].events = events; + ret = nr_fds; + nr_fds++; -static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock, - struct timeval **ptimeout) + return ret; +fail: + syslog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd); + return -1; +} + +static void initialize_fds(int sock, int *p_sock_pollfd_idx, + int ro_sock, int *p_ro_sock_pollfd_idx, + int *ptimeout) { - static struct timeval zero_timeout = { 0 }; struct connection *conn; - int max = -1; - *ptimeout = NULL; + if (fds) + memset(fds, 0, sizeof(struct pollfd) * current_array_size); + nr_fds = 0; - FD_ZERO(inset); - FD_ZERO(outset); + *ptimeout = -1; if (sock != -1) - set_fd(sock, inset, &max); + *p_sock_pollfd_idx = set_fd(sock, POLLIN|POLLPRI); if (ro_sock != -1) - set_fd(ro_sock, inset, &max); + *p_ro_sock_pollfd_idx = set_fd(ro_sock, POLLIN|POLLPRI); if (reopen_log_pipe[0] != -1) - set_fd(reopen_log_pipe[0], inset, &max); + reopen_log_pipe0_pollfd_idx = + set_fd(reopen_log_pipe[0], POLLIN|POLLPRI); if (xce_handle != NULL) - set_fd(xc_evtchn_fd(xce_handle), inset, &max); + xce_pollfd_idx = set_fd(xc_evtchn_fd(xce_handle), + POLLIN|POLLPRI); list_for_each_entry(conn, &connections, list) { if (conn->domain) { if (domain_can_read(conn) || (domain_can_write(conn) && !list_empty(&conn->out_list))) - *ptimeout = &zero_timeout; + *ptimeout = 0; } else { - set_fd(conn->fd, inset, &max); + short events = POLLIN|POLLPRI; if (!list_empty(&conn->out_list)) - FD_SET(conn->fd, outset); + events |= POLLOUT; + conn->pollfd_idx = set_fd(conn->fd, events); } } - - return max; } /* Is child a subnode of parent, or equal? */ @@ -1330,6 +1358,7 @@ struct connection *new_connection(connwritefn_t *write, connreadfn_t *read) return NULL; new->fd = -1; + new->pollfd_idx = -1; new->write = write; new->read = read; new->can_write = true; @@ -1770,14 +1799,13 @@ int priv_domid = 0; int main(int argc, char *argv[]) { - int opt, *sock, *ro_sock, max; - fd_set inset, outset; + int opt, *sock, *ro_sock; + int sock_pollfd_idx = -1, ro_sock_pollfd_idx = -1; bool dofork = true; bool outputpid = false; bool no_domain_init = false; const char *pidfile = NULL; - int evtchn_fd = -1; - struct timeval *timeout; + int timeout; while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options, NULL)) != -1) { @@ -1880,11 +1908,9 @@ int main(int argc, char *argv[]) signal(SIGHUP, trigger_reopen_log); - if (xce_handle != NULL) - evtchn_fd = xc_evtchn_fd(xce_handle); - /* Get ready to listen to the tools. */ - max = initialize_set(&inset, &outset, *sock, *ro_sock, &timeout); + initialize_fds(*sock, &sock_pollfd_idx, *ro_sock, &ro_sock_pollfd_idx, + &timeout); /* Tell the kernel we're up and running. */ xenbus_notify_running(); @@ -1893,27 +1919,57 @@ int main(int argc, char *argv[]) for (;;) { struct connection *conn, *next; - if (select(max+1, &inset, &outset, NULL, timeout) < 0) { + if (poll(fds, nr_fds, timeout) < 0) { if (errno == EINTR) continue; - barf_perror("Select failed"); + barf_perror("Poll failed"); } - if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) { - char c; - if (read(reopen_log_pipe[0], &c, 1) != 1) - barf_perror("read failed"); - reopen_log(); + if (reopen_log_pipe0_pollfd_idx != -1) { + if (fds[reopen_log_pipe0_pollfd_idx].revents + & ~POLLIN) { + close(reopen_log_pipe[0]); + close(reopen_log_pipe[1]); + init_pipe(reopen_log_pipe); + } else if (fds[reopen_log_pipe0_pollfd_idx].revents + & POLLIN) { + char c; + if (read(reopen_log_pipe[0], &c, 1) != 1) + barf_perror("read failed"); + reopen_log(); + } + reopen_log_pipe0_pollfd_idx = -1; } - if (*sock != -1 && FD_ISSET(*sock, &inset)) - accept_connection(*sock, true); + if (sock_pollfd_idx != -1) { + if (fds[sock_pollfd_idx].revents & ~POLLIN) { + barf_perror("sock poll failed"); + break; + } else if (fds[sock_pollfd_idx].revents & POLLIN) { + accept_connection(*sock, true); + sock_pollfd_idx = -1; + } + } - if (*ro_sock != -1 && FD_ISSET(*ro_sock, &inset)) - accept_connection(*ro_sock, false); + if (ro_sock_pollfd_idx != -1) { + if (fds[ro_sock_pollfd_idx].revents & ~POLLIN) { + barf_perror("ro sock poll failed"); + break; + } else if (fds[ro_sock_pollfd_idx].revents & POLLIN) { + accept_connection(*ro_sock, false); + ro_sock_pollfd_idx = -1; + } + } - if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset)) - handle_event(); + if (xce_pollfd_idx != -1) { + if (fds[xce_pollfd_idx].revents & ~POLLIN) { + barf_perror("xce_handle poll failed"); + break; + } else if (fds[xce_pollfd_idx].revents & POLLIN) { + handle_event(); + xce_pollfd_idx = -1; + } + } next = list_entry(connections.next, typeof(*conn), list); if (&next->list != &connections) @@ -1939,21 +1995,36 @@ int main(int argc, char *argv[]) if (talloc_free(conn) == 0) continue; } else { - if (FD_ISSET(conn->fd, &inset)) - handle_input(conn); + if (conn->pollfd_idx != -1) { + if (fds[conn->pollfd_idx].revents + & ~(POLLIN|POLLOUT)) + talloc_free(conn); + else if (fds[conn->pollfd_idx].revents + & POLLIN) + handle_input(conn); + } if (talloc_free(conn) == 0) continue; talloc_increase_ref_count(conn); - if (FD_ISSET(conn->fd, &outset)) - handle_output(conn); + + if (conn->pollfd_idx != -1) { + if (fds[conn->pollfd_idx].revents + & ~(POLLIN|POLLOUT)) + talloc_free(conn); + else if (fds[conn->pollfd_idx].revents + & POLLOUT) + handle_output(conn); + } if (talloc_free(conn) == 0) continue; + + conn->pollfd_idx = -1; } } - max = initialize_set(&inset, &outset, *sock, *ro_sock, - &timeout); + initialize_fds(*sock, &sock_pollfd_idx, *ro_sock, + &ro_sock_pollfd_idx, &timeout); } } diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h index 492ca0d..cfbcf6f 100644 --- a/tools/xenstore/xenstored_core.h +++ b/tools/xenstore/xenstored_core.h @@ -60,6 +60,8 @@ struct connection /* The file descriptor we came in on. */ int fd; + /* The index of pollfd in global pollfd array */ + int pollfd_idx; /* Who am I? 0 for socket connections. */ unsigned int id; -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2013-03-25 11:17 UTC | newest] Thread overview: 8+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2013-02-20 11:05 [PATCH 0/2] switch to poll() in cxenstored's IO loop Wei Liu 2013-02-20 11:05 ` [PATCH 1/2] mini-os: implement poll(2) Wei Liu 2013-02-20 11:05 ` [PATCH 2/2] Switch to poll() in cxenstored's IO loop Wei Liu 2013-03-12 15:27 ` Ian Campbell 2013-03-12 15:34 ` Wei Liu 2013-03-12 15:37 ` Ian Campbell 2013-03-04 11:15 ` [PATCH 0/2] switch " Wei Liu -- strict thread matches above, loose matches on Subject: below -- 2013-03-25 11:17 [PATCH 0/2 V3] Switch " Wei Liu 2013-03-25 11:17 ` [PATCH 2/2] " Wei Liu
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).