* [PATCH] Switch to poll() in cxenstored's IO loop
@ 2013-01-21 19:23 Wei Liu
2013-01-22 9:47 ` Ian Campbell
0 siblings, 1 reply; 13+ messages in thread
From: Wei Liu @ 2013-01-21 19:23 UTC (permalink / raw)
To: xen-devel; +Cc: Wei Liu
Poll() can support more file descriptors than select(). We've done this for
xenconsoled, now do this for cxenstored as well.
The code is taken from xenconsoled and modified to adapt to cxenstored.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
tools/xenstore/xenstored_core.c | 140 ++++++++++++++++++++++++++-------------
tools/xenstore/xenstored_core.h | 2 +
2 files changed, 96 insertions(+), 46 deletions(-)
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index bd44645..7079c5e 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -19,7 +19,7 @@
#include <sys/types.h>
#include <sys/stat.h>
-#include <sys/select.h>
+#include <poll.h>
#ifndef NO_SOCKETS
#include <sys/socket.h>
#include <sys/un.h>
@@ -55,6 +55,12 @@
#include "hashtable.h"
extern xc_evtchn *xce_handle; /* in xenstored_domain.c */
+static struct pollfd *xce_pollfd;
+static struct pollfd *fds;
+static unsigned int current_array_size;
+static unsigned int nr_fds;
+
+#define ROUNDUP(_x, _w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
static bool verbose = false;
LIST_HEAD(connections);
@@ -62,6 +68,7 @@ static int tracefd = -1;
static bool recovery = true;
static bool remove_local = true;
static int reopen_log_pipe[2];
+static struct pollfd *reopen_log_pipe0_pollfd;
static char *tracefile = NULL;
static TDB_CONTEXT *tdb_ctx = NULL;
@@ -199,7 +206,7 @@ void trace_destroy(const void *data, const char *type)
/**
* Signal handler for SIGHUP, which requests that the trace log is reopened
* (in the main loop). A single byte is written to reopen_log_pipe, to awaken
- * the select() in the main loop.
+ * the poll() in the main loop.
*/
static void trigger_reopen_log(int signal __attribute__((unused)))
{
@@ -279,15 +286,12 @@ static int destroy_conn(void *_conn)
/* Flush outgoing if possible, but don't block. */
if (!conn->domain) {
- fd_set set;
- struct timeval none;
-
- FD_ZERO(&set);
- FD_SET(conn->fd, &set);
- none.tv_sec = none.tv_usec = 0;
+ struct pollfd pfd;
+ pfd.fd = conn->fd;
+ pfd.events = POLLOUT;
while (!list_empty(&conn->out_list)
- && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
+ && poll(&pfd, 1, 0) == 1)
if (!write_messages(conn))
break;
close(conn->fd);
@@ -300,52 +304,74 @@ static int destroy_conn(void *_conn)
}
-static void set_fd(int fd, fd_set *set, int *max)
+static struct pollfd *set_fd(int fd, short events)
{
- if (fd < 0)
- return;
- FD_SET(fd, set);
- if (fd > *max)
- *max = fd;
-}
+ struct pollfd *ret;
+ if (current_array_size < nr_fds + 1) {
+ struct pollfd *new_fds = NULL;
+ unsigned long newsize;
+
+ /* Round up to 2^8 boundary, in practice this just
+ * make newsize larger than current_array_size.
+ */
+ newsize = ROUNDUP(nr_fds + 1, 8);
+
+ new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
+ if (!new_fds)
+ goto fail;
+ fds = new_fds;
+
+ memset(&fds[0] + current_array_size, 0,
+ sizeof(struct pollfd ) * (newsize-current_array_size));
+ current_array_size = newsize;
+ }
+
+ fds[nr_fds].fd = fd;
+ fds[nr_fds].events = events;
+ ret = &fds[nr_fds];
+ nr_fds++;
+ return ret;
+fail:
+ syslog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
+ return NULL;
+}
-static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
- struct timeval **ptimeout)
+static void initialize_fds(int sock, struct pollfd **p_sock_pollfd,
+ int ro_sock, struct pollfd **p_ro_sock_pollfd,
+ int *ptimeout)
{
- static struct timeval zero_timeout = { 0 };
struct connection *conn;
- int max = -1;
- *ptimeout = NULL;
+ memset(fds, 0, sizeof(struct pollfd) * current_array_size);
+ nr_fds = 0;
- FD_ZERO(inset);
- FD_ZERO(outset);
+ *ptimeout = -1;
if (sock != -1)
- set_fd(sock, inset, &max);
+ *p_sock_pollfd = set_fd(sock, POLLIN|POLLPRI);
if (ro_sock != -1)
- set_fd(ro_sock, inset, &max);
+ *p_ro_sock_pollfd = set_fd(ro_sock, POLLIN|POLLPRI);
if (reopen_log_pipe[0] != -1)
- set_fd(reopen_log_pipe[0], inset, &max);
+ reopen_log_pipe0_pollfd =
+ set_fd(reopen_log_pipe[0], POLLIN|POLLPRI);
if (xce_handle != NULL)
- set_fd(xc_evtchn_fd(xce_handle), inset, &max);
+ xce_pollfd = set_fd(xc_evtchn_fd(xce_handle), POLLIN|POLLPRI);
list_for_each_entry(conn, &connections, list) {
if (conn->domain) {
if (domain_can_read(conn) ||
(domain_can_write(conn) &&
!list_empty(&conn->out_list)))
- *ptimeout = &zero_timeout;
+ *ptimeout = 0;
} else {
- set_fd(conn->fd, inset, &max);
+ short events = POLLIN|POLLPRI;
if (!list_empty(&conn->out_list))
- FD_SET(conn->fd, outset);
+ events |= POLLOUT;
+ conn->pollfd = set_fd(conn->fd, events);
}
}
-
- return max;
}
/* Is child a subnode of parent, or equal? */
@@ -1770,14 +1796,14 @@ int priv_domid = 0;
int main(int argc, char *argv[])
{
- int opt, *sock, *ro_sock, max;
- fd_set inset, outset;
+ int opt, *sock, *ro_sock;
+ struct pollfd *sock_pollfd = NULL, *ro_sock_pollfd = NULL;
bool dofork = true;
bool outputpid = false;
bool no_domain_init = false;
const char *pidfile = NULL;
int evtchn_fd = -1;
- struct timeval *timeout;
+ int timeout;
while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options,
NULL)) != -1) {
@@ -1884,7 +1910,8 @@ int main(int argc, char *argv[])
evtchn_fd = xc_evtchn_fd(xce_handle);
/* Get ready to listen to the tools. */
- max = initialize_set(&inset, &outset, *sock, *ro_sock, &timeout);
+ initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd,
+ &timeout);
/* Tell the kernel we're up and running. */
xenbus_notify_running();
@@ -1893,27 +1920,42 @@ int main(int argc, char *argv[])
for (;;) {
struct connection *conn, *next;
- if (select(max+1, &inset, &outset, NULL, timeout) < 0) {
+ if (poll(fds, nr_fds, timeout) < 0) {
if (errno == EINTR)
continue;
- barf_perror("Select failed");
+ barf_perror("Poll failed");
}
- if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) {
+ if (reopen_log_pipe[0] != -1 && reopen_log_pipe0_pollfd &&
+ !(reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
+ (reopen_log_pipe0_pollfd->revents & POLLIN)) {
char c;
if (read(reopen_log_pipe[0], &c, 1) != 1)
barf_perror("read failed");
reopen_log();
+ reopen_log_pipe0_pollfd = NULL;
}
- if (*sock != -1 && FD_ISSET(*sock, &inset))
+ if (*sock != -1 && sock_pollfd &&
+ !(sock_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
+ (sock_pollfd->revents & POLLIN)) {
accept_connection(*sock, true);
+ sock_pollfd = NULL;
+ }
- if (*ro_sock != -1 && FD_ISSET(*ro_sock, &inset))
+ if (*ro_sock != -1 && ro_sock_pollfd &&
+ !(ro_sock_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
+ (ro_sock_pollfd->revents & POLLIN)) {
accept_connection(*ro_sock, false);
+ ro_sock_pollfd = NULL;
+ }
- if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
+ if (evtchn_fd != -1 && xce_pollfd &&
+ !(xce_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
+ (xce_pollfd->revents & POLLIN)) {
handle_event();
+ xce_pollfd = NULL;
+ }
next = list_entry(connections.next, typeof(*conn), list);
if (&next->list != &connections)
@@ -1939,21 +1981,27 @@ int main(int argc, char *argv[])
if (talloc_free(conn) == 0)
continue;
} else {
- if (FD_ISSET(conn->fd, &inset))
+ if (conn->pollfd &&
+ !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
+ (conn->pollfd->revents & POLLIN))
handle_input(conn);
if (talloc_free(conn) == 0)
continue;
talloc_increase_ref_count(conn);
- if (FD_ISSET(conn->fd, &outset))
+ if (conn->pollfd &&
+ !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
+ (conn->pollfd->revents & POLLOUT))
handle_output(conn);
if (talloc_free(conn) == 0)
continue;
+
+ conn->pollfd = NULL;
}
}
- max = initialize_set(&inset, &outset, *sock, *ro_sock,
- &timeout);
+ initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd,
+ &timeout);
}
}
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index 492ca0d..f330a87 100644
--- a/tools/xenstore/xenstored_core.h
+++ b/tools/xenstore/xenstored_core.h
@@ -60,6 +60,8 @@ struct connection
/* The file descriptor we came in on. */
int fd;
+ /* The pollfd corresponding to fd. */
+ struct pollfd *pollfd;
/* Who am I? 0 for socket connections. */
unsigned int id;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] Switch to poll() in cxenstored's IO loop
2013-01-21 19:23 [PATCH] Switch to poll() in cxenstored's IO loop Wei Liu
@ 2013-01-22 9:47 ` Ian Campbell
2013-01-22 14:13 ` Wei Liu
0 siblings, 1 reply; 13+ messages in thread
From: Ian Campbell @ 2013-01-22 9:47 UTC (permalink / raw)
To: Wei Liu; +Cc: xen-devel@lists.xen.org
On Mon, 2013-01-21 at 19:23 +0000, Wei Liu wrote:
> - if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) {
> + if (reopen_log_pipe[0] != -1 && reopen_log_pipe0_pollfd &&
Is it the case that reopen_log_pip0_pollfd != NULL iff
reopen_log_pipe[0] != -1 ? Would simplify things a bit?
> + !(reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
This represents an error case I think? Is there anything we can do about
it? If this sort of error occurs and we do nothing will we end up just
spinning because every subsequent poll will just return straight away?
> + (reopen_log_pipe0_pollfd->revents & POLLIN)) {
You only handle POLLIN, not POLLOUT or POLLPRI -- so should they not be
part of the error case above? I don't think you ask for them other than
on the conn->poll_fd?
(those three comments apply to the other instances of this pattern too)
[...]
> @@ -1939,21 +1981,27 @@ int main(int argc, char *argv[])
> if (talloc_free(conn) == 0)
> continue;
> } else {
> - if (FD_ISSET(conn->fd, &inset))
> + if (conn->pollfd &&
> + !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> + (conn->pollfd->revents & POLLIN))
> handle_input(conn);
> if (talloc_free(conn) == 0)
> continue;
Do you know what this construct is all about? Some sort of reference
count on conn? Anyway, I wonder if this means you will frequently miss
setting pollfd = NULL below?
talloc_free can apparently only return non-zero if a destructor fails,
which suggests that more often than not you will not make it as far as
checking POLLOUT.
This is all pre-existing though, so maybe it just works and there is
some magic I don't understand ;-)
> talloc_increase_ref_count(conn);
> - if (FD_ISSET(conn->fd, &outset))
> + if (conn->pollfd &&
> + !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> + (conn->pollfd->revents & POLLOUT))
> handle_output(conn);
> if (talloc_free(conn) == 0)
> continue;
> +
> + conn->pollfd = NULL;
> }
> }
>
> - max = initialize_set(&inset, &outset, *sock, *ro_sock,
> - &timeout);
> + initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd,
> + &timeout);
> }
> }
>
> diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
> index 492ca0d..f330a87 100644
> --- a/tools/xenstore/xenstored_core.h
> +++ b/tools/xenstore/xenstored_core.h
> @@ -60,6 +60,8 @@ struct connection
>
> /* The file descriptor we came in on. */
> int fd;
> + /* The pollfd corresponding to fd. */
> + struct pollfd *pollfd;
>
> /* Who am I? 0 for socket connections. */
> unsigned int id;
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Switch to poll() in cxenstored's IO loop
2013-01-22 9:47 ` Ian Campbell
@ 2013-01-22 14:13 ` Wei Liu
2013-01-22 14:25 ` Ian Campbell
0 siblings, 1 reply; 13+ messages in thread
From: Wei Liu @ 2013-01-22 14:13 UTC (permalink / raw)
To: Ian Campbell; +Cc: wei.liu2, xen-devel@lists.xen.org
On Tue, 2013-01-22 at 09:47 +0000, Ian Campbell wrote:
> On Mon, 2013-01-21 at 19:23 +0000, Wei Liu wrote:
>
> > - if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) {
> > + if (reopen_log_pipe[0] != -1 && reopen_log_pipe0_pollfd &&
>
> Is it the case that reopen_log_pip0_pollfd != NULL iff
> reopen_log_pipe[0] != -1 ? Would simplify things a bit?
>
Not really. Pollfd is valid iff set_fd() returns a valid pointer.
set_fd() invokes realloc() which can fail.
> > + !(reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
>
> This represents an error case I think? Is there anything we can do about
> it? If this sort of error occurs and we do nothing will we end up just
> spinning because every subsequent poll will just return straight away?
>
>
Yes, this represents an error. This indicates the pipe used to trigger
reopen_log is broken. What's the motivation of reopening log file? I
have no idea about the use case. But simply ignoring this error instead
of crashing the process is better choice IMHO.
The whole pollfd set is reinitialized for every loop. So it is also
possible that for the next loop it will success even previous poll
fails?
> > + (reopen_log_pipe0_pollfd->revents & POLLIN)) {
>
> You only handle POLLIN, not POLLOUT or POLLPRI -- so should they not be
> part of the error case above? I don't think you ask for them other than
> on the conn->poll_fd?
>
> (those three comments apply to the other instances of this pattern too)
> [...]
> > @@ -1939,21 +1981,27 @@ int main(int argc, char *argv[])
> > if (talloc_free(conn) == 0)
> > continue;
> > } else {
> > - if (FD_ISSET(conn->fd, &inset))
> > + if (conn->pollfd &&
> > + !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> > + (conn->pollfd->revents & POLLIN))
> > handle_input(conn);
> > if (talloc_free(conn) == 0)
> > continue;
>
> Do you know what this construct is all about? Some sort of reference
> count on conn? Anyway, I wonder if this means you will frequently miss
> setting pollfd = NULL below?
>
> talloc_free can apparently only return non-zero if a destructor fails,
> which suggests that more often than not you will not make it as far as
> checking POLLOUT.
>
> This is all pre-existing though, so maybe it just works and there is
> some magic I don't understand ;-)
>
The document of talloc_free() says that if the pointer is freed 0 is
returned otherwise -1 is returned.
So if conn is successfully freed, than there is no need to reset pollfd
in conn to NULL.
Wei.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Switch to poll() in cxenstored's IO loop
2013-01-22 14:13 ` Wei Liu
@ 2013-01-22 14:25 ` Ian Campbell
2013-01-22 14:50 ` Wei Liu
0 siblings, 1 reply; 13+ messages in thread
From: Ian Campbell @ 2013-01-22 14:25 UTC (permalink / raw)
To: Wei Liu; +Cc: xen-devel@lists.xen.org
On Tue, 2013-01-22 at 14:13 +0000, Wei Liu wrote:
> On Tue, 2013-01-22 at 09:47 +0000, Ian Campbell wrote:
> > On Mon, 2013-01-21 at 19:23 +0000, Wei Liu wrote:
> >
> > > - if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) {
> > > + if (reopen_log_pipe[0] != -1 && reopen_log_pipe0_pollfd &&
> >
> > Is it the case that reopen_log_pip0_pollfd != NULL iff
> > reopen_log_pipe[0] != -1 ? Would simplify things a bit?
> >
>
> Not really. Pollfd is valid iff set_fd() returns a valid pointer.
But set_fd takes reopen_log_pipe[0], doesn't it? So a necessary
precondition is that reopen_log_pipe[0] is != -1.
IOW whenever reopen_log_pip0_pollfd != NULL it is also true that
reopen_log_pipe[0] != -1 and so "if (reopen_log_pipe[0] != -1 &&
reopen_log_pipe0_pollfd)" is equivalent to just "if
(reopen_log_pipe0_pollfd)"
> set_fd() invokes realloc() which can fail.
In the case where reopen_log_pip0_pollfd == NULL it doesn't matter what
reopen_log_pipe[0] is though.
> > > + !(reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> >
> > This represents an error case I think? Is there anything we can do about
> > it? If this sort of error occurs and we do nothing will we end up just
> > spinning because every subsequent poll will just return straight away?
> >
> >
>
> Yes, this represents an error. This indicates the pipe used to trigger
> reopen_log is broken. What's the motivation of reopening log file?
Rotation I should imagine.
> I
> have no idea about the use case. But simply ignoring this error instead
> of crashing the process is better choice IMHO.
Agreed, but my concern was that the daemon would spin consuming 100%
CPU, which is nearly as bad as crashing.
> The whole pollfd set is reinitialized for every loop. So it is also
> possible that for the next loop it will success even previous poll
> fails?
I suppose it depends on the reason it failed. I would expect most of
them would be pretty final rather than temporary but I haven't looked at
the pipe docs.
> > > @@ -1939,21 +1981,27 @@ int main(int argc, char *argv[])
> > > if (talloc_free(conn) == 0)
> > > continue;
> > > } else {
> > > - if (FD_ISSET(conn->fd, &inset))
> > > + if (conn->pollfd &&
> > > + !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> > > + (conn->pollfd->revents & POLLIN))
> > > handle_input(conn);
> > > if (talloc_free(conn) == 0)
> > > continue;
> >
> > Do you know what this construct is all about? Some sort of reference
> > count on conn? Anyway, I wonder if this means you will frequently miss
> > setting pollfd = NULL below?
> >
> > talloc_free can apparently only return non-zero if a destructor fails,
> > which suggests that more often than not you will not make it as far as
> > checking POLLOUT.
> >
> > This is all pre-existing though, so maybe it just works and there is
> > some magic I don't understand ;-)
> >
>
> The document of talloc_free() says that if the pointer is freed 0 is
> returned otherwise -1 is returned.
>
> So if conn is successfully freed, than there is no need to reset pollfd
> in conn to NULL.
Doh, yes.
What about missing any pending POLLOUT in that case though, due to the
continue?
Ian.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Switch to poll() in cxenstored's IO loop
2013-01-22 14:25 ` Ian Campbell
@ 2013-01-22 14:50 ` Wei Liu
2013-01-22 14:57 ` Ian Campbell
2013-02-05 10:58 ` [PATCH] " Ian Campbell
0 siblings, 2 replies; 13+ messages in thread
From: Wei Liu @ 2013-01-22 14:50 UTC (permalink / raw)
To: Ian Campbell; +Cc: wei.liu2, xen-devel@lists.xen.org
On Tue, 2013-01-22 at 14:25 +0000, Ian Campbell wrote:
> On Tue, 2013-01-22 at 14:13 +0000, Wei Liu wrote:
> > On Tue, 2013-01-22 at 09:47 +0000, Ian Campbell wrote:
> > > On Mon, 2013-01-21 at 19:23 +0000, Wei Liu wrote:
> > >
> > > > - if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) {
> > > > + if (reopen_log_pipe[0] != -1 && reopen_log_pipe0_pollfd &&
> > >
> > > Is it the case that reopen_log_pip0_pollfd != NULL iff
> > > reopen_log_pipe[0] != -1 ? Would simplify things a bit?
> > >
> >
> > Not really. Pollfd is valid iff set_fd() returns a valid pointer.
>
> But set_fd takes reopen_log_pipe[0], doesn't it? So a necessary
> precondition is that reopen_log_pipe[0] is != -1.
> IOW whenever reopen_log_pip0_pollfd != NULL it is also true that
> reopen_log_pipe[0] != -1 and so "if (reopen_log_pipe[0] != -1 &&
> reopen_log_pipe0_pollfd)" is equivalent to just "if
> (reopen_log_pipe0_pollfd)"
>
> > set_fd() invokes realloc() which can fail.
>
> In the case where reopen_log_pip0_pollfd == NULL it doesn't matter what
> reopen_log_pipe[0] is though.
>
Hah, now I get your point. ;-)
> > > > + !(reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> > >
> > > This represents an error case I think? Is there anything we can do about
> > > it? If this sort of error occurs and we do nothing will we end up just
> > > spinning because every subsequent poll will just return straight away?
> > >
> > >
> >
> > Yes, this represents an error. This indicates the pipe used to trigger
> > reopen_log is broken. What's the motivation of reopening log file?
>
> Rotation I should imagine.
>
> > I
> > have no idea about the use case. But simply ignoring this error instead
> > of crashing the process is better choice IMHO.
>
> Agreed, but my concern was that the daemon would spin consuming 100%
> CPU, which is nearly as bad as crashing.
>
> > The whole pollfd set is reinitialized for every loop. So it is also
> > possible that for the next loop it will success even previous poll
> > fails?
>
> I suppose it depends on the reason it failed. I would expect most of
> them would be pretty final rather than temporary but I haven't looked at
> the pipe docs.
>
We can reinitialize the pipe if error occurs, because this pipe is only
used within xenstored itself to notify log reload.
> > > > @@ -1939,21 +1981,27 @@ int main(int argc, char *argv[])
> > > > if (talloc_free(conn) == 0)
> > > > continue;
> > > > } else {
> > > > - if (FD_ISSET(conn->fd, &inset))
> > > > + if (conn->pollfd &&
> > > > + !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> > > > + (conn->pollfd->revents & POLLIN))
> > > > handle_input(conn);
> > > > if (talloc_free(conn) == 0)
> > > > continue;
> > >
> > > Do you know what this construct is all about? Some sort of reference
> > > count on conn? Anyway, I wonder if this means you will frequently miss
> > > setting pollfd = NULL below?
> > >
> > > talloc_free can apparently only return non-zero if a destructor fails,
> > > which suggests that more often than not you will not make it as far as
> > > checking POLLOUT.
> > >
> > > This is all pre-existing though, so maybe it just works and there is
> > > some magic I don't understand ;-)
> > >
> >
> > The document of talloc_free() says that if the pointer is freed 0 is
> > returned otherwise -1 is returned.
> >
> > So if conn is successfully freed, than there is no need to reset pollfd
> > in conn to NULL.
>
> Doh, yes.
>
> What about missing any pending POLLOUT in that case though, due to the
> continue?
>
I presume there is some magic here? The original code is constructed
like this, I only did the necessary switch to use poll().
Wei.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Switch to poll() in cxenstored's IO loop
2013-01-22 14:50 ` Wei Liu
@ 2013-01-22 14:57 ` Ian Campbell
2013-01-22 15:18 ` [PATCH V2] " Wei Liu
2013-02-05 10:58 ` [PATCH] " Ian Campbell
1 sibling, 1 reply; 13+ messages in thread
From: Ian Campbell @ 2013-01-22 14:57 UTC (permalink / raw)
To: Wei Liu; +Cc: xen-devel@lists.xen.org
On Tue, 2013-01-22 at 14:50 +0000, Wei Liu wrote:
> > What about missing any pending POLLOUT in that case though, due to the
> > continue?
> >
>
> I presume there is some magic here?
I guess so. Lets pay no attention to the man behind the curtain ;-)
Ian.
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH V2] Switch to poll() in cxenstored's IO loop
2013-01-22 14:57 ` Ian Campbell
@ 2013-01-22 15:18 ` Wei Liu
2013-01-29 17:40 ` Wei Liu
0 siblings, 1 reply; 13+ messages in thread
From: Wei Liu @ 2013-01-22 15:18 UTC (permalink / raw)
To: Ian Campbell; +Cc: wei.liu2, xen-devel@lists.xen.org
commit 14b82f52d40143019094727b8fa1aa3a111ffab3
Author: Wei Liu <wei.liu2@citrix.com>
Date: Mon Jan 21 19:07:37 2013 +0000
Switch to poll() in cxenstored's IO loop
Poll() can support more file descriptors than select(). We've done this for
xenconsoled, now do this for cxenstored as well.
The code is taken from xenconsoled and modified to adapt to cxenstored.
Updated:
* reopen pipe if polling on reopen_log_pipe fails.
* exit if polling on some critical fds fails.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index bd44645..b91704e 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -19,7 +19,7 @@
#include <sys/types.h>
#include <sys/stat.h>
-#include <sys/select.h>
+#include <poll.h>
#ifndef NO_SOCKETS
#include <sys/socket.h>
#include <sys/un.h>
@@ -55,6 +55,12 @@
#include "hashtable.h"
extern xc_evtchn *xce_handle; /* in xenstored_domain.c */
+static struct pollfd *xce_pollfd;
+static struct pollfd *fds;
+static unsigned int current_array_size;
+static unsigned int nr_fds;
+
+#define ROUNDUP(_x, _w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
static bool verbose = false;
LIST_HEAD(connections);
@@ -62,6 +68,7 @@ static int tracefd = -1;
static bool recovery = true;
static bool remove_local = true;
static int reopen_log_pipe[2];
+static struct pollfd *reopen_log_pipe0_pollfd;
static char *tracefile = NULL;
static TDB_CONTEXT *tdb_ctx = NULL;
@@ -199,7 +206,7 @@ void trace_destroy(const void *data, const char *type)
/**
* Signal handler for SIGHUP, which requests that the trace log is reopened
* (in the main loop). A single byte is written to reopen_log_pipe, to awaken
- * the select() in the main loop.
+ * the poll() in the main loop.
*/
static void trigger_reopen_log(int signal __attribute__((unused)))
{
@@ -279,15 +286,12 @@ static int destroy_conn(void *_conn)
/* Flush outgoing if possible, but don't block. */
if (!conn->domain) {
- fd_set set;
- struct timeval none;
-
- FD_ZERO(&set);
- FD_SET(conn->fd, &set);
- none.tv_sec = none.tv_usec = 0;
+ struct pollfd pfd;
+ pfd.fd = conn->fd;
+ pfd.events = POLLOUT;
while (!list_empty(&conn->out_list)
- && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
+ && poll(&pfd, 1, 0) == 1)
if (!write_messages(conn))
break;
close(conn->fd);
@@ -300,52 +304,74 @@ static int destroy_conn(void *_conn)
}
-static void set_fd(int fd, fd_set *set, int *max)
+static struct pollfd *set_fd(int fd, short events)
{
- if (fd < 0)
- return;
- FD_SET(fd, set);
- if (fd > *max)
- *max = fd;
-}
+ struct pollfd *ret;
+ if (current_array_size < nr_fds + 1) {
+ struct pollfd *new_fds = NULL;
+ unsigned long newsize;
+
+ /* Round up to 2^8 boundary, in practice this just
+ * make newsize larger than current_array_size.
+ */
+ newsize = ROUNDUP(nr_fds + 1, 8);
+ new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
+ if (!new_fds)
+ goto fail;
+ fds = new_fds;
-static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
- struct timeval **ptimeout)
+ memset(&fds[0] + current_array_size, 0,
+ sizeof(struct pollfd ) * (newsize-current_array_size));
+ current_array_size = newsize;
+ }
+
+ fds[nr_fds].fd = fd;
+ fds[nr_fds].events = events;
+ ret = &fds[nr_fds];
+ nr_fds++;
+
+ return ret;
+fail:
+ syslog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
+ return NULL;
+}
+
+static void initialize_fds(int sock, struct pollfd **p_sock_pollfd,
+ int ro_sock, struct pollfd **p_ro_sock_pollfd,
+ int *ptimeout)
{
- static struct timeval zero_timeout = { 0 };
struct connection *conn;
- int max = -1;
- *ptimeout = NULL;
+ memset(fds, 0, sizeof(struct pollfd) * current_array_size);
+ nr_fds = 0;
- FD_ZERO(inset);
- FD_ZERO(outset);
+ *ptimeout = -1;
if (sock != -1)
- set_fd(sock, inset, &max);
+ *p_sock_pollfd = set_fd(sock, POLLIN|POLLPRI);
if (ro_sock != -1)
- set_fd(ro_sock, inset, &max);
+ *p_ro_sock_pollfd = set_fd(ro_sock, POLLIN|POLLPRI);
if (reopen_log_pipe[0] != -1)
- set_fd(reopen_log_pipe[0], inset, &max);
+ reopen_log_pipe0_pollfd =
+ set_fd(reopen_log_pipe[0], POLLIN|POLLPRI);
if (xce_handle != NULL)
- set_fd(xc_evtchn_fd(xce_handle), inset, &max);
+ xce_pollfd = set_fd(xc_evtchn_fd(xce_handle), POLLIN|POLLPRI);
list_for_each_entry(conn, &connections, list) {
if (conn->domain) {
if (domain_can_read(conn) ||
(domain_can_write(conn) &&
!list_empty(&conn->out_list)))
- *ptimeout = &zero_timeout;
+ *ptimeout = 0;
} else {
- set_fd(conn->fd, inset, &max);
+ short events = POLLIN|POLLPRI;
if (!list_empty(&conn->out_list))
- FD_SET(conn->fd, outset);
+ events |= POLLOUT;
+ conn->pollfd = set_fd(conn->fd, events);
}
}
-
- return max;
}
/* Is child a subnode of parent, or equal? */
@@ -1770,14 +1796,13 @@ int priv_domid = 0;
int main(int argc, char *argv[])
{
- int opt, *sock, *ro_sock, max;
- fd_set inset, outset;
+ int opt, *sock, *ro_sock;
+ struct pollfd *sock_pollfd = NULL, *ro_sock_pollfd = NULL;
bool dofork = true;
bool outputpid = false;
bool no_domain_init = false;
const char *pidfile = NULL;
- int evtchn_fd = -1;
- struct timeval *timeout;
+ int timeout;
while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options,
NULL)) != -1) {
@@ -1880,11 +1905,9 @@ int main(int argc, char *argv[])
signal(SIGHUP, trigger_reopen_log);
- if (xce_handle != NULL)
- evtchn_fd = xc_evtchn_fd(xce_handle);
-
/* Get ready to listen to the tools. */
- max = initialize_set(&inset, &outset, *sock, *ro_sock, &timeout);
+ initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd,
+ &timeout);
/* Tell the kernel we're up and running. */
xenbus_notify_running();
@@ -1893,27 +1916,55 @@ int main(int argc, char *argv[])
for (;;) {
struct connection *conn, *next;
- if (select(max+1, &inset, &outset, NULL, timeout) < 0) {
+ if (poll(fds, nr_fds, timeout) < 0) {
if (errno == EINTR)
continue;
- barf_perror("Select failed");
+ barf_perror("Poll failed");
}
- if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) {
- char c;
- if (read(reopen_log_pipe[0], &c, 1) != 1)
- barf_perror("read failed");
- reopen_log();
+ if (reopen_log_pipe0_pollfd) {
+ if (reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
+ close(reopen_log_pipe[0]);
+ close(reopen_log_pipe[1]);
+ init_pipe(reopen_log_pipe);
+ } else if (reopen_log_pipe0_pollfd->revents & POLLIN) {
+ char c;
+ if (read(reopen_log_pipe[0], &c, 1) != 1)
+ barf_perror("read failed");
+ reopen_log();
+ }
+ reopen_log_pipe0_pollfd = NULL;
}
- if (*sock != -1 && FD_ISSET(*sock, &inset))
- accept_connection(*sock, true);
+ if (sock_pollfd) {
+ if (sock_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
+ barf_perror("sock poll failed");
+ break;
+ } else if (sock_pollfd->revents & POLLIN) {
+ accept_connection(*sock, true);
+ sock_pollfd = NULL;
+ }
+ }
- if (*ro_sock != -1 && FD_ISSET(*ro_sock, &inset))
- accept_connection(*ro_sock, false);
+ if (ro_sock_pollfd) {
+ if (ro_sock_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
+ barf_perror("ro sock poll failed");
+ break;
+ } else if (ro_sock_pollfd->revents & POLLIN) {
+ accept_connection(*ro_sock, false);
+ ro_sock_pollfd = NULL;
+ }
+ }
- if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
- handle_event();
+ if (xce_pollfd) {
+ if (xce_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
+ barf_perror("xce_handle poll failed");
+ break;
+ } else if (xce_pollfd->revents & POLLIN) {
+ handle_event();
+ xce_pollfd = NULL;
+ }
+ }
next = list_entry(connections.next, typeof(*conn), list);
if (&next->list != &connections)
@@ -1939,21 +1990,27 @@ int main(int argc, char *argv[])
if (talloc_free(conn) == 0)
continue;
} else {
- if (FD_ISSET(conn->fd, &inset))
+ if (conn->pollfd &&
+ !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
+ (conn->pollfd->revents & POLLIN))
handle_input(conn);
if (talloc_free(conn) == 0)
continue;
talloc_increase_ref_count(conn);
- if (FD_ISSET(conn->fd, &outset))
+ if (conn->pollfd &&
+ !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
+ (conn->pollfd->revents & POLLOUT))
handle_output(conn);
if (talloc_free(conn) == 0)
continue;
+
+ conn->pollfd = NULL;
}
}
- max = initialize_set(&inset, &outset, *sock, *ro_sock,
- &timeout);
+ initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd,
+ &timeout);
}
}
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index 492ca0d..f330a87 100644
--- a/tools/xenstore/xenstored_core.h
+++ b/tools/xenstore/xenstored_core.h
@@ -60,6 +60,8 @@ struct connection
/* The file descriptor we came in on. */
int fd;
+ /* The pollfd corresponding to fd. */
+ struct pollfd *pollfd;
/* Who am I? 0 for socket connections. */
unsigned int id;
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH V2] Switch to poll() in cxenstored's IO loop
2013-01-22 15:18 ` [PATCH V2] " Wei Liu
@ 2013-01-29 17:40 ` Wei Liu
2013-01-30 9:02 ` Ian Campbell
0 siblings, 1 reply; 13+ messages in thread
From: Wei Liu @ 2013-01-29 17:40 UTC (permalink / raw)
To: Ian Campbell; +Cc: wei.liu2, xen-devel@lists.xen.org
Ian, ping?
On Tue, 2013-01-22 at 15:18 +0000, Wei Liu wrote:
> commit 14b82f52d40143019094727b8fa1aa3a111ffab3
> Author: Wei Liu <wei.liu2@citrix.com>
> Date: Mon Jan 21 19:07:37 2013 +0000
>
> Switch to poll() in cxenstored's IO loop
>
> Poll() can support more file descriptors than select(). We've done this for
> xenconsoled, now do this for cxenstored as well.
>
> The code is taken from xenconsoled and modified to adapt to cxenstored.
>
> Updated:
> * reopen pipe if polling on reopen_log_pipe fails.
> * exit if polling on some critical fds fails.
>
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
>
> diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
> index bd44645..b91704e 100644
> --- a/tools/xenstore/xenstored_core.c
> +++ b/tools/xenstore/xenstored_core.c
> @@ -19,7 +19,7 @@
>
> #include <sys/types.h>
> #include <sys/stat.h>
> -#include <sys/select.h>
> +#include <poll.h>
> #ifndef NO_SOCKETS
> #include <sys/socket.h>
> #include <sys/un.h>
> @@ -55,6 +55,12 @@
> #include "hashtable.h"
>
> extern xc_evtchn *xce_handle; /* in xenstored_domain.c */
> +static struct pollfd *xce_pollfd;
> +static struct pollfd *fds;
> +static unsigned int current_array_size;
> +static unsigned int nr_fds;
> +
> +#define ROUNDUP(_x, _w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
>
> static bool verbose = false;
> LIST_HEAD(connections);
> @@ -62,6 +68,7 @@ static int tracefd = -1;
> static bool recovery = true;
> static bool remove_local = true;
> static int reopen_log_pipe[2];
> +static struct pollfd *reopen_log_pipe0_pollfd;
> static char *tracefile = NULL;
> static TDB_CONTEXT *tdb_ctx = NULL;
>
> @@ -199,7 +206,7 @@ void trace_destroy(const void *data, const char *type)
> /**
> * Signal handler for SIGHUP, which requests that the trace log is reopened
> * (in the main loop). A single byte is written to reopen_log_pipe, to awaken
> - * the select() in the main loop.
> + * the poll() in the main loop.
> */
> static void trigger_reopen_log(int signal __attribute__((unused)))
> {
> @@ -279,15 +286,12 @@ static int destroy_conn(void *_conn)
>
> /* Flush outgoing if possible, but don't block. */
> if (!conn->domain) {
> - fd_set set;
> - struct timeval none;
> -
> - FD_ZERO(&set);
> - FD_SET(conn->fd, &set);
> - none.tv_sec = none.tv_usec = 0;
> + struct pollfd pfd;
> + pfd.fd = conn->fd;
> + pfd.events = POLLOUT;
>
> while (!list_empty(&conn->out_list)
> - && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
> + && poll(&pfd, 1, 0) == 1)
> if (!write_messages(conn))
> break;
> close(conn->fd);
> @@ -300,52 +304,74 @@ static int destroy_conn(void *_conn)
> }
>
>
> -static void set_fd(int fd, fd_set *set, int *max)
> +static struct pollfd *set_fd(int fd, short events)
> {
> - if (fd < 0)
> - return;
> - FD_SET(fd, set);
> - if (fd > *max)
> - *max = fd;
> -}
> + struct pollfd *ret;
> + if (current_array_size < nr_fds + 1) {
> + struct pollfd *new_fds = NULL;
> + unsigned long newsize;
> +
> + /* Round up to 2^8 boundary, in practice this just
> + * make newsize larger than current_array_size.
> + */
> + newsize = ROUNDUP(nr_fds + 1, 8);
>
> + new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
> + if (!new_fds)
> + goto fail;
> + fds = new_fds;
>
> -static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
> - struct timeval **ptimeout)
> + memset(&fds[0] + current_array_size, 0,
> + sizeof(struct pollfd ) * (newsize-current_array_size));
> + current_array_size = newsize;
> + }
> +
> + fds[nr_fds].fd = fd;
> + fds[nr_fds].events = events;
> + ret = &fds[nr_fds];
> + nr_fds++;
> +
> + return ret;
> +fail:
> + syslog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
> + return NULL;
> +}
> +
> +static void initialize_fds(int sock, struct pollfd **p_sock_pollfd,
> + int ro_sock, struct pollfd **p_ro_sock_pollfd,
> + int *ptimeout)
> {
> - static struct timeval zero_timeout = { 0 };
> struct connection *conn;
> - int max = -1;
>
> - *ptimeout = NULL;
> + memset(fds, 0, sizeof(struct pollfd) * current_array_size);
> + nr_fds = 0;
>
> - FD_ZERO(inset);
> - FD_ZERO(outset);
> + *ptimeout = -1;
>
> if (sock != -1)
> - set_fd(sock, inset, &max);
> + *p_sock_pollfd = set_fd(sock, POLLIN|POLLPRI);
> if (ro_sock != -1)
> - set_fd(ro_sock, inset, &max);
> + *p_ro_sock_pollfd = set_fd(ro_sock, POLLIN|POLLPRI);
> if (reopen_log_pipe[0] != -1)
> - set_fd(reopen_log_pipe[0], inset, &max);
> + reopen_log_pipe0_pollfd =
> + set_fd(reopen_log_pipe[0], POLLIN|POLLPRI);
>
> if (xce_handle != NULL)
> - set_fd(xc_evtchn_fd(xce_handle), inset, &max);
> + xce_pollfd = set_fd(xc_evtchn_fd(xce_handle), POLLIN|POLLPRI);
>
> list_for_each_entry(conn, &connections, list) {
> if (conn->domain) {
> if (domain_can_read(conn) ||
> (domain_can_write(conn) &&
> !list_empty(&conn->out_list)))
> - *ptimeout = &zero_timeout;
> + *ptimeout = 0;
> } else {
> - set_fd(conn->fd, inset, &max);
> + short events = POLLIN|POLLPRI;
> if (!list_empty(&conn->out_list))
> - FD_SET(conn->fd, outset);
> + events |= POLLOUT;
> + conn->pollfd = set_fd(conn->fd, events);
> }
> }
> -
> - return max;
> }
>
> /* Is child a subnode of parent, or equal? */
> @@ -1770,14 +1796,13 @@ int priv_domid = 0;
>
> int main(int argc, char *argv[])
> {
> - int opt, *sock, *ro_sock, max;
> - fd_set inset, outset;
> + int opt, *sock, *ro_sock;
> + struct pollfd *sock_pollfd = NULL, *ro_sock_pollfd = NULL;
> bool dofork = true;
> bool outputpid = false;
> bool no_domain_init = false;
> const char *pidfile = NULL;
> - int evtchn_fd = -1;
> - struct timeval *timeout;
> + int timeout;
>
> while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options,
> NULL)) != -1) {
> @@ -1880,11 +1905,9 @@ int main(int argc, char *argv[])
>
> signal(SIGHUP, trigger_reopen_log);
>
> - if (xce_handle != NULL)
> - evtchn_fd = xc_evtchn_fd(xce_handle);
> -
> /* Get ready to listen to the tools. */
> - max = initialize_set(&inset, &outset, *sock, *ro_sock, &timeout);
> + initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd,
> + &timeout);
>
> /* Tell the kernel we're up and running. */
> xenbus_notify_running();
> @@ -1893,27 +1916,55 @@ int main(int argc, char *argv[])
> for (;;) {
> struct connection *conn, *next;
>
> - if (select(max+1, &inset, &outset, NULL, timeout) < 0) {
> + if (poll(fds, nr_fds, timeout) < 0) {
> if (errno == EINTR)
> continue;
> - barf_perror("Select failed");
> + barf_perror("Poll failed");
> }
>
> - if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) {
> - char c;
> - if (read(reopen_log_pipe[0], &c, 1) != 1)
> - barf_perror("read failed");
> - reopen_log();
> + if (reopen_log_pipe0_pollfd) {
> + if (reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> + close(reopen_log_pipe[0]);
> + close(reopen_log_pipe[1]);
> + init_pipe(reopen_log_pipe);
> + } else if (reopen_log_pipe0_pollfd->revents & POLLIN) {
> + char c;
> + if (read(reopen_log_pipe[0], &c, 1) != 1)
> + barf_perror("read failed");
> + reopen_log();
> + }
> + reopen_log_pipe0_pollfd = NULL;
> }
>
> - if (*sock != -1 && FD_ISSET(*sock, &inset))
> - accept_connection(*sock, true);
> + if (sock_pollfd) {
> + if (sock_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> + barf_perror("sock poll failed");
> + break;
> + } else if (sock_pollfd->revents & POLLIN) {
> + accept_connection(*sock, true);
> + sock_pollfd = NULL;
> + }
> + }
>
> - if (*ro_sock != -1 && FD_ISSET(*ro_sock, &inset))
> - accept_connection(*ro_sock, false);
> + if (ro_sock_pollfd) {
> + if (ro_sock_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> + barf_perror("ro sock poll failed");
> + break;
> + } else if (ro_sock_pollfd->revents & POLLIN) {
> + accept_connection(*ro_sock, false);
> + ro_sock_pollfd = NULL;
> + }
> + }
>
> - if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
> - handle_event();
> + if (xce_pollfd) {
> + if (xce_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> + barf_perror("xce_handle poll failed");
> + break;
> + } else if (xce_pollfd->revents & POLLIN) {
> + handle_event();
> + xce_pollfd = NULL;
> + }
> + }
>
> next = list_entry(connections.next, typeof(*conn), list);
> if (&next->list != &connections)
> @@ -1939,21 +1990,27 @@ int main(int argc, char *argv[])
> if (talloc_free(conn) == 0)
> continue;
> } else {
> - if (FD_ISSET(conn->fd, &inset))
> + if (conn->pollfd &&
> + !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> + (conn->pollfd->revents & POLLIN))
> handle_input(conn);
> if (talloc_free(conn) == 0)
> continue;
>
> talloc_increase_ref_count(conn);
> - if (FD_ISSET(conn->fd, &outset))
> + if (conn->pollfd &&
> + !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> + (conn->pollfd->revents & POLLOUT))
> handle_output(conn);
> if (talloc_free(conn) == 0)
> continue;
> +
> + conn->pollfd = NULL;
> }
> }
>
> - max = initialize_set(&inset, &outset, *sock, *ro_sock,
> - &timeout);
> + initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd,
> + &timeout);
> }
> }
>
> diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
> index 492ca0d..f330a87 100644
> --- a/tools/xenstore/xenstored_core.h
> +++ b/tools/xenstore/xenstored_core.h
> @@ -60,6 +60,8 @@ struct connection
>
> /* The file descriptor we came in on. */
> int fd;
> + /* The pollfd corresponding to fd. */
> + struct pollfd *pollfd;
>
> /* Who am I? 0 for socket connections. */
> unsigned int id;
>
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH V2] Switch to poll() in cxenstored's IO loop
2013-01-29 17:40 ` Wei Liu
@ 2013-01-30 9:02 ` Ian Campbell
0 siblings, 0 replies; 13+ messages in thread
From: Ian Campbell @ 2013-01-30 9:02 UTC (permalink / raw)
To: Wei Liu; +Cc: xen-devel@lists.xen.org
On Tue, 2013-01-29 at 17:40 +0000, Wei Liu wrote:
> Ian, ping?
It's in my queue but I've been holding off applying until we get a test
pass and push, we've got a weeks worth of stuff in the staging queue at
the moment.
Ian.
>
> On Tue, 2013-01-22 at 15:18 +0000, Wei Liu wrote:
> > commit 14b82f52d40143019094727b8fa1aa3a111ffab3
> > Author: Wei Liu <wei.liu2@citrix.com>
> > Date: Mon Jan 21 19:07:37 2013 +0000
> >
> > Switch to poll() in cxenstored's IO loop
> >
> > Poll() can support more file descriptors than select(). We've done this for
> > xenconsoled, now do this for cxenstored as well.
> >
> > The code is taken from xenconsoled and modified to adapt to cxenstored.
> >
> > Updated:
> > * reopen pipe if polling on reopen_log_pipe fails.
> > * exit if polling on some critical fds fails.
> >
> > Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> >
> > diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
> > index bd44645..b91704e 100644
> > --- a/tools/xenstore/xenstored_core.c
> > +++ b/tools/xenstore/xenstored_core.c
> > @@ -19,7 +19,7 @@
> >
> > #include <sys/types.h>
> > #include <sys/stat.h>
> > -#include <sys/select.h>
> > +#include <poll.h>
> > #ifndef NO_SOCKETS
> > #include <sys/socket.h>
> > #include <sys/un.h>
> > @@ -55,6 +55,12 @@
> > #include "hashtable.h"
> >
> > extern xc_evtchn *xce_handle; /* in xenstored_domain.c */
> > +static struct pollfd *xce_pollfd;
> > +static struct pollfd *fds;
> > +static unsigned int current_array_size;
> > +static unsigned int nr_fds;
> > +
> > +#define ROUNDUP(_x, _w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
> >
> > static bool verbose = false;
> > LIST_HEAD(connections);
> > @@ -62,6 +68,7 @@ static int tracefd = -1;
> > static bool recovery = true;
> > static bool remove_local = true;
> > static int reopen_log_pipe[2];
> > +static struct pollfd *reopen_log_pipe0_pollfd;
> > static char *tracefile = NULL;
> > static TDB_CONTEXT *tdb_ctx = NULL;
> >
> > @@ -199,7 +206,7 @@ void trace_destroy(const void *data, const char *type)
> > /**
> > * Signal handler for SIGHUP, which requests that the trace log is reopened
> > * (in the main loop). A single byte is written to reopen_log_pipe, to awaken
> > - * the select() in the main loop.
> > + * the poll() in the main loop.
> > */
> > static void trigger_reopen_log(int signal __attribute__((unused)))
> > {
> > @@ -279,15 +286,12 @@ static int destroy_conn(void *_conn)
> >
> > /* Flush outgoing if possible, but don't block. */
> > if (!conn->domain) {
> > - fd_set set;
> > - struct timeval none;
> > -
> > - FD_ZERO(&set);
> > - FD_SET(conn->fd, &set);
> > - none.tv_sec = none.tv_usec = 0;
> > + struct pollfd pfd;
> > + pfd.fd = conn->fd;
> > + pfd.events = POLLOUT;
> >
> > while (!list_empty(&conn->out_list)
> > - && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
> > + && poll(&pfd, 1, 0) == 1)
> > if (!write_messages(conn))
> > break;
> > close(conn->fd);
> > @@ -300,52 +304,74 @@ static int destroy_conn(void *_conn)
> > }
> >
> >
> > -static void set_fd(int fd, fd_set *set, int *max)
> > +static struct pollfd *set_fd(int fd, short events)
> > {
> > - if (fd < 0)
> > - return;
> > - FD_SET(fd, set);
> > - if (fd > *max)
> > - *max = fd;
> > -}
> > + struct pollfd *ret;
> > + if (current_array_size < nr_fds + 1) {
> > + struct pollfd *new_fds = NULL;
> > + unsigned long newsize;
> > +
> > + /* Round up to 2^8 boundary, in practice this just
> > + * make newsize larger than current_array_size.
> > + */
> > + newsize = ROUNDUP(nr_fds + 1, 8);
> >
> > + new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
> > + if (!new_fds)
> > + goto fail;
> > + fds = new_fds;
> >
> > -static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
> > - struct timeval **ptimeout)
> > + memset(&fds[0] + current_array_size, 0,
> > + sizeof(struct pollfd ) * (newsize-current_array_size));
> > + current_array_size = newsize;
> > + }
> > +
> > + fds[nr_fds].fd = fd;
> > + fds[nr_fds].events = events;
> > + ret = &fds[nr_fds];
> > + nr_fds++;
> > +
> > + return ret;
> > +fail:
> > + syslog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
> > + return NULL;
> > +}
> > +
> > +static void initialize_fds(int sock, struct pollfd **p_sock_pollfd,
> > + int ro_sock, struct pollfd **p_ro_sock_pollfd,
> > + int *ptimeout)
> > {
> > - static struct timeval zero_timeout = { 0 };
> > struct connection *conn;
> > - int max = -1;
> >
> > - *ptimeout = NULL;
> > + memset(fds, 0, sizeof(struct pollfd) * current_array_size);
> > + nr_fds = 0;
> >
> > - FD_ZERO(inset);
> > - FD_ZERO(outset);
> > + *ptimeout = -1;
> >
> > if (sock != -1)
> > - set_fd(sock, inset, &max);
> > + *p_sock_pollfd = set_fd(sock, POLLIN|POLLPRI);
> > if (ro_sock != -1)
> > - set_fd(ro_sock, inset, &max);
> > + *p_ro_sock_pollfd = set_fd(ro_sock, POLLIN|POLLPRI);
> > if (reopen_log_pipe[0] != -1)
> > - set_fd(reopen_log_pipe[0], inset, &max);
> > + reopen_log_pipe0_pollfd =
> > + set_fd(reopen_log_pipe[0], POLLIN|POLLPRI);
> >
> > if (xce_handle != NULL)
> > - set_fd(xc_evtchn_fd(xce_handle), inset, &max);
> > + xce_pollfd = set_fd(xc_evtchn_fd(xce_handle), POLLIN|POLLPRI);
> >
> > list_for_each_entry(conn, &connections, list) {
> > if (conn->domain) {
> > if (domain_can_read(conn) ||
> > (domain_can_write(conn) &&
> > !list_empty(&conn->out_list)))
> > - *ptimeout = &zero_timeout;
> > + *ptimeout = 0;
> > } else {
> > - set_fd(conn->fd, inset, &max);
> > + short events = POLLIN|POLLPRI;
> > if (!list_empty(&conn->out_list))
> > - FD_SET(conn->fd, outset);
> > + events |= POLLOUT;
> > + conn->pollfd = set_fd(conn->fd, events);
> > }
> > }
> > -
> > - return max;
> > }
> >
> > /* Is child a subnode of parent, or equal? */
> > @@ -1770,14 +1796,13 @@ int priv_domid = 0;
> >
> > int main(int argc, char *argv[])
> > {
> > - int opt, *sock, *ro_sock, max;
> > - fd_set inset, outset;
> > + int opt, *sock, *ro_sock;
> > + struct pollfd *sock_pollfd = NULL, *ro_sock_pollfd = NULL;
> > bool dofork = true;
> > bool outputpid = false;
> > bool no_domain_init = false;
> > const char *pidfile = NULL;
> > - int evtchn_fd = -1;
> > - struct timeval *timeout;
> > + int timeout;
> >
> > while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options,
> > NULL)) != -1) {
> > @@ -1880,11 +1905,9 @@ int main(int argc, char *argv[])
> >
> > signal(SIGHUP, trigger_reopen_log);
> >
> > - if (xce_handle != NULL)
> > - evtchn_fd = xc_evtchn_fd(xce_handle);
> > -
> > /* Get ready to listen to the tools. */
> > - max = initialize_set(&inset, &outset, *sock, *ro_sock, &timeout);
> > + initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd,
> > + &timeout);
> >
> > /* Tell the kernel we're up and running. */
> > xenbus_notify_running();
> > @@ -1893,27 +1916,55 @@ int main(int argc, char *argv[])
> > for (;;) {
> > struct connection *conn, *next;
> >
> > - if (select(max+1, &inset, &outset, NULL, timeout) < 0) {
> > + if (poll(fds, nr_fds, timeout) < 0) {
> > if (errno == EINTR)
> > continue;
> > - barf_perror("Select failed");
> > + barf_perror("Poll failed");
> > }
> >
> > - if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) {
> > - char c;
> > - if (read(reopen_log_pipe[0], &c, 1) != 1)
> > - barf_perror("read failed");
> > - reopen_log();
> > + if (reopen_log_pipe0_pollfd) {
> > + if (reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> > + close(reopen_log_pipe[0]);
> > + close(reopen_log_pipe[1]);
> > + init_pipe(reopen_log_pipe);
> > + } else if (reopen_log_pipe0_pollfd->revents & POLLIN) {
> > + char c;
> > + if (read(reopen_log_pipe[0], &c, 1) != 1)
> > + barf_perror("read failed");
> > + reopen_log();
> > + }
> > + reopen_log_pipe0_pollfd = NULL;
> > }
> >
> > - if (*sock != -1 && FD_ISSET(*sock, &inset))
> > - accept_connection(*sock, true);
> > + if (sock_pollfd) {
> > + if (sock_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> > + barf_perror("sock poll failed");
> > + break;
> > + } else if (sock_pollfd->revents & POLLIN) {
> > + accept_connection(*sock, true);
> > + sock_pollfd = NULL;
> > + }
> > + }
> >
> > - if (*ro_sock != -1 && FD_ISSET(*ro_sock, &inset))
> > - accept_connection(*ro_sock, false);
> > + if (ro_sock_pollfd) {
> > + if (ro_sock_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> > + barf_perror("ro sock poll failed");
> > + break;
> > + } else if (ro_sock_pollfd->revents & POLLIN) {
> > + accept_connection(*ro_sock, false);
> > + ro_sock_pollfd = NULL;
> > + }
> > + }
> >
> > - if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
> > - handle_event();
> > + if (xce_pollfd) {
> > + if (xce_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> > + barf_perror("xce_handle poll failed");
> > + break;
> > + } else if (xce_pollfd->revents & POLLIN) {
> > + handle_event();
> > + xce_pollfd = NULL;
> > + }
> > + }
> >
> > next = list_entry(connections.next, typeof(*conn), list);
> > if (&next->list != &connections)
> > @@ -1939,21 +1990,27 @@ int main(int argc, char *argv[])
> > if (talloc_free(conn) == 0)
> > continue;
> > } else {
> > - if (FD_ISSET(conn->fd, &inset))
> > + if (conn->pollfd &&
> > + !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> > + (conn->pollfd->revents & POLLIN))
> > handle_input(conn);
> > if (talloc_free(conn) == 0)
> > continue;
> >
> > talloc_increase_ref_count(conn);
> > - if (FD_ISSET(conn->fd, &outset))
> > + if (conn->pollfd &&
> > + !(conn->pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> > + (conn->pollfd->revents & POLLOUT))
> > handle_output(conn);
> > if (talloc_free(conn) == 0)
> > continue;
> > +
> > + conn->pollfd = NULL;
> > }
> > }
> >
> > - max = initialize_set(&inset, &outset, *sock, *ro_sock,
> > - &timeout);
> > + initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd,
> > + &timeout);
> > }
> > }
> >
> > diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
> > index 492ca0d..f330a87 100644
> > --- a/tools/xenstore/xenstored_core.h
> > +++ b/tools/xenstore/xenstored_core.h
> > @@ -60,6 +60,8 @@ struct connection
> >
> > /* The file descriptor we came in on. */
> > int fd;
> > + /* The pollfd corresponding to fd. */
> > + struct pollfd *pollfd;
> >
> > /* Who am I? 0 for socket connections. */
> > unsigned int id;
> >
> >
>
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Switch to poll() in cxenstored's IO loop
2013-01-22 14:50 ` Wei Liu
2013-01-22 14:57 ` Ian Campbell
@ 2013-02-05 10:58 ` Ian Campbell
2013-02-05 13:31 ` Wei Liu
1 sibling, 1 reply; 13+ messages in thread
From: Ian Campbell @ 2013-02-05 10:58 UTC (permalink / raw)
To: Wei Liu; +Cc: xen-devel@lists.xen.org
On Tue, 2013-01-22 at 14:50 +0000, Wei Liu wrote:
> > > > > + !(reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> > > >
> > > > This represents an error case I think? Is there anything we can do about
> > > > it? If this sort of error occurs and we do nothing will we end up just
> > > > spinning because every subsequent poll will just return straight away?
> > > >
> > > >
> > >
> > > Yes, this represents an error. This indicates the pipe used to trigger
> > > reopen_log is broken. What's the motivation of reopening log file?
> >
> > Rotation I should imagine.
> >
> > > I
> > > have no idea about the use case. But simply ignoring this error instead
> > > of crashing the process is better choice IMHO.
> >
> > Agreed, but my concern was that the daemon would spin consuming 100%
> > CPU, which is nearly as bad as crashing.
> >
> > > The whole pollfd set is reinitialized for every loop. So it is also
> > > possible that for the next loop it will success even previous poll
> > > fails?
> >
> > I suppose it depends on the reason it failed. I would expect most of
> > them would be pretty final rather than temporary but I haven't looked at
> > the pipe docs.
> >
>
> We can reinitialize the pipe if error occurs, because this pipe is only
> used within xenstored itself to notify log reload.
But if you get POLLPRI or POLLOUT you don't do this with the code as it
currently stands (from the v2 patch):
+ if (reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
+ close(reopen_log_pipe[0]);
+ close(reopen_log_pipe[1]);
+ init_pipe(reopen_log_pipe);
+ } else if (reopen_log_pipe0_pollfd->revents & POLLIN) {
+ char c;
+ if (read(reopen_log_pipe[0], &c, 1) != 1)
+ barf_perror("read failed");
+ reopen_log();
+ }
So on POLLRDHUP or POLLERR etc you reopen the log, on POLLIN you act as
expected but for POLLPRI and POLLOUT you don't do anything.
If these two events should never occur then you should include the in
the error handling part of the above. If they can occur then you need to
do something to handle them, otherwise they will just keep reoccuring on
every iteration.
The same goes for the other instances of this pattern.
Ian.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Switch to poll() in cxenstored's IO loop
2013-02-05 10:58 ` [PATCH] " Ian Campbell
@ 2013-02-05 13:31 ` Wei Liu
2013-02-15 14:52 ` Ian Campbell
0 siblings, 1 reply; 13+ messages in thread
From: Wei Liu @ 2013-02-05 13:31 UTC (permalink / raw)
To: Ian Campbell; +Cc: wei.liu2, xen-devel@lists.xen.org
On Tue, 2013-02-05 at 10:58 +0000, Ian Campbell wrote:
> On Tue, 2013-01-22 at 14:50 +0000, Wei Liu wrote:
> > > > > > + !(reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) &&
> > > > >
> > > > > This represents an error case I think? Is there anything we can do about
> > > > > it? If this sort of error occurs and we do nothing will we end up just
> > > > > spinning because every subsequent poll will just return straight away?
> > > > >
> > > > >
> > > >
> > > > Yes, this represents an error. This indicates the pipe used to trigger
> > > > reopen_log is broken. What's the motivation of reopening log file?
> > >
> > > Rotation I should imagine.
> > >
> > > > I
> > > > have no idea about the use case. But simply ignoring this error instead
> > > > of crashing the process is better choice IMHO.
> > >
> > > Agreed, but my concern was that the daemon would spin consuming 100%
> > > CPU, which is nearly as bad as crashing.
> > >
> > > > The whole pollfd set is reinitialized for every loop. So it is also
> > > > possible that for the next loop it will success even previous poll
> > > > fails?
> > >
> > > I suppose it depends on the reason it failed. I would expect most of
> > > them would be pretty final rather than temporary but I haven't looked at
> > > the pipe docs.
> > >
> >
> > We can reinitialize the pipe if error occurs, because this pipe is only
> > used within xenstored itself to notify log reload.
>
> But if you get POLLPRI or POLLOUT you don't do this with the code as it
> currently stands (from the v2 patch):
> + if (reopen_log_pipe0_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> + close(reopen_log_pipe[0]);
> + close(reopen_log_pipe[1]);
> + init_pipe(reopen_log_pipe);
> + } else if (reopen_log_pipe0_pollfd->revents & POLLIN) {
> + char c;
> + if (read(reopen_log_pipe[0], &c, 1) != 1)
> + barf_perror("read failed");
> + reopen_log();
> + }
>
> So on POLLRDHUP or POLLERR etc you reopen the log, on POLLIN you act as
> expected but for POLLPRI and POLLOUT you don't do anything.
>
> If these two events should never occur then you should include the in
> the error handling part of the above. If they can occur then you need to
> do something to handle them, otherwise they will just keep reoccuring on
> every iteration.
>
> The same goes for the other instances of this pattern.
>
> Ian.
>
I think POLLPRI and POLLOUT should not occur in most cases, here is a
updated version of the patch. Tested on my testbox and it works.
-----8<----
>From 22a7b50b8b0be62aeae7c86242a27dac1ffdb929 Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Mon, 21 Jan 2013 19:07:37 +0000
Subject: [PATCH] Switch to poll() in cxenstored's IO loop
Poll() can support more file descriptors than select(). We've done this for
xenconsoled, now do this for cxenstored as well.
The code is taken from xenconsoled and modified to adapt to cxenstored.
Updated:
* reopen pipe if polling on reopen_log_pipe fails.
* exit if polling on some critical fds fails.
* clean up POLL*.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
tools/xenstore/xenstored_core.c | 171 ++++++++++++++++++++++++++-------------
tools/xenstore/xenstored_core.h | 2 +
2 files changed, 116 insertions(+), 57 deletions(-)
diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c
index bd44645..839e51d 100644
--- a/tools/xenstore/xenstored_core.c
+++ b/tools/xenstore/xenstored_core.c
@@ -19,7 +19,7 @@
#include <sys/types.h>
#include <sys/stat.h>
-#include <sys/select.h>
+#include <poll.h>
#ifndef NO_SOCKETS
#include <sys/socket.h>
#include <sys/un.h>
@@ -55,6 +55,12 @@
#include "hashtable.h"
extern xc_evtchn *xce_handle; /* in xenstored_domain.c */
+static struct pollfd *xce_pollfd;
+static struct pollfd *fds;
+static unsigned int current_array_size;
+static unsigned int nr_fds;
+
+#define ROUNDUP(_x, _w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
static bool verbose = false;
LIST_HEAD(connections);
@@ -62,6 +68,7 @@ static int tracefd = -1;
static bool recovery = true;
static bool remove_local = true;
static int reopen_log_pipe[2];
+static struct pollfd *reopen_log_pipe0_pollfd;
static char *tracefile = NULL;
static TDB_CONTEXT *tdb_ctx = NULL;
@@ -199,7 +206,7 @@ void trace_destroy(const void *data, const char *type)
/**
* Signal handler for SIGHUP, which requests that the trace log is reopened
* (in the main loop). A single byte is written to reopen_log_pipe, to awaken
- * the select() in the main loop.
+ * the poll() in the main loop.
*/
static void trigger_reopen_log(int signal __attribute__((unused)))
{
@@ -279,15 +286,12 @@ static int destroy_conn(void *_conn)
/* Flush outgoing if possible, but don't block. */
if (!conn->domain) {
- fd_set set;
- struct timeval none;
-
- FD_ZERO(&set);
- FD_SET(conn->fd, &set);
- none.tv_sec = none.tv_usec = 0;
+ struct pollfd pfd;
+ pfd.fd = conn->fd;
+ pfd.events = POLLOUT;
while (!list_empty(&conn->out_list)
- && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
+ && poll(&pfd, 1, 0) == 1)
if (!write_messages(conn))
break;
close(conn->fd);
@@ -300,52 +304,74 @@ static int destroy_conn(void *_conn)
}
-static void set_fd(int fd, fd_set *set, int *max)
+static struct pollfd *set_fd(int fd, short events)
{
- if (fd < 0)
- return;
- FD_SET(fd, set);
- if (fd > *max)
- *max = fd;
-}
+ struct pollfd *ret;
+ if (current_array_size < nr_fds + 1) {
+ struct pollfd *new_fds = NULL;
+ unsigned long newsize;
+
+ /* Round up to 2^8 boundary, in practice this just
+ * make newsize larger than current_array_size.
+ */
+ newsize = ROUNDUP(nr_fds + 1, 8);
+ new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
+ if (!new_fds)
+ goto fail;
+ fds = new_fds;
-static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
- struct timeval **ptimeout)
+ memset(&fds[0] + current_array_size, 0,
+ sizeof(struct pollfd ) * (newsize-current_array_size));
+ current_array_size = newsize;
+ }
+
+ fds[nr_fds].fd = fd;
+ fds[nr_fds].events = events;
+ ret = &fds[nr_fds];
+ nr_fds++;
+
+ return ret;
+fail:
+ syslog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
+ return NULL;
+}
+
+static void initialize_fds(int sock, struct pollfd **p_sock_pollfd,
+ int ro_sock, struct pollfd **p_ro_sock_pollfd,
+ int *ptimeout)
{
- static struct timeval zero_timeout = { 0 };
struct connection *conn;
- int max = -1;
- *ptimeout = NULL;
+ memset(fds, 0, sizeof(struct pollfd) * current_array_size);
+ nr_fds = 0;
- FD_ZERO(inset);
- FD_ZERO(outset);
+ *ptimeout = -1;
if (sock != -1)
- set_fd(sock, inset, &max);
+ *p_sock_pollfd = set_fd(sock, POLLIN|POLLPRI);
if (ro_sock != -1)
- set_fd(ro_sock, inset, &max);
+ *p_ro_sock_pollfd = set_fd(ro_sock, POLLIN|POLLPRI);
if (reopen_log_pipe[0] != -1)
- set_fd(reopen_log_pipe[0], inset, &max);
+ reopen_log_pipe0_pollfd =
+ set_fd(reopen_log_pipe[0], POLLIN|POLLPRI);
if (xce_handle != NULL)
- set_fd(xc_evtchn_fd(xce_handle), inset, &max);
+ xce_pollfd = set_fd(xc_evtchn_fd(xce_handle), POLLIN|POLLPRI);
list_for_each_entry(conn, &connections, list) {
if (conn->domain) {
if (domain_can_read(conn) ||
(domain_can_write(conn) &&
!list_empty(&conn->out_list)))
- *ptimeout = &zero_timeout;
+ *ptimeout = 0;
} else {
- set_fd(conn->fd, inset, &max);
+ short events = POLLIN|POLLPRI;
if (!list_empty(&conn->out_list))
- FD_SET(conn->fd, outset);
+ events |= POLLOUT;
+ conn->pollfd = set_fd(conn->fd, events);
}
}
-
- return max;
}
/* Is child a subnode of parent, or equal? */
@@ -1770,14 +1796,13 @@ int priv_domid = 0;
int main(int argc, char *argv[])
{
- int opt, *sock, *ro_sock, max;
- fd_set inset, outset;
+ int opt, *sock, *ro_sock;
+ struct pollfd *sock_pollfd = NULL, *ro_sock_pollfd = NULL;
bool dofork = true;
bool outputpid = false;
bool no_domain_init = false;
const char *pidfile = NULL;
- int evtchn_fd = -1;
- struct timeval *timeout;
+ int timeout;
while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options,
NULL)) != -1) {
@@ -1880,11 +1905,9 @@ int main(int argc, char *argv[])
signal(SIGHUP, trigger_reopen_log);
- if (xce_handle != NULL)
- evtchn_fd = xc_evtchn_fd(xce_handle);
-
/* Get ready to listen to the tools. */
- max = initialize_set(&inset, &outset, *sock, *ro_sock, &timeout);
+ initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd,
+ &timeout);
/* Tell the kernel we're up and running. */
xenbus_notify_running();
@@ -1893,27 +1916,55 @@ int main(int argc, char *argv[])
for (;;) {
struct connection *conn, *next;
- if (select(max+1, &inset, &outset, NULL, timeout) < 0) {
+ if (poll(fds, nr_fds, timeout) < 0) {
if (errno == EINTR)
continue;
- barf_perror("Select failed");
+ barf_perror("Poll failed");
}
- if (reopen_log_pipe[0] != -1 && FD_ISSET(reopen_log_pipe[0], &inset)) {
- char c;
- if (read(reopen_log_pipe[0], &c, 1) != 1)
- barf_perror("read failed");
- reopen_log();
+ if (reopen_log_pipe0_pollfd) {
+ if (reopen_log_pipe0_pollfd->revents & ~POLLIN) {
+ close(reopen_log_pipe[0]);
+ close(reopen_log_pipe[1]);
+ init_pipe(reopen_log_pipe);
+ } else if (reopen_log_pipe0_pollfd->revents & POLLIN) {
+ char c;
+ if (read(reopen_log_pipe[0], &c, 1) != 1)
+ barf_perror("read failed");
+ reopen_log();
+ }
+ reopen_log_pipe0_pollfd = NULL;
}
- if (*sock != -1 && FD_ISSET(*sock, &inset))
- accept_connection(*sock, true);
+ if (sock_pollfd) {
+ if (sock_pollfd->revents & ~POLLIN) {
+ barf_perror("sock poll failed");
+ break;
+ } else if (sock_pollfd->revents & POLLIN) {
+ accept_connection(*sock, true);
+ sock_pollfd = NULL;
+ }
+ }
- if (*ro_sock != -1 && FD_ISSET(*ro_sock, &inset))
- accept_connection(*ro_sock, false);
+ if (ro_sock_pollfd) {
+ if (ro_sock_pollfd->revents & ~POLLIN) {
+ barf_perror("ro sock poll failed");
+ break;
+ } else if (ro_sock_pollfd->revents & POLLIN) {
+ accept_connection(*ro_sock, false);
+ ro_sock_pollfd = NULL;
+ }
+ }
- if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
- handle_event();
+ if (xce_pollfd) {
+ if (xce_pollfd->revents & ~POLLIN) {
+ barf_perror("xce_handle poll failed");
+ break;
+ } else if (xce_pollfd->revents & POLLIN) {
+ handle_event();
+ xce_pollfd = NULL;
+ }
+ }
next = list_entry(connections.next, typeof(*conn), list);
if (&next->list != &connections)
@@ -1939,21 +1990,27 @@ int main(int argc, char *argv[])
if (talloc_free(conn) == 0)
continue;
} else {
- if (FD_ISSET(conn->fd, &inset))
+ if (conn->pollfd &&
+ !(conn->pollfd->revents & ~(POLLIN|POLLOUT)) &&
+ (conn->pollfd->revents & POLLIN))
handle_input(conn);
if (talloc_free(conn) == 0)
continue;
talloc_increase_ref_count(conn);
- if (FD_ISSET(conn->fd, &outset))
+ if (conn->pollfd &&
+ !(conn->pollfd->revents & ~(POLLIN|POLLOUT)) &&
+ (conn->pollfd->revents & POLLOUT))
handle_output(conn);
if (talloc_free(conn) == 0)
continue;
+
+ conn->pollfd = NULL;
}
}
- max = initialize_set(&inset, &outset, *sock, *ro_sock,
- &timeout);
+ initialize_fds(*sock, &sock_pollfd, *ro_sock, &ro_sock_pollfd,
+ &timeout);
}
}
diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h
index 492ca0d..f330a87 100644
--- a/tools/xenstore/xenstored_core.h
+++ b/tools/xenstore/xenstored_core.h
@@ -60,6 +60,8 @@ struct connection
/* The file descriptor we came in on. */
int fd;
+ /* The pollfd corresponding to fd. */
+ struct pollfd *pollfd;
/* Who am I? 0 for socket connections. */
unsigned int id;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] Switch to poll() in cxenstored's IO loop
2013-02-05 13:31 ` Wei Liu
@ 2013-02-15 14:52 ` Ian Campbell
2013-02-17 3:53 ` Wei Liu
0 siblings, 1 reply; 13+ messages in thread
From: Ian Campbell @ 2013-02-15 14:52 UTC (permalink / raw)
To: Wei Liu; +Cc: xen-devel@lists.xen.org
On Tue, 2013-02-05 at 13:31 +0000, Wei Liu wrote:
> -----8<----
> From 22a7b50b8b0be62aeae7c86242a27dac1ffdb929 Mon Sep 17 00:00:00 2001
> From: Wei Liu <wei.liu2@citrix.com>
> Date: Mon, 21 Jan 2013 19:07:37 +0000
> Subject: [PATCH] Switch to poll() in cxenstored's IO loop
>
> Poll() can support more file descriptors than select(). We've done this for
> xenconsoled, now do this for cxenstored as well.
>
> The code is taken from xenconsoled and modified to adapt to cxenstored.
>
> Updated:
> * reopen pipe if polling on reopen_log_pipe fails.
> * exit if polling on some critical fds fails.
> * clean up POLL*.
>
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Does this build for you? It fails to build for me in a fairly obvious
way with:
xenstored_core.c:22:18: error: poll.h: No such file or directory
followed by the inevitable fallout.
Oh, this is the stubdomain build! I suppose mini-os lacks poll support.
Faced with either making xenstored able to use poll or select as a
compile time option and implementing poll for mini-os I'd probably go
for the latter, presuming that most of the infrastructure is present
already...
Ian.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Switch to poll() in cxenstored's IO loop
2013-02-15 14:52 ` Ian Campbell
@ 2013-02-17 3:53 ` Wei Liu
0 siblings, 0 replies; 13+ messages in thread
From: Wei Liu @ 2013-02-17 3:53 UTC (permalink / raw)
To: Ian Campbell; +Cc: wei.liu2, xen-devel@lists.xen.org
On Fri, 2013-02-15 at 14:52 +0000, Ian Campbell wrote:
> On Tue, 2013-02-05 at 13:31 +0000, Wei Liu wrote:
>
> > -----8<----
> > From 22a7b50b8b0be62aeae7c86242a27dac1ffdb929 Mon Sep 17 00:00:00 2001
> > From: Wei Liu <wei.liu2@citrix.com>
> > Date: Mon, 21 Jan 2013 19:07:37 +0000
> > Subject: [PATCH] Switch to poll() in cxenstored's IO loop
> >
> > Poll() can support more file descriptors than select(). We've done this for
> > xenconsoled, now do this for cxenstored as well.
> >
> > The code is taken from xenconsoled and modified to adapt to cxenstored.
> >
> > Updated:
> > * reopen pipe if polling on reopen_log_pipe fails.
> > * exit if polling on some critical fds fails.
> > * clean up POLL*.
> >
> > Signed-off-by: Wei Liu <wei.liu2@citrix.com>
>
> Does this build for you? It fails to build for me in a fairly obvious
> way with:
> xenstored_core.c:22:18: error: poll.h: No such file or directory
> followed by the inevitable fallout.
>
> Oh, this is the stubdomain build! I suppose mini-os lacks poll support.
>
> Faced with either making xenstored able to use poll or select as a
> compile time option and implementing poll for mini-os I'd probably go
> for the latter, presuming that most of the infrastructure is present
> already...
>
Right. The header file is easy. Stefano introduced that years ago back
in 2008, but the implementation is missing.
Looking deeper in the code, poll is also missing in LWIP, but I think I
can skip that at the moment, or use lwip_select in Mini-OS's poll as
well...
Wei.
> Ian.
>
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2013-02-17 3:53 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-01-21 19:23 [PATCH] Switch to poll() in cxenstored's IO loop Wei Liu
2013-01-22 9:47 ` Ian Campbell
2013-01-22 14:13 ` Wei Liu
2013-01-22 14:25 ` Ian Campbell
2013-01-22 14:50 ` Wei Liu
2013-01-22 14:57 ` Ian Campbell
2013-01-22 15:18 ` [PATCH V2] " Wei Liu
2013-01-29 17:40 ` Wei Liu
2013-01-30 9:02 ` Ian Campbell
2013-02-05 10:58 ` [PATCH] " Ian Campbell
2013-02-05 13:31 ` Wei Liu
2013-02-15 14:52 ` Ian Campbell
2013-02-17 3:53 ` Wei Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).