* [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
@ 2012-09-18 20:37 Anthony Liguori
2012-09-19 7:26 ` Paolo Bonzini
2012-09-19 14:20 ` Peter Portante
0 siblings, 2 replies; 14+ messages in thread
From: Anthony Liguori @ 2012-09-18 20:37 UTC (permalink / raw)
To: qemu-devel; +Cc: Paolo Bonzini, Anthony Liguori, Jan Kiszka
timerfd is a new(ish) mechanism provided in the 2.6.25+ Linux kernels that
allows for a high resolution event timer signaled through a file descriptor.
This patch adds an alarm timer implementation using timerfd.
timerfd has a couple advantages over dynticks. Namely, it's fd based instead
of signal based which reduces the likelihood of receiving EINTR failure codes.
Because we can't process timers in a signal handler, we write to a pipe() and
read that in the main loop. timerfd allows us to avoid the extra write to a
pipe.
Finally, since timerfd is just a file descriptor, it could conceivably used to
implement per-thread alarm timers. It also would integrate very easily in a
glib main loop.
Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c
right now otherwise the refactoring would be trivial. I'll leave that for
another day.
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
Please note, this is lightly tested. Since this is such a fundamental change,
I'd like to do some performance analysis before committing but wanted to share
early.
---
configure | 14 +++++++++
qemu-timer.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 100 insertions(+), 0 deletions(-)
diff --git a/configure b/configure
index 8564142..8a3135b 100755
--- a/configure
+++ b/configure
@@ -2532,6 +2532,17 @@ if compile_prog "" "" ; then
sync_file_range=yes
fi
+# check for timerfd
+timerfd="no"
+cat > $TMPC << EOF
+#include <sys/timerfd.h>
+int main(void) { return timerfd_create(CLOCK_REALTIME, 0); }
+EOF
+
+if compile_prog "" "" ; then
+ timerfd=yes
+fi
+
# check for linux/fiemap.h and FS_IOC_FIEMAP
fiemap=no
cat > $TMPC << EOF
@@ -3467,6 +3478,9 @@ fi
if test "$signalfd" = "yes" ; then
echo "CONFIG_SIGNALFD=y" >> $config_host_mak
fi
+if test "$timerfd" = "yes" ; then
+ echo "CONFIG_TIMERFD=y" >> $config_host_mak
+fi
if test "$tcg_interpreter" = "yes" ; then
echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak
fi
diff --git a/qemu-timer.c b/qemu-timer.c
index c7a1551..6f7fa35 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -35,6 +35,10 @@
#include <mmsystem.h>
#endif
+#ifdef CONFIG_TIMERFD
+#include <sys/timerfd.h>
+#endif
+
/***********************************************************/
/* timers */
@@ -66,6 +70,9 @@ struct qemu_alarm_timer {
int (*start)(struct qemu_alarm_timer *t);
void (*stop)(struct qemu_alarm_timer *t);
void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns);
+#ifdef CONFIG_TIMERFD
+ int timerfd;
+#endif
#if defined(__linux__)
timer_t timer;
int fd;
@@ -121,6 +128,12 @@ static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t)
/* TODO: MIN_TIMER_REARM_NS should be optimized */
#define MIN_TIMER_REARM_NS 250000
+#ifdef CONFIG_TIMERFD
+static int timerfd_start_timer(struct qemu_alarm_timer *t);
+static void timerfd_stop_timer(struct qemu_alarm_timer *t);
+static void timerfd_rearm_timer(struct qemu_alarm_timer *t, int64_t delta);
+#endif
+
#ifdef _WIN32
static int mm_start_timer(struct qemu_alarm_timer *t);
@@ -148,6 +161,10 @@ static void dynticks_rearm_timer(struct qemu_alarm_timer *t, int64_t delta);
#endif /* _WIN32 */
static struct qemu_alarm_timer alarm_timers[] = {
+#ifdef CONFIG_TIMERFD
+ {"timerfd", timerfd_start_timer,
+ timerfd_stop_timer, timerfd_rearm_timer},
+#endif
#ifndef _WIN32
#ifdef __linux__
{"dynticks", dynticks_start_timer,
@@ -476,6 +493,75 @@ static void host_alarm_handler(int host_signum)
#include "compatfd.h"
+static void timerfd_event(void *opaque)
+{
+ struct qemu_alarm_timer *t = opaque;
+ ssize_t len;
+ uint64_t count;
+
+ len = read(t->timerfd, &count, sizeof(count));
+ g_assert(len == sizeof(count));
+
+ t->expired = true;
+ t->pending = true;
+
+ /* We already process pending timers at the end of the main loop whereas
+ * this function is called during I/O processing. That means we know that
+ * pending timers will be checked before select()'ing again which means we
+ * don't need to explicitly call qemu_notify_event()
+ */
+}
+
+static int timerfd_start_timer(struct qemu_alarm_timer *t)
+{
+ t->timerfd = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC);
+ if (t->timerfd == -1) {
+ return -errno;
+ }
+
+ qemu_set_fd_handler(t->timerfd, timerfd_event, NULL, t);
+
+ return 0;
+}
+
+static void timerfd_stop_timer(struct qemu_alarm_timer *t)
+{
+ qemu_set_fd_handler(t->timerfd, NULL, NULL, NULL);
+ close(t->timerfd);
+ t->timerfd = -1;
+}
+
+static void timerfd_rearm_timer(struct qemu_alarm_timer *t,
+ int64_t nearest_delta_ns)
+{
+ int ret;
+ struct itimerspec timeout;
+ int64_t current_ns;
+
+ if (nearest_delta_ns < MIN_TIMER_REARM_NS) {
+ nearest_delta_ns = MIN_TIMER_REARM_NS;
+ }
+
+ /* check whether a timer is already running */
+ ret = timerfd_gettime(t->timerfd, &timeout);
+ g_assert(ret == 0);
+
+ current_ns = timeout.it_value.tv_sec * 1000000000LL;
+ current_ns += timeout.it_value.tv_nsec;
+
+ if (current_ns && current_ns <= nearest_delta_ns) {
+ return;
+ }
+
+ timeout.it_interval.tv_sec = 0;
+ timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */
+ timeout.it_value.tv_sec = nearest_delta_ns / 1000000000;
+ timeout.it_value.tv_nsec = nearest_delta_ns % 1000000000;
+
+ ret = timerfd_settime(t->timerfd, 0, &timeout, NULL);
+ g_assert(ret == 0);
+}
+
static int dynticks_start_timer(struct qemu_alarm_timer *t)
{
struct sigevent ev;
--
1.7.5.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-18 20:37 [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd Anthony Liguori
@ 2012-09-19 7:26 ` Paolo Bonzini
2012-09-19 7:44 ` Jan Kiszka
2012-09-19 16:04 ` Stefan Weil
2012-09-19 14:20 ` Peter Portante
1 sibling, 2 replies; 14+ messages in thread
From: Paolo Bonzini @ 2012-09-19 7:26 UTC (permalink / raw)
To: Anthony Liguori; +Cc: Jan Kiszka, qemu-devel, Peter Portante, Stefan Weil
Il 18/09/2012 22:37, Anthony Liguori ha scritto:
> Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c
> right now otherwise the refactoring would be trivial. I'll leave that for
> another day.
>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Jan Kiszka <jan.kiszka@siemens.com>
> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
> ---
> Please note, this is lightly tested. Since this is such a fundamental change,
> I'd like to do some performance analysis before committing but wanted to share
> early.
Looks good. I think Peter Portante tested something similar, and found no big
difference between the two. But it's a good thing and, in my opinion, for
non-timerfd OSes we should simply adjust the select() timeout and not bother
with signals.
I'm not sure if the same can be done for Windows, but I think it's possible as long
as you keep the timeBeginPeriod/timeEndPeriod calls. As a start, Stefan, can you
check if the win32 timer works for you with the calls added? Like this:
diff --git a/qemu-timer.c b/qemu-timer.c
index c7a1551..721c769 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -673,6 +673,10 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
HANDLE hTimer;
BOOLEAN success;
+ timeGetDevCaps(&mm_tc, sizeof(mm_tc));
+
+ timeBeginPeriod(mm_tc.wPeriodMin);
+
/* If you call ChangeTimerQueueTimer on a one-shot timer (its period
is zero) that has already expired, the timer is not updated. Since
creating a new timer is relatively expensive, set a bogus one-hour
@@ -688,6 +692,7 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
if (!success) {
fprintf(stderr, "Failed to initialize win32 alarm timer: %ld\n",
GetLastError());
+ timeEndPeriod(mm_tc.wPeriodMin);
return -1;
}
@@ -702,6 +707,7 @@ static void win32_stop_timer(struct qemu_alarm_timer *t)
if (hTimer) {
DeleteTimerQueueTimer(NULL, hTimer, NULL);
}
+ timeEndPeriod(mm_tc.wPeriodMin);
}
static void win32_rearm_timer(struct qemu_alarm_timer *t,
Paolo
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-19 7:26 ` Paolo Bonzini
@ 2012-09-19 7:44 ` Jan Kiszka
2012-09-19 8:39 ` Paolo Bonzini
` (4 more replies)
2012-09-19 16:04 ` Stefan Weil
1 sibling, 5 replies; 14+ messages in thread
From: Jan Kiszka @ 2012-09-19 7:44 UTC (permalink / raw)
To: Paolo Bonzini
Cc: Stefan Weil, Anthony Liguori, qemu-devel@nongnu.org,
Peter Portante
On 2012-09-19 09:26, Paolo Bonzini wrote:
> Il 18/09/2012 22:37, Anthony Liguori ha scritto:
>> Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c
>> right now otherwise the refactoring would be trivial. I'll leave that for
>> another day.
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Jan Kiszka <jan.kiszka@siemens.com>
>> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
>> ---
>> Please note, this is lightly tested. Since this is such a fundamental change,
>> I'd like to do some performance analysis before committing but wanted to share
>> early.
>
> Looks good. I think Peter Portante tested something similar, and found no big
> difference between the two. But it's a good thing and, in my opinion, for
> non-timerfd OSes we should simply adjust the select() timeout and not bother
> with signals.
What would be the advantage of timerfd over select? On Linux, both use
hrtimers (and low slack for RT processes). I'm starting to like the
select/WaitForMultipleObjects pattern as it would allow to consolidate
over basically two versions of timers and simplify the code.
Jan
>
> I'm not sure if the same can be done for Windows, but I think it's possible as long
> as you keep the timeBeginPeriod/timeEndPeriod calls. As a start, Stefan, can you
> check if the win32 timer works for you with the calls added? Like this:
>
> diff --git a/qemu-timer.c b/qemu-timer.c
> index c7a1551..721c769 100644
> --- a/qemu-timer.c
> +++ b/qemu-timer.c
> @@ -673,6 +673,10 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
> HANDLE hTimer;
> BOOLEAN success;
>
> + timeGetDevCaps(&mm_tc, sizeof(mm_tc));
> +
> + timeBeginPeriod(mm_tc.wPeriodMin);
> +
> /* If you call ChangeTimerQueueTimer on a one-shot timer (its period
> is zero) that has already expired, the timer is not updated. Since
> creating a new timer is relatively expensive, set a bogus one-hour
> @@ -688,6 +692,7 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
> if (!success) {
> fprintf(stderr, "Failed to initialize win32 alarm timer: %ld\n",
> GetLastError());
> + timeEndPeriod(mm_tc.wPeriodMin);
> return -1;
> }
>
> @@ -702,6 +707,7 @@ static void win32_stop_timer(struct qemu_alarm_timer *t)
> if (hTimer) {
> DeleteTimerQueueTimer(NULL, hTimer, NULL);
> }
> + timeEndPeriod(mm_tc.wPeriodMin);
> }
>
> static void win32_rearm_timer(struct qemu_alarm_timer *t,
>
> Paolo
>
--
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-19 7:44 ` Jan Kiszka
@ 2012-09-19 8:39 ` Paolo Bonzini
2012-09-19 14:15 ` Peter Portante
` (3 subsequent siblings)
4 siblings, 0 replies; 14+ messages in thread
From: Paolo Bonzini @ 2012-09-19 8:39 UTC (permalink / raw)
To: Jan Kiszka
Cc: Stefan Weil, Anthony Liguori, qemu-devel@nongnu.org,
Peter Portante
Il 19/09/2012 09:44, Jan Kiszka ha scritto:
>> > Looks good. I think Peter Portante tested something similar, and found no big
>> > difference between the two. But it's a good thing and, in my opinion, for
>> > non-timerfd OSes we should simply adjust the select() timeout and not bother
>> > with signals.
> What would be the advantage of timerfd over select? On Linux, both use
> hrtimers (and low slack for RT processes). I'm starting to like the
> select/WaitForMultipleObjects pattern as it would allow to consolidate
> over basically two versions of timers and simplify the code.
Oh, I didn't know this. Even better.
Paolo
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-19 7:44 ` Jan Kiszka
2012-09-19 8:39 ` Paolo Bonzini
@ 2012-09-19 14:15 ` Peter Portante
2012-09-19 14:27 ` Jan Kiszka
2012-09-19 16:55 ` Avi Kivity
` (2 subsequent siblings)
4 siblings, 1 reply; 14+ messages in thread
From: Peter Portante @ 2012-09-19 14:15 UTC (permalink / raw)
To: Jan Kiszka; +Cc: qemu-devel
[-- Attachment #1: Type: text/plain, Size: 3511 bytes --]
On Wed, Sep 19, 2012 at 3:44 AM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> On 2012-09-19 09:26, Paolo Bonzini wrote:
> > Il 18/09/2012 22:37, Anthony Liguori ha scritto:
> >> Unfortunately, there's a lot of Windows code in qemu-timer.c and
> main-loop.c
> >> right now otherwise the refactoring would be trivial. I'll leave that
> for
> >> another day.
> >>
> >> Cc: Paolo Bonzini <pbonzini@redhat.com>
> >> Cc: Jan Kiszka <jan.kiszka@siemens.com>
> >> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
> >> ---
> >> Please note, this is lightly tested. Since this is such a fundamental
> change,
> >> I'd like to do some performance analysis before committing but wanted
> to share
> >> early.
> >
> > Looks good. I think Peter Portante tested something similar, and found
> no big
> > difference between the two. But it's a good thing and, in my opinion,
> for
> > non-timerfd OSes we should simply adjust the select() timeout and not
> bother
> > with signals.
>
> What would be the advantage of timerfd over select? On Linux, both use
> hrtimers (and low slack for RT processes).
I am not sure the comparison is timerfd v. select, but timerfd v signal
based timer (setitimer). The timerfd path allows you to integrate with
select/poll/epoll loops, where as signal based timers make that more
difficult. One can do the same thing with signalfd, but only for one
signal, where as you can setup multiple timers at the expense of file
descriptors.
Additionally, FWIW, select() has a resolution capped by its use of struct
timeval, which is microseconds, where timerfd_settime allows for nanosecond
resolution.
> I'm starting to like the
> select/WaitForMultipleObjects pattern as it would allow to consolidate
> over basically two versions of timers and simplify the code.
>
With timerfd, signalfd and eventfd, Linux seems to have provided all the
coverage needed to make that happen.
>
> Jan
>
> >
> > I'm not sure if the same can be done for Windows, but I think it's
> possible as long
> > as you keep the timeBeginPeriod/timeEndPeriod calls. As a start,
> Stefan, can you
> > check if the win32 timer works for you with the calls added? Like this:
> >
> > diff --git a/qemu-timer.c b/qemu-timer.c
> > index c7a1551..721c769 100644
> > --- a/qemu-timer.c
> > +++ b/qemu-timer.c
> > @@ -673,6 +673,10 @@ static int win32_start_timer(struct
> qemu_alarm_timer *t)
> > HANDLE hTimer;
> > BOOLEAN success;
> >
> > + timeGetDevCaps(&mm_tc, sizeof(mm_tc));
> > +
> > + timeBeginPeriod(mm_tc.wPeriodMin);
> > +
> > /* If you call ChangeTimerQueueTimer on a one-shot timer (its period
> > is zero) that has already expired, the timer is not updated.
> Since
> > creating a new timer is relatively expensive, set a bogus
> one-hour
> > @@ -688,6 +692,7 @@ static int win32_start_timer(struct qemu_alarm_timer
> *t)
> > if (!success) {
> > fprintf(stderr, "Failed to initialize win32 alarm timer: %ld\n",
> > GetLastError());
> > + timeEndPeriod(mm_tc.wPeriodMin);
> > return -1;
> > }
> >
> > @@ -702,6 +707,7 @@ static void win32_stop_timer(struct qemu_alarm_timer
> *t)
> > if (hTimer) {
> > DeleteTimerQueueTimer(NULL, hTimer, NULL);
> > }
> > + timeEndPeriod(mm_tc.wPeriodMin);
> > }
> >
> > static void win32_rearm_timer(struct qemu_alarm_timer *t,
> >
> > Paolo
> >
>
> --
> Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
> Corporate Competence Center Embedded Linux
>
>
[-- Attachment #2: Type: text/html, Size: 4778 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-19 14:15 ` Peter Portante
@ 2012-09-19 14:27 ` Jan Kiszka
2012-09-20 5:51 ` Peter Portante
0 siblings, 1 reply; 14+ messages in thread
From: Jan Kiszka @ 2012-09-19 14:27 UTC (permalink / raw)
To: Peter Portante; +Cc: qemu-devel@nongnu.org
Please turn of HTML in you mailer. It's very hard to parse your reply.
On 2012-09-19 16:15, Peter Portante wrote:
> On Wed, Sep 19, 2012 at 3:44 AM, Jan Kiszka <jan.kiszka@siemens.com<mailto:jan.kiszka@siemens.com>> wrote:
> On 2012-09-19 09:26, Paolo Bonzini wrote:
>> Il 18/09/2012 22:37, Anthony Liguori ha scritto:
>>> Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c
>>> right now otherwise the refactoring would be trivial. I'll leave that for
>>> another day.
>>>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com<mailto:pbonzini@redhat.com>>
>>> Cc: Jan Kiszka <jan.kiszka@siemens.com<mailto:jan.kiszka@siemens.com>>
>>> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com<mailto:aliguori@us.ibm.com>>
>>> ---
>>> Please note, this is lightly tested. Since this is such a fundamental change,
>>> I'd like to do some performance analysis before committing but wanted to share
>>> early.
>>
>> Looks good. I think Peter Portante tested something similar, and found no big
>> difference between the two. But it's a good thing and, in my opinion, for
>> non-timerfd OSes we should simply adjust the select() timeout and not bother
>> with signals.
>
> What would be the advantage of timerfd over select? On Linux, both use
> hrtimers (and low slack for RT processes).
>
> I am not sure the comparison is timerfd v. select, but timerfd v signal based timer (setitimer). The timerfd path allows you to integrate with select/poll/epoll loops, where as signal based timers make that more difficult. One can do the same thing with signalfd, but only for one signal, where as you can setup multiple timers at the expense of file descriptors.
>
> Additionally, FWIW, select() has a resolution capped by its use of struct timeval, which is microseconds, where timerfd_settime allows for nanosecond resolution.
< 1µs resolution is pointless, even on RT-hardened kernels with fast
hardware underneath and when running natively.
>
> I'm starting to like the
> select/WaitForMultipleObjects pattern as it would allow to consolidate
> over basically two versions of timers and simplify the code.
>
> With timerfd, signalfd and eventfd, Linux seems to have provided all the coverage needed to make that happen.
The advantage is that timers based on select/poll timeouts will allow to
unify a lot of code for _all_ host platforms, i.e. even Windows. We
still need to evaluate the precise impact and look for potentially
missed limitations (aka: someone has to write patches and test them).
But if there are no relevant ones, it should be the better architecture.
That said, a timerfd based solution for Linux may be an intermediate
step of the select-based work takes longer.
Jan
--
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-19 14:27 ` Jan Kiszka
@ 2012-09-20 5:51 ` Peter Portante
0 siblings, 0 replies; 14+ messages in thread
From: Peter Portante @ 2012-09-20 5:51 UTC (permalink / raw)
To: Jan Kiszka; +Cc: qemu-devel@nongnu.org
On Wed, Sep 19, 2012 at 10:27 AM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> Please turn of HTML in you mailer. It's very hard to parse your reply.
>
> On 2012-09-19 16:15, Peter Portante wrote:
>> On Wed, Sep 19, 2012 at 3:44 AM, Jan Kiszka <jan.kiszka@siemens.com<mailto:jan.kiszka@siemens.com>> wrote:
>> On 2012-09-19 09:26, Paolo Bonzini wrote:
>>> Il 18/09/2012 22:37, Anthony Liguori ha scritto:
>>>> Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c
>>>> right now otherwise the refactoring would be trivial. I'll leave that for
>>>> another day.
>>>>
>>>> Cc: Paolo Bonzini <pbonzini@redhat.com<mailto:pbonzini@redhat.com>>
>>>> Cc: Jan Kiszka <jan.kiszka@siemens.com<mailto:jan.kiszka@siemens.com>>
>>>> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com<mailto:aliguori@us.ibm.com>>
>>>> ---
>>>> Please note, this is lightly tested. Since this is such a fundamental change,
>>>> I'd like to do some performance analysis before committing but wanted to share
>>>> early.
>>>
>>> Looks good. I think Peter Portante tested something similar, and found no big
>>> difference between the two. But it's a good thing and, in my opinion, for
>>> non-timerfd OSes we should simply adjust the select() timeout and not bother
>>> with signals.
>>
>> What would be the advantage of timerfd over select? On Linux, both use
>> hrtimers (and low slack for RT processes).
>>
>> I am not sure the comparison is timerfd v. select, but timerfd v signal based timer (setitimer). The timerfd path allows you to integrate with select/poll/epoll loops, where as signal based timers make that more difficult. One can do the same thing with signalfd, but only for one signal, where as you can setup multiple timers at the expense of file descriptors.
>>
>> Additionally, FWIW, select() has a resolution capped by its use of struct timeval, which is microseconds, where timerfd_settime allows for nanosecond resolution.
>
> < 1us resolution is pointless, even on RT-hardened kernels with fast
> hardware underneath and when running natively.
Perhaps.
Under Linux, we have been unable to demonstrate sub-50us resolution
for timeouts outside of RT scheduling classes. We have been able to
get around 1us resolution using SCHED_RR or SCHED_FIFO.
However, if native clocks return timer values at nanosecond
resolution, why would you not want to maintain those units to avoid
rounding errors or simple logic errors during conversions? Not sure
how pointless all the work has been to create software APIs and
hardware interfaces that use nanosecond resolution.
>>
>> I'm starting to like the
>> select/WaitForMultipleObjects pattern as it would allow to consolidate
>> over basically two versions of timers and simplify the code.
>>
>> With timerfd, signalfd and eventfd, Linux seems to have provided all the coverage needed to make that happen.
>
> The advantage is that timers based on select/poll timeouts will allow to
> unify a lot of code for _all_ host platforms, i.e. even Windows. We
> still need to evaluate the precise impact and look for potentially
> missed limitations (aka: someone has to write patches and test them).
> But if there are no relevant ones, it should be the better architecture.
>
> That said, a timerfd based solution for Linux may be an intermediate
> step of the select-based work takes longer.
>
> Jan
>
> --
> Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
> Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-19 7:44 ` Jan Kiszka
2012-09-19 8:39 ` Paolo Bonzini
2012-09-19 14:15 ` Peter Portante
@ 2012-09-19 16:55 ` Avi Kivity
2012-09-19 17:13 ` Alon Ziv
2012-09-20 2:54 ` Anthony Liguori
4 siblings, 0 replies; 14+ messages in thread
From: Avi Kivity @ 2012-09-19 16:55 UTC (permalink / raw)
To: Jan Kiszka
Cc: Paolo Bonzini, Anthony Liguori, qemu-devel@nongnu.org,
Peter Portante, Stefan Weil
On 09/19/2012 10:44 AM, Jan Kiszka wrote:
> On 2012-09-19 09:26, Paolo Bonzini wrote:
>> Il 18/09/2012 22:37, Anthony Liguori ha scritto:
>>> Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c
>>> right now otherwise the refactoring would be trivial. I'll leave that for
>>> another day.
>>>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Cc: Jan Kiszka <jan.kiszka@siemens.com>
>>> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
>>> ---
>>> Please note, this is lightly tested. Since this is such a fundamental change,
>>> I'd like to do some performance analysis before committing but wanted to share
>>> early.
>>
>> Looks good. I think Peter Portante tested something similar, and found no big
>> difference between the two. But it's a good thing and, in my opinion, for
>> non-timerfd OSes we should simply adjust the select() timeout and not bother
>> with signals.
>
> What would be the advantage of timerfd over select? On Linux, both use
> hrtimers (and low slack for RT processes). I'm starting to like the
> select/WaitForMultipleObjects pattern as it would allow to consolidate
> over basically two versions of timers and simplify the code.
An advantage is that if you have a lot of fd events but fewer timer
events, then you do not need to rearm the timer needlessly. It just
waits in the background. I doubt whether that advantage amounts to
anything in practice.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-19 7:44 ` Jan Kiszka
` (2 preceding siblings ...)
2012-09-19 16:55 ` Avi Kivity
@ 2012-09-19 17:13 ` Alon Ziv
2012-09-20 2:54 ` Anthony Liguori
4 siblings, 0 replies; 14+ messages in thread
From: Alon Ziv @ 2012-09-19 17:13 UTC (permalink / raw)
To: qemu-devel
Jan Kiszka <jan.kiszka <at> siemens.com> writes:
> What would be the advantage of timerfd over select? On Linux, both use
> hrtimers (and low slack for RT processes). I'm starting to like the
> select/WaitForMultipleObjects pattern as it would allow to consolidate
> over basically two versions of timers and simplify the code.
Why not use CreateWaitableTimer on Windows, then, to implement the same pattern?
-a
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-19 7:44 ` Jan Kiszka
` (3 preceding siblings ...)
2012-09-19 17:13 ` Alon Ziv
@ 2012-09-20 2:54 ` Anthony Liguori
4 siblings, 0 replies; 14+ messages in thread
From: Anthony Liguori @ 2012-09-20 2:54 UTC (permalink / raw)
To: Jan Kiszka, Paolo Bonzini
Cc: Stefan Weil, qemu-devel@nongnu.org, Peter Portante
Jan Kiszka <jan.kiszka@siemens.com> writes:
> On 2012-09-19 09:26, Paolo Bonzini wrote:
>> Il 18/09/2012 22:37, Anthony Liguori ha scritto:
>>> Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c
>>> right now otherwise the refactoring would be trivial. I'll leave that for
>>> another day.
>>>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Cc: Jan Kiszka <jan.kiszka@siemens.com>
>>> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
>>> ---
>>> Please note, this is lightly tested. Since this is such a fundamental change,
>>> I'd like to do some performance analysis before committing but wanted to share
>>> early.
>>
>> Looks good. I think Peter Portante tested something similar, and found no big
>> difference between the two. But it's a good thing and, in my opinion, for
>> non-timerfd OSes we should simply adjust the select() timeout and not bother
>> with signals.
>
> What would be the advantage of timerfd over select? On Linux, both use
> hrtimers (and low slack for RT processes). I'm starting to like the
> select/WaitForMultipleObjects pattern as it would allow to consolidate
> over basically two versions of timers and simplify the code.
I don't think it's an either or. Ideally we get to a complete glib main
loop and our timer implementation could fall back to use the timeout if
timerfd wasn't available.
Practically speaking, timerfd allows selecting the clock to use which
select() doesn't and it also supports absolute events instead of
relative events. Not sure either matters all that much, but no doubt
it's better than using signals.
Regarding microsecond granularity, note that while select() provides
microsecond granualrity, poll() only provides millisecond granularity.
I think there are a lot of good reasons to evaluate moving to poll()
(glib uses poll by default). With poll(), we definitely need timerfd.
Regards,
Anthony Liguori
>
> Jan
>
>>
>> I'm not sure if the same can be done for Windows, but I think it's possible as long
>> as you keep the timeBeginPeriod/timeEndPeriod calls. As a start, Stefan, can you
>> check if the win32 timer works for you with the calls added? Like this:
>>
>> diff --git a/qemu-timer.c b/qemu-timer.c
>> index c7a1551..721c769 100644
>> --- a/qemu-timer.c
>> +++ b/qemu-timer.c
>> @@ -673,6 +673,10 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
>> HANDLE hTimer;
>> BOOLEAN success;
>>
>> + timeGetDevCaps(&mm_tc, sizeof(mm_tc));
>> +
>> + timeBeginPeriod(mm_tc.wPeriodMin);
>> +
>> /* If you call ChangeTimerQueueTimer on a one-shot timer (its period
>> is zero) that has already expired, the timer is not updated. Since
>> creating a new timer is relatively expensive, set a bogus one-hour
>> @@ -688,6 +692,7 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
>> if (!success) {
>> fprintf(stderr, "Failed to initialize win32 alarm timer: %ld\n",
>> GetLastError());
>> + timeEndPeriod(mm_tc.wPeriodMin);
>> return -1;
>> }
>>
>> @@ -702,6 +707,7 @@ static void win32_stop_timer(struct qemu_alarm_timer *t)
>> if (hTimer) {
>> DeleteTimerQueueTimer(NULL, hTimer, NULL);
>> }
>> + timeEndPeriod(mm_tc.wPeriodMin);
>> }
>>
>> static void win32_rearm_timer(struct qemu_alarm_timer *t,
>>
>> Paolo
>>
>
> --
> Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
> Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-19 7:26 ` Paolo Bonzini
2012-09-19 7:44 ` Jan Kiszka
@ 2012-09-19 16:04 ` Stefan Weil
2012-09-19 16:12 ` Paolo Bonzini
1 sibling, 1 reply; 14+ messages in thread
From: Stefan Weil @ 2012-09-19 16:04 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Jan Kiszka, Anthony Liguori, qemu-devel, Peter Portante
Am 19.09.2012 09:26, schrieb Paolo Bonzini:
> Il 18/09/2012 22:37, Anthony Liguori ha scritto:
>> Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c
>> right now otherwise the refactoring would be trivial. I'll leave that for
>> another day.
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Jan Kiszka <jan.kiszka@siemens.com>
>> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
>> ---
>> Please note, this is lightly tested. Since this is such a fundamental change,
>> I'd like to do some performance analysis before committing but wanted to share
>> early.
> Looks good. I think Peter Portante tested something similar, and found no big
> difference between the two. But it's a good thing and, in my opinion, for
> non-timerfd OSes we should simply adjust the select() timeout and not bother
> with signals.
>
> I'm not sure if the same can be done for Windows, but I think it's possible as long
> as you keep the timeBeginPeriod/timeEndPeriod calls. As a start, Stefan, can you
> check if the win32 timer works for you with the calls added? Like this:
>
> diff --git a/qemu-timer.c b/qemu-timer.c
> index c7a1551..721c769 100644
> --- a/qemu-timer.c
> +++ b/qemu-timer.c
> @@ -673,6 +673,10 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
> HANDLE hTimer;
> BOOLEAN success;
>
> + timeGetDevCaps(&mm_tc, sizeof(mm_tc));
> +
> + timeBeginPeriod(mm_tc.wPeriodMin);
> +
> /* If you call ChangeTimerQueueTimer on a one-shot timer (its period
> is zero) that has already expired, the timer is not updated. Since
> creating a new timer is relatively expensive, set a bogus one-hour
> @@ -688,6 +692,7 @@ static int win32_start_timer(struct qemu_alarm_timer *t)
> if (!success) {
> fprintf(stderr, "Failed to initialize win32 alarm timer: %ld\n",
> GetLastError());
> + timeEndPeriod(mm_tc.wPeriodMin);
> return -1;
> }
>
> @@ -702,6 +707,7 @@ static void win32_stop_timer(struct qemu_alarm_timer *t)
> if (hTimer) {
> DeleteTimerQueueTimer(NULL, hTimer, NULL);
> }
> + timeEndPeriod(mm_tc.wPeriodMin);
> }
>
> static void win32_rearm_timer(struct qemu_alarm_timer *t,
>
> Paolo
The win32 timer still works when these modifications were applied.
What are they good for?
Stefan
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-19 16:04 ` Stefan Weil
@ 2012-09-19 16:12 ` Paolo Bonzini
2012-09-19 16:22 ` Stefan Weil
0 siblings, 1 reply; 14+ messages in thread
From: Paolo Bonzini @ 2012-09-19 16:12 UTC (permalink / raw)
To: Stefan Weil; +Cc: Jan Kiszka, Anthony Liguori, qemu-devel, Peter Portante
Il 19/09/2012 18:04, Stefan Weil ha scritto:
> The win32 timer still works when these modifications were applied.
> What are they good for?
IIUC the Win32 did _not_ work (except on Wine) without these. Note I'm
talking about "-clock win32".
So these provide a hint that the problem with the Win32 timer is not in
the bowels of Windows, but only that it does not adjust the Windows
scheduler quantum. Wine does not need it because the Linux scheduler is
not as lame.
So if we just eliminated timers and used the g_poll timeout (what was
proposed upthread), Windows would work provided
timeBeginPeriod/timeEndPeriod are called correctly.
Paolo
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-19 16:12 ` Paolo Bonzini
@ 2012-09-19 16:22 ` Stefan Weil
0 siblings, 0 replies; 14+ messages in thread
From: Stefan Weil @ 2012-09-19 16:22 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Jan Kiszka, Anthony Liguori, qemu-devel, Peter Portante
Am 19.09.2012 18:12, schrieb Paolo Bonzini:
> Il 19/09/2012 18:04, Stefan Weil ha scritto:
>> The win32 timer still works when these modifications were applied.
>> What are they good for?
> IIUC the Win32 did _not_ work (except on Wine) without these. Note I'm
> talking about "-clock win32".
>
> So these provide a hint that the problem with the Win32 timer is not in
> the bowels of Windows, but only that it does not adjust the Windows
> scheduler quantum. Wine does not need it because the Linux scheduler is
> not as lame.
>
> So if we just eliminated timers and used the g_poll timeout (what was
> proposed upthread), Windows would work provided
> timeBeginPeriod/timeEndPeriod are called correctly.
>
> Paolo
I tested with -clock win32 (not the mm timer), and it worked on
Win7 (64 bit QEMU) with and without the modification.
Maybe I can run some more tests on 32 bit Windows (also with g_poll),
but don't expect results soon.
Stefan
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd
2012-09-18 20:37 [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd Anthony Liguori
2012-09-19 7:26 ` Paolo Bonzini
@ 2012-09-19 14:20 ` Peter Portante
1 sibling, 0 replies; 14+ messages in thread
From: Peter Portante @ 2012-09-19 14:20 UTC (permalink / raw)
To: qemu-devel
[-- Attachment #1: Type: text/plain, Size: 6405 bytes --]
On Tue, Sep 18, 2012 at 4:37 PM, Anthony Liguori <aliguori@us.ibm.com>wrote:
> timerfd is a new(ish) mechanism provided in the 2.6.25+ Linux kernels that
> allows for a high resolution event timer signaled through a file
> descriptor.
> This patch adds an alarm timer implementation using timerfd.
>
> timerfd has a couple advantages over dynticks. Namely, it's fd based
> instead
> of signal based which reduces the likelihood of receiving EINTR failure
> codes.
>
> Because we can't process timers in a signal handler, we write to a pipe()
> and
> read that in the main loop. timerfd allows us to avoid the extra write to
> a
> pipe.
>
> Finally, since timerfd is just a file descriptor, it could conceivably
> used to
> implement per-thread alarm timers. It also would integrate very easily in
> a
> glib main loop.
>
> Unfortunately, there's a lot of Windows code in qemu-timer.c and
> main-loop.c
> right now otherwise the refactoring would be trivial. I'll leave that for
> another day.
>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Jan Kiszka <jan.kiszka@siemens.com>
> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
> ---
> Please note, this is lightly tested. Since this is such a fundamental
> change,
> I'd like to do some performance analysis before committing but wanted to
> share
> early.
> ---
> configure | 14 +++++++++
> qemu-timer.c | 86
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 100 insertions(+), 0 deletions(-)
>
> diff --git a/configure b/configure
> index 8564142..8a3135b 100755
> --- a/configure
> +++ b/configure
> @@ -2532,6 +2532,17 @@ if compile_prog "" "" ; then
> sync_file_range=yes
> fi
>
> +# check for timerfd
> +timerfd="no"
> +cat > $TMPC << EOF
> +#include <sys/timerfd.h>
> +int main(void) { return timerfd_create(CLOCK_REALTIME, 0); }
> +EOF
> +
> +if compile_prog "" "" ; then
> + timerfd=yes
> +fi
> +
> # check for linux/fiemap.h and FS_IOC_FIEMAP
> fiemap=no
> cat > $TMPC << EOF
> @@ -3467,6 +3478,9 @@ fi
> if test "$signalfd" = "yes" ; then
> echo "CONFIG_SIGNALFD=y" >> $config_host_mak
> fi
> +if test "$timerfd" = "yes" ; then
> + echo "CONFIG_TIMERFD=y" >> $config_host_mak
> +fi
> if test "$tcg_interpreter" = "yes" ; then
> echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak
> fi
> diff --git a/qemu-timer.c b/qemu-timer.c
> index c7a1551..6f7fa35 100644
> --- a/qemu-timer.c
> +++ b/qemu-timer.c
> @@ -35,6 +35,10 @@
> #include <mmsystem.h>
> #endif
>
> +#ifdef CONFIG_TIMERFD
> +#include <sys/timerfd.h>
> +#endif
> +
> /***********************************************************/
> /* timers */
>
> @@ -66,6 +70,9 @@ struct qemu_alarm_timer {
> int (*start)(struct qemu_alarm_timer *t);
> void (*stop)(struct qemu_alarm_timer *t);
> void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns);
> +#ifdef CONFIG_TIMERFD
> + int timerfd;
> +#endif
> #if defined(__linux__)
> timer_t timer;
> int fd;
> @@ -121,6 +128,12 @@ static void qemu_rearm_alarm_timer(struct
> qemu_alarm_timer *t)
> /* TODO: MIN_TIMER_REARM_NS should be optimized */
> #define MIN_TIMER_REARM_NS 250000
>
> +#ifdef CONFIG_TIMERFD
> +static int timerfd_start_timer(struct qemu_alarm_timer *t);
> +static void timerfd_stop_timer(struct qemu_alarm_timer *t);
> +static void timerfd_rearm_timer(struct qemu_alarm_timer *t, int64_t
> delta);
> +#endif
> +
> #ifdef _WIN32
>
> static int mm_start_timer(struct qemu_alarm_timer *t);
> @@ -148,6 +161,10 @@ static void dynticks_rearm_timer(struct
> qemu_alarm_timer *t, int64_t delta);
> #endif /* _WIN32 */
>
> static struct qemu_alarm_timer alarm_timers[] = {
> +#ifdef CONFIG_TIMERFD
> + {"timerfd", timerfd_start_timer,
> + timerfd_stop_timer, timerfd_rearm_timer},
> +#endif
> #ifndef _WIN32
> #ifdef __linux__
> {"dynticks", dynticks_start_timer,
> @@ -476,6 +493,75 @@ static void host_alarm_handler(int host_signum)
>
> #include "compatfd.h"
>
> +static void timerfd_event(void *opaque)
> +{
> + struct qemu_alarm_timer *t = opaque;
> + ssize_t len;
> + uint64_t count;
> +
> + len = read(t->timerfd, &count, sizeof(count));
> + g_assert(len == sizeof(count));
> +
> + t->expired = true;
> + t->pending = true;
> +
> + /* We already process pending timers at the end of the main loop
> whereas
> + * this function is called during I/O processing. That means we know
> that
> + * pending timers will be checked before select()'ing again which
> means we
> + * don't need to explicitly call qemu_notify_event()
> + */
> +}
> +
> +static int timerfd_start_timer(struct qemu_alarm_timer *t)
> +{
> + t->timerfd = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC);
> + if (t->timerfd == -1) {
> + return -errno;
> + }
> +
> + qemu_set_fd_handler(t->timerfd, timerfd_event, NULL, t);
> +
> + return 0;
> +}
> +
> +static void timerfd_stop_timer(struct qemu_alarm_timer *t)
> +{
> + qemu_set_fd_handler(t->timerfd, NULL, NULL, NULL);
> + close(t->timerfd);
> + t->timerfd = -1;
> +}
> +
> +static void timerfd_rearm_timer(struct qemu_alarm_timer *t,
> + int64_t nearest_delta_ns)
> +{
> + int ret;
> + struct itimerspec timeout;
> + int64_t current_ns;
> +
> + if (nearest_delta_ns < MIN_TIMER_REARM_NS) {
> + nearest_delta_ns = MIN_TIMER_REARM_NS;
> + }
>
It has been a few months, but IIRC, this pattern can lead to spurious
timeouts if these methods are not used correctly. If folks are interested,
I can spend some time to dig up my past work on this to explain more.
> + /* check whether a timer is already running */
> + ret = timerfd_gettime(t->timerfd, &timeout);
> + g_assert(ret == 0);
> +
> + current_ns = timeout.it_value.tv_sec * 1000000000LL;
> + current_ns += timeout.it_value.tv_nsec;
> +
> + if (current_ns && current_ns <= nearest_delta_ns) {
> + return;
> + }
> +
> + timeout.it_interval.tv_sec = 0;
> + timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */
> + timeout.it_value.tv_sec = nearest_delta_ns / 1000000000;
> + timeout.it_value.tv_nsec = nearest_delta_ns % 1000000000;
> +
> + ret = timerfd_settime(t->timerfd, 0, &timeout, NULL);
> + g_assert(ret == 0);
> +}
> +
> static int dynticks_start_timer(struct qemu_alarm_timer *t)
> {
> struct sigevent ev;
> --
> 1.7.5.4
>
>
>
[-- Attachment #2: Type: text/html, Size: 7870 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2012-09-20 5:51 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-09-18 20:37 [Qemu-devel] [PATCH] qemu-clock: add an alarm timer based on timerfd Anthony Liguori
2012-09-19 7:26 ` Paolo Bonzini
2012-09-19 7:44 ` Jan Kiszka
2012-09-19 8:39 ` Paolo Bonzini
2012-09-19 14:15 ` Peter Portante
2012-09-19 14:27 ` Jan Kiszka
2012-09-20 5:51 ` Peter Portante
2012-09-19 16:55 ` Avi Kivity
2012-09-19 17:13 ` Alon Ziv
2012-09-20 2:54 ` Anthony Liguori
2012-09-19 16:04 ` Stefan Weil
2012-09-19 16:12 ` Paolo Bonzini
2012-09-19 16:22 ` Stefan Weil
2012-09-19 14:20 ` Peter Portante
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).