qemu-devel.nongnu.org archive mirror
* [Qemu-devel] [PATCH 0/4] Rework alarm timer infrastructure - take 2
From: Luca Tettamanti @ 2007-08-17 23:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm-devel

Hello,
in reply to this mail I will send a series of 4 patches that clean up and
expand the alarm timer handling in QEMU. The patches have been rebased on
QEMU CVS.

Patch 1 is mostly a cleanup of the existing code: instead of multiple
#ifdefs handling the different timers scattered all over the code, there is
now a modular infrastructure where each timer type is self-contained and
the generic code is more readable. The resulting code is functionally
equivalent to the old one.

Patch 2 implements the "-clock" command line option proposed by Daniel
Berrange and Avi Kivity. By default QEMU tries the RTC and then falls back
to the unix timer; the user can override the order of the timers through
this option. The syntax is pretty simple: -clock timer1,timer2,etc. (QEMU
will pick the first one that works).
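
For example, to prefer the RTC and fall back to the unix timer one would
use (a hypothetical invocation; the timer names are the ones defined by the
patches below):

    qemu -clock rtc,unix <other options>

Unknown names are reported and skipped, and "-clock help" lists the timers
available on the host.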

Patch 3 adds support for HPET under Linux (which is basically my old
patch). As suggested, HPET takes precedence over the other timers, but of
course this can be overridden.

Patch 4 introduces the "dynticks" timer source; the patch is mostly based
on the work of Dan Kenigsberg. dynticks is now the default alarm timer.

Luca
-- 

* [Qemu-devel] [PATCH 1/4] Rework alarm timer infrastructure.
From: Luca Tettamanti @ 2007-08-17 23:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm-devel, Luca Tettamanti

Make the alarm code modular, removing #ifdefs from the generic code and
abstracting a common interface for all the timers. The result is
functionally equivalent to the old code.
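
In outline, the interface boils down to this (a simplified sketch of the
code in the patch below, not the exact definition):

    struct qemu_alarm_timer {
        char const *name;
        int (*start)(struct qemu_alarm_timer *t);  /* arm the host timer */
        void (*stop)(struct qemu_alarm_timer *t);  /* disarm it          */
        void *priv;                                /* per-backend state  */
    };

    /* init_timer_alarm() picks the first backend that starts: */
    for (i = 0; alarm_timers[i].name; i++)
        if (alarm_timers[i].start(&alarm_timers[i]) == 0)
            break;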

Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>

---
 vl.c |  287 +++++++++++++++++++++++++++++++++++++++++++------------------------
 vl.h |    1 
 2 files changed, 185 insertions(+), 103 deletions(-)

Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c	2007-08-17 16:48:32.000000000 +0200
+++ qemu/vl.c	2007-08-18 00:40:25.000000000 +0200
@@ -781,18 +781,58 @@
     struct QEMUTimer *next;
 };
 
-QEMUClock *rt_clock;
-QEMUClock *vm_clock;
+struct qemu_alarm_timer {
+    char const *name;
+
+    int (*start)(struct qemu_alarm_timer *t);
+    void (*stop)(struct qemu_alarm_timer *t);
+    void *priv;
+};
+
+static struct qemu_alarm_timer *alarm_timer;
 
-static QEMUTimer *active_timers[2];
 #ifdef _WIN32
-static MMRESULT timerID;
-static HANDLE host_alarm = NULL;
-static unsigned int period = 1;
+
+struct qemu_alarm_win32 {
+    MMRESULT timerId;
+    HANDLE host_alarm;
+    unsigned int period;
+} alarm_win32_data = {0, NULL, -1};
+
+static int win32_start_timer(struct qemu_alarm_timer *t);
+static void win32_stop_timer(struct qemu_alarm_timer *t);
+
+#else
+
+static int unix_start_timer(struct qemu_alarm_timer *t);
+static void unix_stop_timer(struct qemu_alarm_timer *t);
+
+#ifdef __linux__
+
+static int rtc_start_timer(struct qemu_alarm_timer *t);
+static void rtc_stop_timer(struct qemu_alarm_timer *t);
+
+#endif
+
+#endif /* _WIN32 */
+
+static struct qemu_alarm_timer alarm_timers[] = {
+#ifdef __linux__
+    /* RTC - if available - is preferred */
+    {"rtc", rtc_start_timer, rtc_stop_timer, NULL},
+#endif
+#ifndef _WIN32
+    {"unix", unix_start_timer, unix_stop_timer, NULL},
 #else
-/* frequency of the times() clock tick */
-static int timer_freq;
+    {"win32", win32_start_timer, win32_stop_timer, &alarm_win32_data},
 #endif
+    {NULL, }
+};
+
+QEMUClock *rt_clock;
+QEMUClock *vm_clock;
+
+static QEMUTimer *active_timers[2];
 
 QEMUClock *qemu_new_clock(int type)
 {
@@ -1009,7 +1049,8 @@
         qemu_timer_expired(active_timers[QEMU_TIMER_REALTIME],
                            qemu_get_clock(rt_clock))) {
 #ifdef _WIN32
-        SetEvent(host_alarm);
+        struct qemu_alarm_win32 *data = ((struct qemu_alarm_timer*)dwUser)->priv;
+        SetEvent(data->host_alarm);
 #endif
         CPUState *env = cpu_single_env;
         if (env) {
@@ -1030,10 +1071,27 @@
 
 #define RTC_FREQ 1024
 
-static int rtc_fd;
+static void enable_sigio_timer(int fd)
+{
+    struct sigaction act;
 
-static int start_rtc_timer(void)
+    /* timer signal */
+    sigfillset(&act.sa_mask);
+    act.sa_flags = 0;
+#if defined (TARGET_I386) && defined(USE_CODE_COPY)
+    act.sa_flags |= SA_ONSTACK;
+#endif
+    act.sa_handler = host_alarm_handler;
+
+    sigaction(SIGIO, &act, NULL);
+    fcntl(fd, F_SETFL, O_ASYNC);
+    fcntl(fd, F_SETOWN, getpid());
+}
+
+static int rtc_start_timer(struct qemu_alarm_timer *t)
 {
+    int rtc_fd;
+
     TFR(rtc_fd = open("/dev/rtc", O_RDONLY));
     if (rtc_fd < 0)
         return -1;
@@ -1048,117 +1106,142 @@
         close(rtc_fd);
         return -1;
     }
-    pit_min_timer_count = PIT_FREQ / RTC_FREQ;
+
+    enable_sigio_timer(rtc_fd);
+
+    t->priv = (void *)rtc_fd;
+
     return 0;
 }
 
-#else
-
-static int start_rtc_timer(void)
+static void rtc_stop_timer(struct qemu_alarm_timer *t)
 {
-    return -1;
+    int rtc_fd = (int)t->priv;
+
+    close(rtc_fd);
 }
 
 #endif /* !defined(__linux__) */
 
-#endif /* !defined(_WIN32) */
+static int unix_start_timer(struct qemu_alarm_timer *t)
+{
+    struct sigaction act;
+    struct itimerval itv;
+    int err;
+
+    /* timer signal */
+    sigfillset(&act.sa_mask);
+    act.sa_flags = 0;
+#if defined(TARGET_I386) && defined(USE_CODE_COPY)
+    act.sa_flags |= SA_ONSTACK;
+#endif
+    act.sa_handler = host_alarm_handler;
+
+    sigaction(SIGALRM, &act, NULL);
+
+    itv.it_interval.tv_sec = 0;
+    /* for i386 kernel 2.6 to get 1 ms */
+    itv.it_interval.tv_usec = 999;
+    itv.it_value.tv_sec = 0;
+    itv.it_value.tv_usec = 10 * 1000;
 
-static void init_timer_alarm(void)
+    err = setitimer(ITIMER_REAL, &itv, NULL);
+    if (err)
+        return -1;
+
+    return 0;
+}
+
+static void unix_stop_timer(struct qemu_alarm_timer *t)
 {
+    struct itimerval itv;
+
+    memset(&itv, 0, sizeof(itv));
+    setitimer(ITIMER_REAL, &itv, NULL);
+}
+
+#endif /* !defined(_WIN32) */
+
 #ifdef _WIN32
-    {
-        int count=0;
-        TIMECAPS tc;
 
-        ZeroMemory(&tc, sizeof(TIMECAPS));
-        timeGetDevCaps(&tc, sizeof(TIMECAPS));
-        if (period < tc.wPeriodMin)
-            period = tc.wPeriodMin;
-        timeBeginPeriod(period);
-        timerID = timeSetEvent(1,     // interval (ms)
-                               period,     // resolution
-                               host_alarm_handler, // function
-                               (DWORD)&count,  // user parameter
-                               TIME_PERIODIC | TIME_CALLBACK_FUNCTION);
- 	if( !timerID ) {
-            perror("failed timer alarm");
-            exit(1);
- 	}
-        host_alarm = CreateEvent(NULL, FALSE, FALSE, NULL);
-        if (!host_alarm) {
-            perror("failed CreateEvent");
-            exit(1);
-        }
-        qemu_add_wait_object(host_alarm, NULL, NULL);
+static int win32_start_timer(struct qemu_alarm_timer *t)
+{
+    TIMECAPS tc;
+    struct qemu_alarm_win32 *data = t->priv;
+
+    data->host_alarm = CreateEvent(NULL, FALSE, FALSE, NULL);
+    if (!data->host_alarm) {
+        perror("Failed CreateEvent");
+        return -1
     }
-    pit_min_timer_count = ((uint64_t)10000 * PIT_FREQ) / 1000000;
-#else
-    {
-        struct sigaction act;
-        struct itimerval itv;
-        
-        /* get times() syscall frequency */
-        timer_freq = sysconf(_SC_CLK_TCK);
-        
-        /* timer signal */
-        sigfillset(&act.sa_mask);
-       act.sa_flags = 0;
-#if defined (TARGET_I386) && defined(USE_CODE_COPY)
-        act.sa_flags |= SA_ONSTACK;
-#endif
-        act.sa_handler = host_alarm_handler;
-        sigaction(SIGALRM, &act, NULL);
 
-        itv.it_interval.tv_sec = 0;
-        itv.it_interval.tv_usec = 999; /* for i386 kernel 2.6 to get 1 ms */
-        itv.it_value.tv_sec = 0;
-        itv.it_value.tv_usec = 10 * 1000;
-        setitimer(ITIMER_REAL, &itv, NULL);
-        /* we probe the tick duration of the kernel to inform the user if
-           the emulated kernel requested a too high timer frequency */
-        getitimer(ITIMER_REAL, &itv);
+    memset(&tc, 0, sizeof(tc));
+    timeGetDevCaps(&tc, sizeof(tc));
 
-#if defined(__linux__)
-        /* XXX: force /dev/rtc usage because even 2.6 kernels may not
-           have timers with 1 ms resolution. The correct solution will
-           be to use the POSIX real time timers available in recent
-           2.6 kernels */
-        if (itv.it_interval.tv_usec > 1000 || 1) {
-            /* try to use /dev/rtc to have a faster timer */
-            if (start_rtc_timer() < 0)
-                goto use_itimer;
-            /* disable itimer */
-            itv.it_interval.tv_sec = 0;
-            itv.it_interval.tv_usec = 0;
-            itv.it_value.tv_sec = 0;
-            itv.it_value.tv_usec = 0;
-            setitimer(ITIMER_REAL, &itv, NULL);
-
-            /* use the RTC */
-            sigaction(SIGIO, &act, NULL);
-            fcntl(rtc_fd, F_SETFL, O_ASYNC);
-            fcntl(rtc_fd, F_SETOWN, getpid());
-        } else 
-#endif /* defined(__linux__) */
-        {
-        use_itimer:
-            pit_min_timer_count = ((uint64_t)itv.it_interval.tv_usec * 
-                                   PIT_FREQ) / 1000000;
-        }
+    if (data->period < tc.wPeriodMin)
+        data->period = tc.wPeriodMin;
+
+    timeBeginPeriod(data->period);
+
+    data->timerId = timeSetEvent(1,         // interval (ms)
+                        data->period,       // resolution
+                        host_alarm_handler, // function
+                        (DWORD)t,           // parameter
+                        TIME_PERIODIC | TIME_CALLBACK_FUNCTION);
+
+    if (!data->timerId) {
+        perror("Failed to initialize win32 alarm timer");
+
+        timeEndPeriod(data->period);
+        CloseHandle(data->host_alarm);
+        return -1;
     }
-#endif
+
+    qemu_add_wait_object(data->host_alarm, NULL, NULL);
+
+    return 0;
 }
 
-void quit_timers(void)
+static void win32_stop_timer(struct qemu_alarm_timer *t)
 {
-#ifdef _WIN32
-    timeKillEvent(timerID);
-    timeEndPeriod(period);
-    if (host_alarm) {
-        CloseHandle(host_alarm);
-        host_alarm = NULL;
+    struct qemu_alarm_win32 *data = t->priv;
+
+    timeKillEvent(data->timerId);
+    timeEndPeriod(data->period);
+
+    CloseHandle(data->host_alarm);
+}
+
+#endif /* _WIN32 */
+
+static void init_timer_alarm(void)
+{
+    struct qemu_alarm_timer *t;
+    int i, err = -1;
+
+    for (i = 0; alarm_timers[i].name; i++) {
+        t = &alarm_timers[i];
+
+        printf("trying %s...\n", t->name);
+
+        err = t->start(t);
+        if (!err)
+            break;
     }
-#endif
+
+    if (err) {
+        fprintf(stderr, "Unable to find any suitable alarm timer.\n");
+        fprintf(stderr, "Terminating\n");
+        exit(1);
+    }
+
+    alarm_timer = t;
+}
+
+void quit_timers(void)
+{
+    alarm_timer->stop(alarm_timer);
+    alarm_timer = NULL;
 }
 
 /***********************************************************/
Index: qemu/vl.h
===================================================================
--- qemu.orig/vl.h	2007-08-17 17:03:26.000000000 +0200
+++ qemu/vl.h	2007-08-17 17:03:37.000000000 +0200
@@ -447,7 +447,6 @@
 int qemu_timer_pending(QEMUTimer *ts);
 
 extern int64_t ticks_per_sec;
-extern int pit_min_timer_count;
 
 int64_t cpu_get_ticks(void);
 void cpu_enable_ticks(void);

-- 

* [Qemu-devel] [PATCH 2/4] Add -clock option.
From: Luca Tettamanti @ 2007-08-17 23:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm-devel, Luca Tettamanti

Allow the user to override the list of available alarm timers and their
priority. The format of the option is -clock clk1,clk2,...
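
With the patch applied, "-clock help" prints the available timers and
exits; on a Linux host the output of show_available_alarms() below would
look like:

    Available alarm timers, in order of precedence:
    rtc
    unix

(the exact list depends on the host platform and on the later patches in
this series).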

Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>

---
 vl.c |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c	2007-08-17 17:31:09.000000000 +0200
+++ qemu/vl.c	2007-08-18 00:40:22.000000000 +0200
@@ -829,6 +829,71 @@
     {NULL, }
 };
 
+static void show_available_alarms()
+{
+    int i;
+
+    printf("Available alarm timers, in order of precedence:\n");
+    for (i = 0; alarm_timers[i].name; i++)
+        printf("%s\n", alarm_timers[i].name);
+}
+
+static void configure_alarms(char const *opt)
+{
+    int i;
+    int cur = 0;
+    int count = (sizeof(alarm_timers) / sizeof(*alarm_timers)) - 1;
+    char *arg;
+    char *name;
+
+    if (!strcmp(opt, "help")) {
+        show_available_alarms();
+        exit(0);
+    }
+
+    arg = strdup(opt);
+
+    /* Reorder the array */
+    name = strtok(arg, ",");
+    while (name) {
+        struct qemu_alarm_timer tmp;
+
+        for (i = 0; i < count; i++) {
+            if (!strcmp(alarm_timers[i].name, name))
+                break;
+        }
+
+        if (i == count) {
+            fprintf(stderr, "Unknown clock %s\n", name);
+            goto next;
+        }
+
+        if (i < cur)
+            /* Ignore */
+            goto next;
+
+	/* Swap */
+        tmp = alarm_timers[i];
+        alarm_timers[i] = alarm_timers[cur];
+        alarm_timers[cur] = tmp;
+
+        cur++;
+next:
+        name = strtok(NULL, ",");
+    }
+
+    free(arg);
+
+    if (cur) {
+	/* Disable remaining timers */
+        for (i = cur; i < count; i++)
+            alarm_timers[i].name = NULL;
+    }
+
+    /* debug */
+    show_available_alarms();
+}
+
 QEMUClock *rt_clock;
 QEMUClock *vm_clock;
 
@@ -6791,6 +6856,8 @@
 #ifdef TARGET_SPARC
            "-prom-env variable=value  set OpenBIOS nvram variables\n"
 #endif
+           "-clock          force the use of the given methods for timer alarm.\n"
+           "                To see what timers are available use -clock help\n"
            "\n"
            "During emulation, the following keys are useful:\n"
            "ctrl-alt-f      toggle full screen\n"
@@ -6888,6 +6955,7 @@
     QEMU_OPTION_name,
     QEMU_OPTION_prom_env,
     QEMU_OPTION_old_param,
+    QEMU_OPTION_clock,
 };
 
 typedef struct QEMUOption {
@@ -6992,6 +7060,7 @@
 #if defined(TARGET_ARM)
     { "old-param", 0, QEMU_OPTION_old_param },
 #endif
+    { "clock", HAS_ARG, QEMU_OPTION_clock },
     { NULL },
 };
 
@@ -7771,6 +7840,9 @@
             case QEMU_OPTION_old_param:
                 old_param = 1;
 #endif
+            case QEMU_OPTION_clock:
+                configure_alarms(optarg);
+                break;
             }
         }
     }

-- 

* [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
From: Luca Tettamanti @ 2007-08-17 23:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm-devel, Luca Tettamanti

Linux operates the HPET timer in legacy replacement mode, which means that
the periodic interrupt of the CMOS RTC is not delivered (qemu won't be able
to use /dev/rtc). Add support for HPET (/dev/hpet) as a replacement for the
RTC; the periodic interrupt is delivered via SIGIO and is handled in the
same way as the RTC timer.
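
The userspace programming model for /dev/hpet, as used by this patch,
boils down to the following sketch (error handling omitted; see
hpet_start_timer() below for the real code):

    int fd = open("/dev/hpet", O_RDONLY);
    ioctl(fd, HPET_IRQFREQ, RTC_FREQ); /* request a 1024Hz periodic irq */
    ioctl(fd, HPET_EPI, 0);            /* enable periodic interrupts    */
    ioctl(fd, HPET_IE_ON, 0);          /* turn the interrupt on         */
    /* then O_ASYNC + F_SETOWN on fd so the kernel delivers SIGIO */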

Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>

---
 vl.c |   57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c	2007-08-17 17:39:21.000000000 +0200
+++ qemu/vl.c	2007-08-18 00:40:16.000000000 +0200
@@ -56,6 +56,7 @@
 #include <pty.h>
 #include <malloc.h>
 #include <linux/rtc.h>
+#include <linux/hpet.h>
 #include <linux/ppdev.h>
 #include <linux/parport.h>
 #else
@@ -809,6 +810,9 @@
 
 #ifdef __linux__
 
+static int hpet_start_timer(struct qemu_alarm_timer *t);
+static void hpet_stop_timer(struct qemu_alarm_timer *t);
+
 static int rtc_start_timer(struct qemu_alarm_timer *t);
 static void rtc_stop_timer(struct qemu_alarm_timer *t);
 
@@ -818,7 +822,9 @@
 
 static struct qemu_alarm_timer alarm_timers[] = {
 #ifdef __linux__
-    /* RTC - if available - is preferred */
+    /* HPET - if available - is preferred */
+    {"hpet", hpet_start_timer, hpet_stop_timer, NULL},
+    /* ...otherwise try RTC */
     {"rtc", rtc_start_timer, rtc_stop_timer, NULL},
 #endif
 #ifndef _WIN32
@@ -1153,6 +1159,55 @@
     fcntl(fd, F_SETOWN, getpid());
 }
 
+static int hpet_start_timer(struct qemu_alarm_timer *t)
+{
+    struct hpet_info info;
+    int r, fd;
+
+    fd = open("/dev/hpet", O_RDONLY);
+    if (fd < 0)
+        return -1;
+
+    /* Set frequency */
+    r = ioctl(fd, HPET_IRQFREQ, RTC_FREQ);
+    if (r < 0) {
+        fprintf(stderr, "Could not configure '/dev/hpet' to have a 1024Hz timer. This is not a fatal\n"
+                "error, but for better emulation accuracy type:\n"
+                "'echo 1024 > /proc/sys/dev/hpet/max-user-freq' as root.\n");
+        goto fail;
+    }
+
+    /* Check capabilities */
+    r = ioctl(fd, HPET_INFO, &info);
+    if (r < 0)
+        goto fail;
+
+    /* Enable periodic mode */
+    r = ioctl(fd, HPET_EPI, 0);
+    if (info.hi_flags && (r < 0))
+        goto fail;
+
+    /* Enable interrupt */
+    r = ioctl(fd, HPET_IE_ON, 0);
+    if (r < 0)
+        goto fail;
+
+    enable_sigio_timer(fd);
+    t->priv = (void *)fd;
+
+    return 0;
+fail:
+    close(fd);
+    return -1;
+}
+
+static void hpet_stop_timer(struct qemu_alarm_timer *t)
+{
+    int fd = (int)t->priv;
+
+    close(fd);
+}
+
 static int rtc_start_timer(struct qemu_alarm_timer *t)
 {
     int rtc_fd;

-- 

* [Qemu-devel] [PATCH 4/4] Add support for dynamic ticks.
From: Luca Tettamanti @ 2007-08-17 23:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm-devel, Luca Tettamanti

If DYNAMIC_TICKS is defined, qemu does not attempt to generate SIGALRM at
a constant rate. Rather, the system timer is set to generate SIGALRM only
when it is needed. DYNAMIC_TICKS reduces the number of SIGALRMs sent to
idle dynamic-ticked guests.
Original patch from Dan Kenigsberg <dank@qumranet.com>
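
The mechanism, simplified from the patch below: a POSIX timer delivers
SIGALRM, and whenever the timer list changes the host timer is re-armed as
a one-shot for the nearest deadline (delta_us stands for the computed
delta):

    timer_create(CLOCK_REALTIME, &ev, &host_timer); /* ev: SIGEV_SIGNAL/SIGALRM */

    timeout.it_interval.tv_sec  = 0;
    timeout.it_interval.tv_nsec = 0;                /* 0 -> one-shot timer */
    timeout.it_value.tv_sec     = delta_us / 1000000;
    timeout.it_value.tv_nsec    = (delta_us % 1000000) * 1000;
    timer_settime(host_timer, 0 /* relative */, &timeout, NULL);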

Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>

---
 configure |    5 ++
 vl.c      |  149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 148 insertions(+), 6 deletions(-)

Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c	2007-08-17 17:45:00.000000000 +0200
+++ qemu/vl.c	2007-08-18 00:38:03.000000000 +0200
@@ -784,12 +784,42 @@
 
 struct qemu_alarm_timer {
     char const *name;
+    unsigned int flags;
 
     int (*start)(struct qemu_alarm_timer *t);
     void (*stop)(struct qemu_alarm_timer *t);
+    void (*rearm)(struct qemu_alarm_timer *t);
     void *priv;
 };
 
+#define ALARM_FLAG_DYNTICKS  0x1
+
+#ifdef DYNAMIC_TICKS
+
+static inline int alarm_has_dynticks(struct qemu_alarm_timer *t)
+{
+    return t->flags & ALARM_FLAG_DYNTICKS;
+}
+
+static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t) {
+    if (!alarm_has_dynticks(t))
+        return;
+
+    t->rearm(t);
+}
+
+#else /* DYNAMIC_TICKS */
+
+static inline int alarm_has_dynticks(struct qemu_alarm_timer *t)
+{
+    return 0;
+}
+
+static void qemu_rearm_alarm_timer(void) {
+}
+
+#endif /* DYNAMIC_TICKS */
+
 static struct qemu_alarm_timer *alarm_timer;
 
 #ifdef _WIN32
@@ -808,6 +838,14 @@
 static int unix_start_timer(struct qemu_alarm_timer *t);
 static void unix_stop_timer(struct qemu_alarm_timer *t);
 
+#ifdef DYNAMIC_TICKS
+
+static int dynticks_start_timer(struct qemu_alarm_timer *t);
+static void dynticks_stop_timer(struct qemu_alarm_timer *t);
+static void dynticks_rearm_timer(struct qemu_alarm_timer *t);
+
+#endif
+
 #ifdef __linux__
 
 static int hpet_start_timer(struct qemu_alarm_timer *t);
@@ -821,16 +859,19 @@
 #endif /* _WIN32 */
 
 static struct qemu_alarm_timer alarm_timers[] = {
+#ifndef _WIN32
+#ifdef DYNAMIC_TICKS
+    {"dynticks", ALARM_FLAG_DYNTICKS, dynticks_start_timer, dynticks_stop_timer, dynticks_rearm_timer, NULL},
+#endif
 #ifdef __linux__
     /* HPET - if available - is preferred */
-    {"hpet", hpet_start_timer, hpet_stop_timer, NULL},
+    {"hpet", 0, hpet_start_timer, hpet_stop_timer, NULL, NULL},
     /* ...otherwise try RTC */
-    {"rtc", rtc_start_timer, rtc_stop_timer, NULL},
+    {"rtc", 0, rtc_start_timer, rtc_stop_timer, NULL, NULL},
 #endif
-#ifndef _WIN32
-    {"unix", unix_start_timer, unix_stop_timer, NULL},
+    {"unix", 0, unix_start_timer, unix_stop_timer, NULL, NULL},
 #else
-    {"win32", win32_start_timer, win32_stop_timer, &alarm_win32_data},
+    {"win32", 0, win32_start_timer, win32_stop_timer, NULL, &alarm_win32_data},
 #endif
     {NULL, }
 };
@@ -949,6 +990,8 @@
         }
         pt = &t->next;
     }
+
+    qemu_rearm_alarm_timer(alarm_timer);
 }
 
 /* modify the current timer so that it will be fired when current_time
@@ -1008,6 +1051,7 @@
         /* run the callback (the timer list can be modified) */
         ts->cb(ts->opaque);
     }
+    qemu_rearm_alarm_timer(alarm_timer);
 }
 
 int64_t qemu_get_clock(QEMUClock *clock)
@@ -1115,7 +1159,8 @@
         last_clock = ti;
     }
 #endif
-    if (qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL],
+    if (alarm_has_dynticks(alarm_timer) ||
+        qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL],
                            qemu_get_clock(vm_clock)) ||
         qemu_timer_expired(active_timers[QEMU_TIMER_REALTIME],
                            qemu_get_clock(rt_clock))) {
@@ -1243,6 +1288,97 @@
 
 #endif /* !defined(__linux__) */
 
+#ifdef DYNAMIC_TICKS
+static int dynticks_start_timer(struct qemu_alarm_timer *t)
+{
+    struct sigevent ev;
+    timer_t host_timer;
+    struct sigaction act;
+
+    sigfillset(&act.sa_mask);
+    act.sa_flags = 0;
+#if defined(TARGET_I386) && defined(USE_CODE_COPY)
+    act.sa_flags |= SA_ONSTACK;
+#endif
+    act.sa_handler = host_alarm_handler;
+
+    sigaction(SIGALRM, &act, NULL);
+
+    ev.sigev_value.sival_int = 0;
+    ev.sigev_notify = SIGEV_SIGNAL;
+    ev.sigev_signo = SIGALRM;
+
+    if (timer_create(CLOCK_REALTIME, &ev, &host_timer)) {
+        perror("timer_create");
+
+        /* disable dynticks */
+        fprintf(stderr, "Dynamic Ticks disabled\n");
+
+        return -1;
+    }
+
+    t->priv = (void *)host_timer;
+
+    return 0;
+}
+
+static void dynticks_stop_timer(struct qemu_alarm_timer *t)
+{
+    timer_t host_timer = (timer_t)t->priv;
+
+    timer_delete(host_timer);
+}
+
+static void dynticks_rearm_timer(struct qemu_alarm_timer *t)
+{
+    timer_t host_timer = (timer_t)t->priv;
+    struct itimerspec timeout;
+    int64_t nearest_delta_us = INT64_MAX;
+
+    if (active_timers[QEMU_TIMER_REALTIME] ||
+                    active_timers[QEMU_TIMER_VIRTUAL]) {
+        int64_t vmdelta_us, current_us;
+
+        if (active_timers[QEMU_TIMER_REALTIME])
+            nearest_delta_us = (active_timers[QEMU_TIMER_REALTIME]->expire_time - qemu_get_clock(rt_clock))*1000;
+
+        if (active_timers[QEMU_TIMER_VIRTUAL]) {
+            /* round up */
+            vmdelta_us = (active_timers[QEMU_TIMER_VIRTUAL]->expire_time - qemu_get_clock(vm_clock)+999)/1000;
+            if (vmdelta_us < nearest_delta_us)
+                nearest_delta_us = vmdelta_us;
+        }
+
+        /* Avoid arming the timer to negative, zero, or too low values */
+        /* TODO: MIN_TIMER_REARM_US should be optimized */
+        #define MIN_TIMER_REARM_US 250
+        if (nearest_delta_us <= MIN_TIMER_REARM_US)
+            nearest_delta_us = MIN_TIMER_REARM_US;
+
+        /* check whether a timer is already running */
+        if (timer_gettime(host_timer, &timeout)) {
+            perror("gettime");
+            fprintf(stderr, "Internal timer error: aborting\n");
+            exit(1);
+        }
+        current_us = timeout.it_value.tv_sec * 1000000 + timeout.it_value.tv_nsec/1000;
+        if (current_us && current_us <= nearest_delta_us)
+            return;
+
+        timeout.it_interval.tv_sec = 0;
+        timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */
+        timeout.it_value.tv_sec =  nearest_delta_us / 1000000;
+        timeout.it_value.tv_nsec = (nearest_delta_us % 1000000) * 1000;
+        if (timer_settime(host_timer, 0 /* RELATIVE */, &timeout, NULL)) {
+            perror("settime");
+            fprintf(stderr, "Internal timer error: aborting\n");
+            exit(1);
+        }
+    }
+}
+
+#endif /* DYNAMIC_TICKS */
+
 static int unix_start_timer(struct qemu_alarm_timer *t)
 {
     struct sigaction act;
@@ -6490,6 +6626,7 @@
         cpu_enable_ticks();
         vm_running = 1;
         vm_state_notify(1);
+        qemu_rearm_alarm_timer(alarm_timer);
     }
 }
 
Index: qemu/configure
===================================================================
--- qemu.orig/configure	2007-08-17 17:45:17.000000000 +0200
+++ qemu/configure	2007-08-17 17:45:31.000000000 +0200
@@ -294,6 +294,8 @@
         *)     echo "undefined SPARC architecture. Exiting";exit 1;;
       esac
   ;;
+  --disable-dynamic-ticks) dynamic_ticks="no"
+  ;;
   esac
 done
 
@@ -859,6 +861,9 @@
 if [ "$build_docs" = "yes" ] ; then
   echo "BUILD_DOCS=yes" >> $config_mak
 fi
+if test "$dynamic_ticks" != "no" ; then
+  echo "#define DYNAMIC_TICKS 1" >> $config_h
+fi
 
 # XXX: suppress that
 if [ "$bsd" = "yes" ] ; then

-- 

* Re: [Qemu-devel] [PATCH 0/4] Rework alarm timer infrastructure - take 2
From: Christian MICHON @ 2007-08-17 23:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm-devel

On 8/18/07, Luca Tettamanti <kronos.it@gmail.com> wrote:
> Hello,
> in reply to this mail I will send a series of 4 patches that clean up and
> expand the alarm timer handling in QEMU. The patches have been rebased on
> QEMU CVS.
>
> Patch 1 is mostly a cleanup of the existing code: instead of multiple
> #ifdefs handling the different timers scattered all over the code, there is
> now a modular infrastructure where each timer type is self-contained and
> the generic code is more readable. The resulting code is functionally
> equivalent to the old one.
>
> Patch 2 implements the "-clock" command line option proposed by Daniel
> Berrange and Avi Kivity. By default QEMU tries the RTC and then falls back
> to the unix timer; the user can override the order of the timers through
> this option. The syntax is pretty simple: -clock timer1,timer2,etc. (QEMU
> will pick the first one that works).
>
> Patch 3 adds support for HPET under Linux (which is basically my old
> patch). As suggested, HPET takes precedence over the other timers, but of
> course this can be overridden.
>
> Patch 4 introduces the "dynticks" timer source; the patch is mostly based
> on the work of Dan Kenigsberg. dynticks is now the default alarm timer.
>
> Luca

there's a typo at line 1432 of vl.c after applying all 4 patches
(missing ';'):

C:/msys/1.0/home/Xian/qemu/vl.c: In function `win32_start_timer':
C:/msys/1.0/home/Xian/qemu/vl.c:1432: error: syntax error before '}' token
make[1]: *** [vl.o] Error 1

Beyond this small typo, I managed to include this in a win32 qemu build.
Is there a specific practical test to see the improvement in a Linux guest
when running on a Windows host?

-- 
Christian
--
http://detaolb.sourceforge.net/, a linux distribution for Qemu

* Re: [kvm-devel] [Qemu-devel] [PATCH 0/4] Rework alarm timer infrastructure - take 2
From: Luca @ 2007-08-18  0:10 UTC (permalink / raw)
  To: Christian MICHON; +Cc: kvm-devel, qemu-devel

On 8/18/07, Christian MICHON <christian.michon@gmail.com> wrote:
> there's a typo at line 1432 of vl.c after applying all 4 patches
> (missing ';')

Oops...

> Beyond this small typo, I managed to include this in a win32 qemu build.
> Is there a specific practical test to see the improvement in a Linux guest
> when running on a Windows host?

The improvements - beyond the refactoring - are either specific to
Linux (the HPET timer) or to UNIX in general (dynticks - POSIX timers
are used).
It may be possible to use a one-shot timer on Windows too, but I'm not
really familiar with the win32 API.

Luca

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastructure - take 2
From: Anthony Liguori @ 2007-08-18 15:17 UTC (permalink / raw)
  To: Luca Tettamanti; +Cc: kvm-devel, qemu-devel


On Sat, 2007-08-18 at 01:11 +0200, Luca Tettamanti wrote:
> Hello,
> in reply to this mail I will send a series of 4 patches that clean up and
> expand the alarm timer handling in QEMU. The patches have been rebased on
> QEMU CVS.
> 
> Patch 1 is mostly a cleanup of the existing code: instead of multiple
> #ifdefs handling the different timers scattered all over the code, there is
> now a modular infrastructure where each timer type is self-contained and
> the generic code is more readable. The resulting code is functionally
> equivalent to the old one.
> 
> Patch 2 implements the "-clock" command line option proposed by Daniel
> Berrange and Avi Kivity. By default QEMU tries the RTC and then falls back
> to the unix timer; the user can override the order of the timers through
> this option. The syntax is pretty simple: -clock timer1,timer2,etc. (QEMU
> will pick the first one that works).
> 
> Patch 3 adds support for HPET under Linux (which is basically my old
> patch). As suggested, HPET takes precedence over the other timers, but of
> course this can be overridden.
> 
> Patch 4 introduces the "dynticks" timer source; the patch is mostly based
> on the work of Dan Kenigsberg. dynticks is now the default alarm timer.

Why do you guard dynticks with #ifdef?  Is there any reason why you
wouldn't want to use dynticks?

Regards,

Anthony Liguori

> Luca

* [Qemu-devel] RE: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastructure - take2
From: Dor Laor @ 2007-08-18 16:53 UTC (permalink / raw)
  To: Anthony Liguori, Luca Tettamanti; +Cc: kvm-devel, qemu-devel

>> Patch 4 introduces the "dynticks" timer source; the patch is mostly based
>> on the work of Dan Kenigsberg. dynticks is now the default alarm timer.
>
> Why do you guard dynticks with #ifdef?  Is there any reason why you
> wouldn't want to use dynticks?

I too think it should be the default.
There is no regression in performance or behaviour with this option.

We didn't test qemu dyn-tick with kernels that don't have
high-res-timers+dyn-tick.
In this case the dyn-tick minimum resolution will be 1 msec. I believe it
should work OK since this is the case without any dyn-tick.

So, I join Anthony's vote.


* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastructure - take2
From: Luca Tettamanti @ 2007-08-18 22:02 UTC (permalink / raw)
  To: kvm-devel; +Cc: qemu-devel

Dor Laor wrote:
> > Why do you guard dynticks with #ifdef?  Is there any reason why you
> > wouldn't want to use dynticks?
>
> I too think it should be the default.
> There is no regression in performance or behaviour with this option.

Ok, I've updated the patch. It was pretty easy to implement the same
feature for win32 (lightly tested inside a winxp VM).

> We didn't test qemu dyn-tick with kernels that don't have
> high-res-timers+dyn-tick.

I did ;)

> In this case the dyn-tick minimum resolution will be 1 msec. I believe it
> should work OK since this is the case without any dyn-tick.

Actually the minimum resolution depends on the host HZ setting, but - yes -
essentially you get the same behaviour as the "unix" timer, plus the
overhead of reprogramming the timer.
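
As a rough worked example: on a host with HZ=250 and no high-resolution
timers, a one-shot timer can fire at best every 1s/250 = 4 ms, the same
granularity the periodic "unix" timer would have on that host; the extra
cost is one timer_gettime()/timer_settime() pair per alarm.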

Add support for dynamic ticks.

If the dynticks alarm timer is used, qemu does not attempt to generate
SIGALRM at a constant rate. Rather, the system timer is set to generate
SIGALRM only when it is needed. The dynticks timer reduces the number of
SIGALRMs sent to idle dynamic-ticked guests.
Original patch from Dan Kenigsberg <dank@qumranet.com>

Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>

---
 vl.c |  178 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 170 insertions(+), 8 deletions(-)

Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c	2007-08-18 23:23:47.000000000 +0200
+++ qemu/vl.c	2007-08-18 23:23:53.000000000 +0200
@@ -784,12 +784,31 @@
 
 struct qemu_alarm_timer {
     char const *name;
+    unsigned int flags;
 
     int (*start)(struct qemu_alarm_timer *t);
     void (*stop)(struct qemu_alarm_timer *t);
+    void (*rearm)(struct qemu_alarm_timer *t);
     void *priv;
 };
 
+#define ALARM_FLAG_DYNTICKS  0x1
+
+static inline int alarm_has_dynticks(struct qemu_alarm_timer *t)
+{
+    return t->flags & ALARM_FLAG_DYNTICKS;
+}
+
+static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t) {
+    if (!alarm_has_dynticks(t))
+        return;
+
+    t->rearm(t);
+}
+
+/* TODO: MIN_TIMER_REARM_US should be optimized */
+#define MIN_TIMER_REARM_US 250
+
 static struct qemu_alarm_timer *alarm_timer;
 
 #ifdef _WIN32
@@ -802,12 +821,17 @@
 
 static int win32_start_timer(struct qemu_alarm_timer *t);
 static void win32_stop_timer(struct qemu_alarm_timer *t);
+static void win32_rearm_timer(struct qemu_alarm_timer *t);
 
 #else
 
 static int unix_start_timer(struct qemu_alarm_timer *t);
 static void unix_stop_timer(struct qemu_alarm_timer *t);
 
+static int dynticks_start_timer(struct qemu_alarm_timer *t);
+static void dynticks_stop_timer(struct qemu_alarm_timer *t);
+static void dynticks_rearm_timer(struct qemu_alarm_timer *t);
+
 #ifdef __linux__
 
 static int hpet_start_timer(struct qemu_alarm_timer *t);
@@ -816,21 +840,23 @@
 static int rtc_start_timer(struct qemu_alarm_timer *t);
 static void rtc_stop_timer(struct qemu_alarm_timer *t);
 
-#endif
+#endif /* __linux__ */
 
 #endif /* _WIN32 */
 
 static struct qemu_alarm_timer alarm_timers[] = {
+#ifndef _WIN32
+    {"dynticks", ALARM_FLAG_DYNTICKS, dynticks_start_timer, dynticks_stop_timer, dynticks_rearm_timer, NULL},
 #ifdef __linux__
     /* HPET - if available - is preferred */
-    {"hpet", hpet_start_timer, hpet_stop_timer, NULL},
+    {"hpet", 0, hpet_start_timer, hpet_stop_timer, NULL, NULL},
     /* ...otherwise try RTC */
-    {"rtc", rtc_start_timer, rtc_stop_timer, NULL},
+    {"rtc", 0, rtc_start_timer, rtc_stop_timer, NULL, NULL},
 #endif
-#ifndef _WIN32
-    {"unix", unix_start_timer, unix_stop_timer, NULL},
+    {"unix", 0, unix_start_timer, unix_stop_timer, NULL, NULL},
 #else
-    {"win32", win32_start_timer, win32_stop_timer, &alarm_win32_data},
+    {"dynticks", ALARM_FLAG_DYNTICKS, win32_start_timer, win32_stop_timer, win32_rearm_timer, &alarm_win32_data},
+    {"win32", 0, win32_start_timer, win32_stop_timer, NULL, &alarm_win32_data},
 #endif
     {NULL, }
 };
@@ -949,6 +975,8 @@
         }
         pt = &t->next;
     }
+
+    qemu_rearm_alarm_timer(alarm_timer);
 }
 
 /* modify the current timer so that it will be fired when current_time
@@ -1008,6 +1036,7 @@
         /* run the callback (the timer list can be modified) */
         ts->cb(ts->opaque);
     }
+    qemu_rearm_alarm_timer(alarm_timer);
 }
 
 int64_t qemu_get_clock(QEMUClock *clock)
@@ -1115,7 +1144,8 @@
         last_clock = ti;
     }
 #endif
-    if (qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL],
+    if (alarm_has_dynticks(alarm_timer) ||
+        qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL],
                            qemu_get_clock(vm_clock)) ||
         qemu_timer_expired(active_timers[QEMU_TIMER_REALTIME],
                            qemu_get_clock(rt_clock))) {
@@ -1136,6 +1166,27 @@
     }
 }
 
+static uint64_t qemu_next_deadline(void) {
+    uint64_t nearest_delta_us = ULLONG_MAX;
+    uint64_t vmdelta_us;
+
+    if (active_timers[QEMU_TIMER_REALTIME])
+        nearest_delta_us = (active_timers[QEMU_TIMER_REALTIME]->expire_time - qemu_get_clock(rt_clock))*1000;
+
+    if (active_timers[QEMU_TIMER_VIRTUAL]) {
+        /* round up */
+        vmdelta_us = (active_timers[QEMU_TIMER_VIRTUAL]->expire_time - qemu_get_clock(vm_clock)+999)/1000;
+        if (vmdelta_us < nearest_delta_us)
+            nearest_delta_us = vmdelta_us;
+    }
+
+    /* Avoid arming the timer to negative, zero, or too low values */
+    if (nearest_delta_us <= MIN_TIMER_REARM_US)
+        nearest_delta_us = MIN_TIMER_REARM_US;
+
+    return nearest_delta_us;
+}
+
 #ifndef _WIN32
 
 #if defined(__linux__)
@@ -1243,6 +1294,80 @@
 
 #endif /* !defined(__linux__) */
 
+static int dynticks_start_timer(struct qemu_alarm_timer *t)
+{
+    struct sigevent ev;
+    timer_t host_timer;
+    struct sigaction act;
+
+    sigfillset(&act.sa_mask);
+    act.sa_flags = 0;
+#if defined(TARGET_I386) && defined(USE_CODE_COPY)
+    act.sa_flags |= SA_ONSTACK;
+#endif
+    act.sa_handler = host_alarm_handler;
+
+    sigaction(SIGALRM, &act, NULL);
+
+    ev.sigev_value.sival_int = 0;
+    ev.sigev_notify = SIGEV_SIGNAL;
+    ev.sigev_signo = SIGALRM;
+
+    if (timer_create(CLOCK_REALTIME, &ev, &host_timer)) {
+        perror("timer_create");
+
+        /* disable dynticks */
+        fprintf(stderr, "Dynamic Ticks disabled\n");
+
+        return -1;
+    }
+
+    t->priv = (void *)host_timer;
+
+    return 0;
+}
+
+static void dynticks_stop_timer(struct qemu_alarm_timer *t)
+{
+    timer_t host_timer = (timer_t)t->priv;
+
+    timer_delete(host_timer);
+}
+
+static void dynticks_rearm_timer(struct qemu_alarm_timer *t)
+{
+    timer_t host_timer = (timer_t)t->priv;
+    struct itimerspec timeout;
+    int64_t nearest_delta_us = INT64_MAX;
+    int64_t current_us;
+
+    if (!active_timers[QEMU_TIMER_REALTIME] &&
+                !active_timers[QEMU_TIMER_VIRTUAL])
+            return;
+
+    nearest_delta_us = qemu_next_deadline();
+
+    /* check whether a timer is already running */
+    if (timer_gettime(host_timer, &timeout)) {
+        perror("gettime");
+        fprintf(stderr, "Internal timer error: aborting\n");
+        exit(1);
+    }
+    current_us = timeout.it_value.tv_sec * 1000000 + timeout.it_value.tv_nsec/1000;
+    if (current_us && current_us <= nearest_delta_us)
+        return;
+
+    timeout.it_interval.tv_sec = 0;
+    timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */
+    timeout.it_value.tv_sec =  nearest_delta_us / 1000000;
+    timeout.it_value.tv_nsec = (nearest_delta_us % 1000000) * 1000;
+    if (timer_settime(host_timer, 0 /* RELATIVE */, &timeout, NULL)) {
+        perror("settime");
+        fprintf(stderr, "Internal timer error: aborting\n");
+        exit(1);
+    }
+}
+
 static int unix_start_timer(struct qemu_alarm_timer *t)
 {
     struct sigaction act;
@@ -1288,6 +1413,7 @@
 {
     TIMECAPS tc;
     struct qemu_alarm_win32 *data = t->priv;
+    UINT flags;
 
     data->host_alarm = CreateEvent(NULL, FALSE, FALSE, NULL);
     if (!data->host_alarm) {
@@ -1304,11 +1430,17 @@
 
     timeBeginPeriod(data->period);
 
+    flags = TIME_CALLBACK_FUNCTION;
+    if (alarm_has_dynticks(t))
+        flags |= TIME_ONESHOT;
+    else
+        flags |= TIME_PERIODIC;
+
     data->timerId = timeSetEvent(1,         // interval (ms)
                         data->period,       // resolution
                         host_alarm_handler, // function
                         (DWORD)t,           // parameter
-                        TIME_PERIODIC | TIME_CALLBACK_FUNCTION);
+                        flags);
 
     if (!data->timerId) {
         perror("Failed to initialize win32 alarm timer");
@@ -1333,6 +1465,35 @@
     CloseHandle(data->host_alarm);
 }
 
+static void win32_rearm_timer(struct qemu_alarm_timer *t)
+{
+    struct qemu_alarm_win32 *data = t->priv;
+    uint64_t nearest_delta_us;
+
+    if (!active_timers[QEMU_TIMER_REALTIME] &&
+                !active_timers[QEMU_TIMER_VIRTUAL])
+            return;
+
+    nearest_delta_us = qemu_next_deadline();
+    nearest_delta_us /= 1000;
+
+    timeKillEvent(data->timerId);
+
+    data->timerId = timeSetEvent(1,
+                        data->period,
+                        host_alarm_handler,
+                        (DWORD)t,
+                        TIME_ONESHOT | TIME_PERIODIC);
+
+    if (!data->timerId) {
+        perror("Failed to re-arm win32 alarm timer");
+
+        timeEndPeriod(data->period);
+        CloseHandle(data->host_alarm);
+        exit(1);
+    }
+}
+
 #endif /* _WIN32 */
 
 static void init_timer_alarm(void)
@@ -6491,6 +6652,7 @@
         cpu_enable_ticks();
         vm_running = 1;
         vm_state_notify(1);
+        qemu_rearm_alarm_timer(alarm_timer);
     }
 }
 

Luca
-- 
Let me make your mind, leave yourself behind
Be not afraid

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastructure - take2
From: Anthony Liguori @ 2007-08-18 23:58 UTC (permalink / raw)
  To: Luca Tettamanti; +Cc: kvm-devel, qemu-devel

I think this is a really nice and important patch set.  Just a couple
things:

On Sun, 2007-08-19 at 00:02 +0200, Luca Tettamanti wrote:

> > In this case the dyn-tick minimum resolution will be 1 msec. I believe it
> > should work OK since this is the case without any dyn-tick.
> 
> Actually the minimum resolution depends on the host HZ setting, but - yes -
> essentially you get the same behaviour as the "unix" timer, plus the
> overhead of reprogramming the timer.

Is this significant?  At a high guest HZ, this could be quite a lot
of additional syscalls, right?

> Add support for dynamic ticks.
> 
> If the dynticks alarm timer is used, qemu does not attempt to generate
> SIGALRM at a constant rate. Rather, the system timer is set to generate
> SIGALRM only when it is needed. The dynticks timer reduces the number of
> SIGALRMs sent to idle dynamic-ticked guests.
> Original patch from Dan Kenigsberg <dank@qumranet.com>
> 
> Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>
> 
> ---
>  vl.c |  178 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 170 insertions(+), 8 deletions(-)
> 
> Index: qemu/vl.c
> ===================================================================
> --- qemu.orig/vl.c	2007-08-18 23:23:47.000000000 +0200
> +++ qemu/vl.c	2007-08-18 23:23:53.000000000 +0200
> @@ -784,12 +784,31 @@
>  
>  struct qemu_alarm_timer {
>      char const *name;
> +    unsigned int flags;
>  
>      int (*start)(struct qemu_alarm_timer *t);
>      void (*stop)(struct qemu_alarm_timer *t);
> +    void (*rearm)(struct qemu_alarm_timer *t);
>      void *priv;
>  };
>  
> +#define ALARM_FLAG_DYNTICKS  0x1
> +
> +static inline int alarm_has_dynticks(struct qemu_alarm_timer *t)
> +{
> +    return t->flags & ALARM_FLAG_DYNTICKS;
> +}
> +
> +static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t) {

The '{' should be on the next line.

The rest looks fine.

Regards,

Anthony Liguori

> +    if (!alarm_has_dynticks(t))
> +        return;
> +
> +    t->rearm(t);
> +}
> +
> +/* TODO: MIN_TIMER_REARM_US should be optimized */
> +#define MIN_TIMER_REARM_US 250
> +
>  static struct qemu_alarm_timer *alarm_timer;
>  
>  #ifdef _WIN32
> @@ -802,12 +821,17 @@
>  
>  static int win32_start_timer(struct qemu_alarm_timer *t);
>  static void win32_stop_timer(struct qemu_alarm_timer *t);
> +static void win32_rearm_timer(struct qemu_alarm_timer *t);
>  
>  #else
>  
>  static int unix_start_timer(struct qemu_alarm_timer *t);
>  static void unix_stop_timer(struct qemu_alarm_timer *t);
>  
> +static int dynticks_start_timer(struct qemu_alarm_timer *t);
> +static void dynticks_stop_timer(struct qemu_alarm_timer *t);
> +static void dynticks_rearm_timer(struct qemu_alarm_timer *t);
> +
>  #ifdef __linux__
>  
>  static int hpet_start_timer(struct qemu_alarm_timer *t);
> @@ -816,21 +840,23 @@
>  static int rtc_start_timer(struct qemu_alarm_timer *t);
>  static void rtc_stop_timer(struct qemu_alarm_timer *t);
>  
> -#endif
> +#endif /* __linux__ */
>  
>  #endif /* _WIN32 */
>  
>  static struct qemu_alarm_timer alarm_timers[] = {
> +#ifndef _WIN32
> +    {"dynticks", ALARM_FLAG_DYNTICKS, dynticks_start_timer, dynticks_stop_timer, dynticks_rearm_timer, NULL},
>  #ifdef __linux__
>      /* HPET - if available - is preferred */
> -    {"hpet", hpet_start_timer, hpet_stop_timer, NULL},
> +    {"hpet", 0, hpet_start_timer, hpet_stop_timer, NULL, NULL},
>      /* ...otherwise try RTC */
> -    {"rtc", rtc_start_timer, rtc_stop_timer, NULL},
> +    {"rtc", 0, rtc_start_timer, rtc_stop_timer, NULL, NULL},
>  #endif
> -#ifndef _WIN32
> -    {"unix", unix_start_timer, unix_stop_timer, NULL},
> +    {"unix", 0, unix_start_timer, unix_stop_timer, NULL, NULL},
>  #else
> -    {"win32", win32_start_timer, win32_stop_timer, &alarm_win32_data},
> +    {"dynticks", ALARM_FLAG_DYNTICKS, win32_start_timer, win32_stop_timer, win32_rearm_timer, &alarm_win32_data},
> +    {"win32", 0, win32_start_timer, win32_stop_timer, NULL, &alarm_win32_data},
>  #endif
>      {NULL, }
>  };
> @@ -949,6 +975,8 @@
>          }
>          pt = &t->next;
>      }
> +
> +    qemu_rearm_alarm_timer(alarm_timer);
>  }
>  
>  /* modify the current timer so that it will be fired when current_time
> @@ -1008,6 +1036,7 @@
>          /* run the callback (the timer list can be modified) */
>          ts->cb(ts->opaque);
>      }
> +    qemu_rearm_alarm_timer(alarm_timer);
>  }
>  
>  int64_t qemu_get_clock(QEMUClock *clock)
> @@ -1115,7 +1144,8 @@
>          last_clock = ti;
>      }
>  #endif
> -    if (qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL],
> +    if (alarm_has_dynticks(alarm_timer) ||
> +        qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL],
>                             qemu_get_clock(vm_clock)) ||
>          qemu_timer_expired(active_timers[QEMU_TIMER_REALTIME],
>                             qemu_get_clock(rt_clock))) {
> @@ -1136,6 +1166,27 @@
>      }
>  }
>  
> +static uint64_t qemu_next_deadline(void) {
> +    uint64_t nearest_delta_us = ULLONG_MAX;
> +    uint64_t vmdelta_us;
> +
> +    if (active_timers[QEMU_TIMER_REALTIME])
> +        nearest_delta_us = (active_timers[QEMU_TIMER_REALTIME]->expire_time - qemu_get_clock(rt_clock))*1000;
> +
> +    if (active_timers[QEMU_TIMER_VIRTUAL]) {
> +        /* round up */
> +        vmdelta_us = (active_timers[QEMU_TIMER_VIRTUAL]->expire_time - qemu_get_clock(vm_clock)+999)/1000;
> +        if (vmdelta_us < nearest_delta_us)
> +            nearest_delta_us = vmdelta_us;
> +    }
> +
> +    /* Avoid arming the timer to negative, zero, or too low values */
> +    if (nearest_delta_us <= MIN_TIMER_REARM_US)
> +        nearest_delta_us = MIN_TIMER_REARM_US;
> +
> +    return nearest_delta_us;
> +}
> +
>  #ifndef _WIN32
>  
>  #if defined(__linux__)
> @@ -1243,6 +1294,80 @@
>  
>  #endif /* !defined(__linux__) */
>  
> +static int dynticks_start_timer(struct qemu_alarm_timer *t)
> +{
> +    struct sigevent ev;
> +    timer_t host_timer;
> +    struct sigaction act;
> +
> +    sigfillset(&act.sa_mask);
> +    act.sa_flags = 0;
> +#if defined(TARGET_I386) && defined(USE_CODE_COPY)
> +    act.sa_flags |= SA_ONSTACK;
> +#endif
> +    act.sa_handler = host_alarm_handler;
> +
> +    sigaction(SIGALRM, &act, NULL);
> +
> +    ev.sigev_value.sival_int = 0;
> +    ev.sigev_notify = SIGEV_SIGNAL;
> +    ev.sigev_signo = SIGALRM;
> +
> +    if (timer_create(CLOCK_REALTIME, &ev, &host_timer)) {
> +        perror("timer_create");
> +
> +        /* disable dynticks */
> +        fprintf(stderr, "Dynamic Ticks disabled\n");
> +
> +        return -1;
> +    }
> +
> +    t->priv = (void *)host_timer;
> +
> +    return 0;
> +}
> +
> +static void dynticks_stop_timer(struct qemu_alarm_timer *t)
> +{
> +    timer_t host_timer = (timer_t)t->priv;
> +
> +    timer_delete(host_timer);
> +}
> +
> +static void dynticks_rearm_timer(struct qemu_alarm_timer *t)
> +{
> +    timer_t host_timer = (timer_t)t->priv;
> +    struct itimerspec timeout;
> +    int64_t nearest_delta_us = INT64_MAX;
> +    int64_t current_us;
> +
> +    if (!active_timers[QEMU_TIMER_REALTIME] &&
> +                !active_timers[QEMU_TIMER_VIRTUAL])
> +            return;
> +
> +    nearest_delta_us = qemu_next_deadline();
> +
> +    /* check whether a timer is already running */
> +    if (timer_gettime(host_timer, &timeout)) {
> +        perror("gettime");
> +        fprintf(stderr, "Internal timer error: aborting\n");
> +        exit(1);
> +    }
> +    current_us = timeout.it_value.tv_sec * 1000000 + timeout.it_value.tv_nsec/1000;
> +    if (current_us && current_us <= nearest_delta_us)
> +        return;
> +
> +    timeout.it_interval.tv_sec = 0;
> +    timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */
> +    timeout.it_value.tv_sec =  nearest_delta_us / 1000000;
> +    timeout.it_value.tv_nsec = (nearest_delta_us % 1000000) * 1000;
> +    if (timer_settime(host_timer, 0 /* RELATIVE */, &timeout, NULL)) {
> +        perror("settime");
> +        fprintf(stderr, "Internal timer error: aborting\n");
> +        exit(1);
> +    }
> +}
> +
>  static int unix_start_timer(struct qemu_alarm_timer *t)
>  {
>      struct sigaction act;
> @@ -1288,6 +1413,7 @@
>  {
>      TIMECAPS tc;
>      struct qemu_alarm_win32 *data = t->priv;
> +    UINT flags;
>  
>      data->host_alarm = CreateEvent(NULL, FALSE, FALSE, NULL);
>      if (!data->host_alarm) {
> @@ -1304,11 +1430,17 @@
>  
>      timeBeginPeriod(data->period);
>  
> +    flags = TIME_CALLBACK_FUNCTION;
> +    if (alarm_has_dynticks(t))
> +        flags |= TIME_ONESHOT;
> +    else
> +        flags |= TIME_PERIODIC;
> +
>      data->timerId = timeSetEvent(1,         // interval (ms)
>                          data->period,       // resolution
>                          host_alarm_handler, // function
>                          (DWORD)t,           // parameter
> -                        TIME_PERIODIC | TIME_CALLBACK_FUNCTION);
> +                        flags);
>  
>      if (!data->timerId) {
>          perror("Failed to initialize win32 alarm timer");
> @@ -1333,6 +1465,35 @@
>      CloseHandle(data->host_alarm);
>  }
>  
> +static void win32_rearm_timer(struct qemu_alarm_timer *t)
> +{
> +    struct qemu_alarm_win32 *data = t->priv;
> +    uint64_t nearest_delta_us;
> +
> +    if (!active_timers[QEMU_TIMER_REALTIME] &&
> +                !active_timers[QEMU_TIMER_VIRTUAL])
> +            return;
> +
> +    nearest_delta_us = qemu_next_deadline();
> +    nearest_delta_us /= 1000;
> +
> +    timeKillEvent(data->timerId);
> +
> +    data->timerId = timeSetEvent(1,
> +                        data->period,
> +                        host_alarm_handler,
> +                        (DWORD)t,
> +                        TIME_ONESHOT | TIME_CALLBACK_FUNCTION);
> +
> +    if (!data->timerId) {
> +        perror("Failed to re-arm win32 alarm timer");
> +
> +        timeEndPeriod(data->period);
> +        CloseHandle(data->host_alarm);
> +        exit(1);
> +    }
> +}
> +
>  #endif /* _WIN32 */
>  
>  static void init_timer_alarm(void)
> @@ -6491,6 +6652,7 @@
>          cpu_enable_ticks();
>          vm_running = 1;
>          vm_state_notify(1);
> +        qemu_rearm_alarm_timer(alarm_timer);
>      }
>  }
>  
> 
> Luca

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] RE: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure -take2
  2007-08-18 23:58       ` Anthony Liguori
@ 2007-08-19  7:36         ` Dor Laor
  2007-08-19  8:24         ` [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2 Avi Kivity
  1 sibling, 0 replies; 53+ messages in thread
From: Dor Laor @ 2007-08-19  7:36 UTC (permalink / raw)
  To: Anthony Liguori, Luca Tettamanti; +Cc: kvm-devel, qemu-devel

>I think this is a really nice and important patch set.  Just a couple
>things:
>
>On Sun, 2007-08-19 at 00:02 +0200, Luca Tettamanti wrote:
>
>> > In this case the dyn-tick minimum res will be 1msec. I believe it should
>> > work ok since this is the case without any dyn-tick.
>>
>> Actually the minimum resolution depends on the host HZ setting, but -
>> yes - essentially you get the same behaviour as the "unix" timer, plus
>> the overhead of reprogramming the timer.
>
>Is this significant?  At a high guest HZ, this could be quite a lot
>of additional syscalls, right?
>

I believe it's not significant, since without dyn-tick the guest will
get the same number of signals, so the overhead is a doubling of the
syscalls (not an order of magnitude more).

On the other side, guests with low HZ and Linux guests with dyn-tick
will enjoy fewer syscalls.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-18 23:58       ` Anthony Liguori
  2007-08-19  7:36         ` [Qemu-devel] RE: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure -take2 Dor Laor
@ 2007-08-19  8:24         ` Avi Kivity
  2007-08-19 13:10           ` Jamie Lokier
  1 sibling, 1 reply; 53+ messages in thread
From: Avi Kivity @ 2007-08-19  8:24 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel, Luca Tettamanti, qemu-devel

Anthony Liguori wrote:
> I think this is a really nice and important patch set.  Just a couple
> things:
>
> On Sun, 2007-08-19 at 00:02 +0200, Luca Tettamanti wrote:
>
>   
>>> In this case the dyn-tick minimum res will be 1msec. I believe it should
>>> work ok since this is the case without any dyn-tick.
>>>       
>> Actually the minimum resolution depends on the host HZ setting, but -
>> yes - essentially you get the same behaviour as the "unix" timer, plus
>> the overhead of reprogramming the timer.
>>     
>
> Is this significant?  At a high guest HZ, this could be quite a lot
> of additional syscalls, right?
>
>   

At HZ=1000, this adds a small multiple of 1000 syscalls, which is a 
fairly small overhead.  At HZ>1000 (only possible with a dyntick guest), 
the current implementation doesn't cope at all, so we can't compare 
overhead.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-19  8:24         ` [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2 Avi Kivity
@ 2007-08-19 13:10           ` Jamie Lokier
  2007-08-19 13:48             ` [kvm-devel] [Qemu-devel] " Avi Kivity
  0 siblings, 1 reply; 53+ messages in thread
From: Jamie Lokier @ 2007-08-19 13:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm-devel, Luca Tettamanti

Avi Kivity wrote:
> >>>In this case the dyn-tick minimum res will be 1msec. I believe it should
> >>>work ok since this is the case without any dyn-tick.
> >>>      
> >>Actually the minimum resolution depends on the host HZ setting, but -
> >>yes - essentially you get the same behaviour as the "unix" timer, plus
> >>the overhead of reprogramming the timer.
> >>    
> >
> >Is this significant?  At a high guest HZ, this could be quite a lot
> >of additional syscalls, right?
> >
> At HZ=1000, this adds a small multiple of 1000 syscalls, which is a 
> fairly small overhead.

Small, but maybe measurable.

That overhead could be removed if the dyn-tick code were to
predictively set the host timer into a repeating mode when guests do
actually require a regular tick.

I'm thinking when it detects it needed a tick a small number of times
in a row, with the same interval, it could set the host timer to
trigger repeatedly at that interval.  Then it wouldn't need to reprogram
if that stayed the same (except maybe to correct for drift?)

If a tick then _wasn't_ required for that interval (i.e. it was
required for less, more, or not at all), then it would have to
reprogram the host timer.  If it really mattered, it wouldn't have to
reprogram the host timer when the next required tick is further in the
future or not at all; it would simply be a redundant SIGALRM.  In
weird cases that's worthwhile, but I suspect it generally isn't.
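
As a rough sketch (all names here are hypothetical; none of this is
from Luca's patches), the detection could look like:

#include <stdint.h>
#include <time.h>

#define REPEAT_THRESHOLD 4	/* "a small number of times in a row" */

/* Arm one-shot by default; switch to a repeating timer once the same
 * interval has been requested REPEAT_THRESHOLD times in a row. */
static void predictive_rearm(timer_t host_timer, int64_t delta_ns,
                             int64_t *last_delta_ns, int *repeats,
                             int *periodic_armed)
{
    struct itimerspec ts;

    if (delta_ns == *last_delta_ns) {
        if (*periodic_armed)
            return;		/* the repeating timer already covers us */
        if (++*repeats >= REPEAT_THRESHOLD) {
            ts.it_value.tv_sec = ts.it_interval.tv_sec = delta_ns / 1000000000;
            ts.it_value.tv_nsec = ts.it_interval.tv_nsec = delta_ns % 1000000000;
            timer_settime(host_timer, 0, &ts, NULL);
            *periodic_armed = 1;	/* no more per-tick reprogramming */
            return;
        }
    } else {
        *last_delta_ns = delta_ns;	/* interval changed: start over */
        *repeats = 0;
        *periodic_armed = 0;
    }

    /* Fall back to a plain one-shot arm, as the dynticks code does today. */
    ts.it_interval.tv_sec = 0;
    ts.it_interval.tv_nsec = 0;
    ts.it_value.tv_sec = delta_ns / 1000000000;
    ts.it_value.tv_nsec = delta_ns % 1000000000;
    timer_settime(host_timer, 0, &ts, NULL);
}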

-- Jamie

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-19 13:10           ` Jamie Lokier
@ 2007-08-19 13:48             ` Avi Kivity
  2007-08-19 13:57               ` Paul Brook
  0 siblings, 1 reply; 53+ messages in thread
From: Avi Kivity @ 2007-08-19 13:48 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: kvm-devel, qemu-devel

Jamie Lokier wrote:
> Avi Kivity wrote:
>   
>>>>> In this case the dyn-tick minimum res will be 1msec. I believe it should
>>>>> work ok since this is the case without any dyn-tick.
>>>>>      
>>>>>           
>>>> Actually the minimum resolution depends on the host HZ setting, but -
>>>> yes - essentially you get the same behaviour as the "unix" timer, plus
>>>> the overhead of reprogramming the timer.
>>>>    
>>>>         
>>> Is this significant?  At a high guest HZ, this could be quite a lot
>>> of additional syscalls, right?
>>>
>>>       
>> At HZ=1000, this adds a small multiple of 1000 syscalls, which is a 
>> fairly small overhead.
>>     
>
> Small, but maybe measurable.
>
> That overhead could be removed if the dyn-tick code were to
> predictively set the host timer into a repeating mode when guests do
> actually require a regular tick.
>
> I'm thinking when it detects it needed a tick a small number of times
> in a row, with the same interval, it could set the host timer to
> trigger repeatedly at that interval.  Then it wouldn't need to reprogram
> if that stayed the same (except maybe to correct for drift?)
>
> If a tick then _wasn't_ required for that interval (i.e. it was
> required for less, more, or not at all), then it would have to
> reprogram the host timer.  If it really mattered, it wouldn't have to
> reprogram the host timer when the next required tick is further in the
> future or not at all; it would simply be a redundant SIGALRM.  In
> weird cases that's worthwhile, but I suspect it generally isn't.
>
>   

Yes, good thinking, but this should only be done if it actually impacts 
something.  Reducing overhead from 0.1% to 0.05% is not worthwhile if it 
introduces extra complexity.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-19 13:48             ` [kvm-devel] [Qemu-devel] " Avi Kivity
@ 2007-08-19 13:57               ` Paul Brook
  2007-08-19 14:07                 ` Avi Kivity
  2007-08-19 17:15                 ` Jamie Lokier
  0 siblings, 2 replies; 53+ messages in thread
From: Paul Brook @ 2007-08-19 13:57 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm-devel

> Yes, good thinking, but this should only be done if it actually impacts
> something.  Reducing overhead from 0.1% to 0.05% is not worthwhile if it
> introduces extra complexity.


If the overhead is that small, why are we touching this code in the first 
place?

Paul

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-19 13:57               ` Paul Brook
@ 2007-08-19 14:07                 ` Avi Kivity
  2007-08-19 14:27                   ` Dor Laor
  2007-08-20  9:25                   ` Avi Kivity
  2007-08-19 17:15                 ` Jamie Lokier
  1 sibling, 2 replies; 53+ messages in thread
From: Avi Kivity @ 2007-08-19 14:07 UTC (permalink / raw)
  To: Paul Brook; +Cc: kvm-devel, qemu-devel

Paul Brook wrote:
>> Yes, good thinking, but this should only be done if it actually impacts
>> something.  Reducing overhead from 0.1% to 0.05% is not worthwhile if it
>> introduces extra complexity.
>>     
>
>
> If the overhead is that small, why are we touching this code in the first 
> place?
>   

Accuracy is much more important from my point of view.  Also, the 
reduction in the number of signals delivered when the guest uses 100Hz 
is significant.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-19 14:07                 ` Avi Kivity
@ 2007-08-19 14:27                   ` Dor Laor
  2007-08-20  9:25                   ` Avi Kivity
  1 sibling, 0 replies; 53+ messages in thread
From: Dor Laor @ 2007-08-19 14:27 UTC (permalink / raw)
  To: Avi Kivity, Paul Brook; +Cc: kvm-devel, qemu-devel

>>> Yes, good thinking, but this should only be done if it actually
>>> impacts something.  Reducing overhead from 0.1% to 0.05% is not
>>> worthwhile if it introduces extra complexity.
>>
>> If the overhead is that small, why are we touching this code in the
>> first place?
>>
>
>Accuracy is much more important from my point of view.  Also, the
>reduction in the number of signals delivered when the guest uses 100Hz
>is significant.
>

Actually the main motivation for dyn-tick in qemu was to enable a
smooth fix for the time drift problem in the guests. On kernels without
dyn-tick enabled, SIGALRM didn't reach qemu at accurate times, which
caused several PIT irqs to be coalesced into one.

Currently we use a time-drift fix written by Uri Lublin which counts
the number of irqs acked by the guest and tries to inject another one
immediately until the drift is fixed.
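
Roughly, the ack counting works like this (a sketch with hypothetical
names, not the actual kvm code):

extern void inject_guest_irq(void);	/* hypothetical helper */

static long injected;	/* ticks delivered to the guest */
static long acked;	/* ticks the guest acknowledged */

static void on_timer_tick(void)
{
    injected++;
    inject_guest_irq();
}

static void on_guest_ack(void)
{
    acked++;
    if (acked < injected) {
        /* The guest is behind (irqs were coalesced): re-inject
         * immediately instead of waiting for the next period. */
        injected++;
        inject_guest_irq();
    }
}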

This worked fine but caused jumps for multimedia applications.
We wanted a way to smooth it by increasing the rate of the PIT timer,
when drift is detected, to a frequency higher than 1000Hz until the
drift is fixed.

Dan K. is testing application behaviour with it.
-- Dor.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-18 22:02     ` [Qemu-devel] " Luca Tettamanti
  2007-08-18 23:58       ` Anthony Liguori
@ 2007-08-19 16:52       ` Luca
  2007-08-19 19:31         ` Avi Kivity
  1 sibling, 1 reply; 53+ messages in thread
From: Luca @ 2007-08-19 16:52 UTC (permalink / raw)
  To: kvm-devel; +Cc: qemu-devel

On 8/19/07, Luca Tettamanti <kronos.it@gmail.com> wrote:
> +static uint64_t qemu_next_deadline(void) {
> +    uint64_t nearest_delta_us = ULLONG_MAX;
> +    uint64_t vmdelta_us;

Hum, I introduced a bug here... those vars should be signed.
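
For reference, a sketch of just the fix (the body stays as posted in
the patch):

static int64_t qemu_next_deadline(void) {
    /* Signed: an already-expired timer yields a negative delta, which
     * an unsigned type would wrap into a huge value. */
    int64_t nearest_delta_us = INT64_MAX;
    int64_t vmdelta_us;
    /* ... rest unchanged ... */
}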

On the overhead introduced: how do you measure it?

Luca

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-19 13:57               ` Paul Brook
  2007-08-19 14:07                 ` Avi Kivity
@ 2007-08-19 17:15                 ` Jamie Lokier
  2007-08-19 19:29                   ` [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarmtimer " Dor Laor
  2007-08-19 19:30                   ` [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarm timer " Avi Kivity
  1 sibling, 2 replies; 53+ messages in thread
From: Jamie Lokier @ 2007-08-19 17:15 UTC (permalink / raw)
  To: Paul Brook; +Cc: kvm-devel, qemu-devel

Paul Brook wrote:
> > Yes, good thinking, but this should only be done if it actually impacts
> > something.  Reducing overhead from 0.1% to 0.05% is not worthwhile if it
> > introduces extra complexity.
> 
> If the overhead is that small, why are we touching this code in the first 
> place?

Insightful.

A benchmark result was posted which is rather interesting:

>[mkent@localhost ~]$ time ./hackbench 50
>x86_64 host                 : real 0m10.845s
>x86_64 host, bound to 1 cpu : real 0m21.884s
>i386 guest+unix clock       : real 0m49.206s
>i386 guest+hpet clock       : real 0m48.292s
>i386 guest+dynticks clock   : real 0m28.835s
>
>Results are repeatable and verified with a stopwatch because I didn't
>believe them at first :)

I am surprised if 1000 redundant SIGALRMs per second is really causing
70% overhead in normal qemu execution, except on a rather old or slow
machine where signal delivery is very slow.

It would be good to understand the cause of that benchmark result.

-- Jamie

^ permalink raw reply	[flat|nested] 53+ messages in thread

* RE: [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarmtimer infrastrucure - take2
  2007-08-19 17:15                 ` Jamie Lokier
@ 2007-08-19 19:29                   ` Dor Laor
  2007-08-19 19:30                   ` [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarm timer " Avi Kivity
  1 sibling, 0 replies; 53+ messages in thread
From: Dor Laor @ 2007-08-19 19:29 UTC (permalink / raw)
  To: Jamie Lokier, Paul Brook; +Cc: kvm-devel, Avi Kivity, qemu-devel

>Paul Brook wrote:
>> > Yes, good thinking, but this should only be done if it actually
>> > impacts something.  Reducing overhead from 0.1% to 0.05% is not
>> > worthwhile if it introduces extra complexity.
>>
>> If the overhead is that small, why are we touching this code in the
>> first place?
>
>Insightful.
>
>A benchmark result was posted which is rather interesting:
>
>>[mkent@localhost ~]$ time ./hackbench 50
>>x86_64 host                 : real 0m10.845s
>>x86_64 host, bound to 1 cpu : real 0m21.884s
>>i386 guest+unix clock       : real 0m49.206s
>>i386 guest+hpet clock       : real 0m48.292s
>>i386 guest+dynticks clock   : real 0m28.835s
>>
>>Results are repeatable and verified with a stopwatch because I didn't
>>believe them at first :)
>
>I am surprised if 1000 redundant SIGALRMs per second is really causing
>70% overhead in normal qemu execution, except on a rather old or slow
>machine where signal delivery is very slow.
>
>It would be good to understand the cause of that benchmark result.

While I don't know the benchmark (I heard it's something like parallel
chat messaging), the performance gain is probably achieved by the
improved latency and response times that dyn-tick provides.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-19 17:15                 ` Jamie Lokier
  2007-08-19 19:29                   ` [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarmtimer " Dor Laor
@ 2007-08-19 19:30                   ` Avi Kivity
  1 sibling, 0 replies; 53+ messages in thread
From: Avi Kivity @ 2007-08-19 19:30 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: kvm-devel, Paul Brook, qemu-devel

Jamie Lokier wrote:
> Paul Brook wrote:
>   
>>> Yes, good thinking, but this should only be done if it actually impacts
>>> something.  Reducing overhead from 0.1% to 0.05% is not worthwhile if it
>>> introduces extra complexity.
>>>       
>> If the overhead is that small, why are we touching this code in the first 
>> place?
>>     
>
> Insightful.
>
> A benchmark result was posted which is rather interesting:
>
>   
>> [mkent@localhost ~]$ time ./hackbench 50
>> x86_64 host                 : real 0m10.845s
>> x86_64 host, bound to 1 cpu : real 0m21.884s
>> i386 guest+unix clock       : real 0m49.206s
>> i386 guest+hpet clock       : real 0m48.292s
>> i386 guest+dynticks clock   : real 0m28.835s
>>
>> Results are repeatable and verified with a stopwatch because I didn't
>> believe them at first :)
>>     
>
> I am surprised if 1000 redundant SIGALRMs per second is really causing
> 70% overhead in normal qemu execution, except on a rather old or slow
> machine where signal delivery is very slow.
>   
No, something else is happening here.  On kvm, a signal is maybe 10us,
so the SIGALRMs represent 1% of overhead.  Respectable, but not more.

I suspect that the increased accuracy causes something to behave better
(or just differently).  This won't be representative.


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-19 16:52       ` [Qemu-devel] Re: [kvm-devel] " Luca
@ 2007-08-19 19:31         ` Avi Kivity
  2007-08-20 21:20           ` Luca Tettamanti
  0 siblings, 1 reply; 53+ messages in thread
From: Avi Kivity @ 2007-08-19 19:31 UTC (permalink / raw)
  To: Luca; +Cc: kvm-devel, qemu-devel

Luca wrote:
> On 8/19/07, Luca Tettamanti <kronos.it@gmail.com> wrote:
>   
>> +static uint64_t qemu_next_deadline(void) {
>> +    uint64_t nearest_delta_us = ULLONG_MAX;
>> +    uint64_t vmdelta_us;
>>     
>
> Hum, I introduced a bug here... those vars should be signed.
>
> On the overhead introduced: how do you measure it?
>
>   

Run a 100Hz guest, measure cpu usage using something accurate like
cyclesoak, with and without dynticks, with and without kvm.


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-19 14:07                 ` Avi Kivity
  2007-08-19 14:27                   ` Dor Laor
@ 2007-08-20  9:25                   ` Avi Kivity
  1 sibling, 0 replies; 53+ messages in thread
From: Avi Kivity @ 2007-08-20  9:25 UTC (permalink / raw)
  To: Paul Brook; +Cc: kvm-devel, qemu-devel

Avi Kivity wrote:
> Paul Brook wrote:
>>> Yes, good thinking, but this should only be done if it actually impacts
>>> something.  Reducing overhead from 0.1% to 0.05% is not worthwhile 
>>> if it
>>> introduces extra complexity.
>>>     
>>
>>
>> If the overhead is that small, why are we touching this code in the 
>> first place?
>>   
>
> Accuracy is much more important from my point of view.  Also, the 
> reduction in the number of signals delivered when the guest uses 100Hz 
> is significant.
>

You'd also get better battery life on laptops.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-19 19:31         ` Avi Kivity
@ 2007-08-20 21:20           ` Luca Tettamanti
  2007-08-20 21:55             ` malc
  2007-08-21 12:09             ` [Qemu-devel] Re: [kvm-devel] " Avi Kivity
  0 siblings, 2 replies; 53+ messages in thread
From: Luca Tettamanti @ 2007-08-20 21:20 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel, qemu-devel

On Sun, Aug 19, 2007 at 10:31:26PM +0300, Avi Kivity wrote:
> Luca wrote:
> > On 8/19/07, Luca Tettamanti <kronos.it@gmail.com> wrote:
> >   
> >> +static uint64_t qemu_next_deadline(void) {
> >> +    uint64_t nearest_delta_us = ULLONG_MAX;
> >> +    uint64_t vmdelta_us;
> >>     
> >
> > Hum, I introduced a bug here... those vars should be signed.
> >
> > On the overhead introduced: how do you measure it?
> >
> >   
> 
> Run a 100Hz guest, measure cpu usage using something accurate like
> cyclesoak, with and without dynticks, with and without kvm.

Ok, here I've measured the CPU usage on the host when running an idle
guest.

At 100Hz

QEMU
hpet            4.8%
dynticks        5.1%

Note: I've taken the mean over a period of 20 secs, but the difference
between hpet and dynticks is well inside the variability of the test.

KVM
hpet            2.2%
dynticks        1.0%

Hum... here the numbers jump a bit, but dynticks is always below hpet.

At 1000Hz:

QEMU
hpet            5.5%
dynticks       11.7%

KVM
hpet            3.4%
dynticks        7.3%

No surprises here, you can see the additional 1k syscalls per second. On
the bright side, keep in mind that with a tickless guest and dynticks
I've seen as little as 50-60 timer ticks per second.

Hackbench (hackbench -pipe 50) inside the guest:

QEMU: impossible to measure; the variance of the results is much bigger
than the difference between dynticks and hpet.

KVM: 
Around 0.8s slower in the case of dynticks; the variance of the results
is about 0.3s in both cases.

Luca
-- 
"Chi parla in tono cortese, ma continua a prepararsi, potra` andare avanti;
 chi parla in tono bellicoso e avanza rapidamente dovra` ritirarsi" 
Sun Tzu -- L'arte della guerra

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-20 21:20           ` Luca Tettamanti
@ 2007-08-20 21:55             ` malc
  2007-08-20 22:49               ` [kvm-devel] [Qemu-devel] " Luca
  2007-08-21 12:09             ` [Qemu-devel] Re: [kvm-devel] " Avi Kivity
  1 sibling, 1 reply; 53+ messages in thread
From: malc @ 2007-08-20 21:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm-devel

On Mon, 20 Aug 2007, Luca Tettamanti wrote:

> On Sun, Aug 19, 2007 at 10:31:26PM +0300, Avi Kivity wrote:
>> Luca wrote:
>>> On 8/19/07, Luca Tettamanti <kronos.it@gmail.com> wrote:
>>>
>>>> +static uint64_t qemu_next_deadline(void) {
>>>> +    uint64_t nearest_delta_us = ULLONG_MAX;
>>>> +    uint64_t vmdelta_us;
>>>>
>>>
>>> Hum, I introduced a bug here... those vars should be signed.
>>>
>>> On the overhead introduced: how do you measure it?
>>>
>>>
>>
>> Run a 100Hz guest, measure cpu usage using something accurate like
>> cyclesoak, with and without dynticks, with and without kvm.
>
> Ok, here I've measured the CPU usage on the host when running an idle
> guest.
>

[..snip the numbers..]

After briefly looking at cyclesoak it indeed looks like it does the
right thing, but given the limitations of a user-space-only approach it
introduces some (sometimes really unwanted) variables into the system;
those can, and I guess will, influence things.

The upshot is this: if you have used any standard utility (iostat,
top - basically anything /proc/stat based) the accounting has a fair
chance of being inaccurate. If cyclesoak is what you have used then
the results should be better, but I would still be worried about
them.

In conclusion until kernel native accounting is fixed your best bet
is to use: http://www.boblycat.org/~malc/apc/

-- 
vale

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-20 21:55             ` malc
@ 2007-08-20 22:49               ` Luca
  0 siblings, 0 replies; 53+ messages in thread
From: Luca @ 2007-08-20 22:49 UTC (permalink / raw)
  To: malc; +Cc: kvm-devel, qemu-devel

On 8/20/07, malc <av1474@comtv.ru> wrote:
> On Mon, 20 Aug 2007, Luca Tettamanti wrote:
>
> > On Sun, Aug 19, 2007 at 10:31:26PM +0300, Avi Kivity wrote:
> >> Luca wrote:
> >>> On 8/19/07, Luca Tettamanti <kronos.it@gmail.com> wrote:
> >>>
> >>>> +static uint64_t qemu_next_deadline(void) {
> >>>> +    uint64_t nearest_delta_us = ULLONG_MAX;
> >>>> +    uint64_t vmdelta_us;
> >>>>
> >>>
> >>> Hum, I introduced a bug here... those vars should be signed.
> >>>
> >>> On the overhead introduced: how do you measure it?
> >>>
> >>>
> >>
> >> Run a 100Hz guest, measure cpu usage using something accurate like
> >> cyclesoak, with and without dynticks, with and without kvm.
> >
> > Ok, here I've measured the CPU usage on the host when running an idle
> > guest.
> >
[...]
> The upshot is this: if you have used any standard utility (iostat,
> top - basically anything /proc/stat based) the accounting has a fair
> chance of being inaccurate. If cyclesoak is what you have used then
> the results should be better, but I would still be worried about
> them.

Yes, I've used cyclesoak.

Luca

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-20 21:20           ` Luca Tettamanti
  2007-08-20 21:55             ` malc
@ 2007-08-21 12:09             ` Avi Kivity
  2007-08-21 19:38               ` Luca Tettamanti
  1 sibling, 1 reply; 53+ messages in thread
From: Avi Kivity @ 2007-08-21 12:09 UTC (permalink / raw)
  To: Luca Tettamanti; +Cc: kvm-devel, qemu-devel

Luca Tettamanti wrote:
>>>   
>>>       
>> Run a 100Hz guest, measure cpu usage using something accurate like
>> cyclesoak, with and without dynticks, with and without kvm.
>>     
>
> Ok, here I've measured the CPU usage on the host when running an idle
> guest.
>
> At 100Hz
>
> QEMU
> hpet            4.8%
> dynticks        5.1%
>
> Note: I've taken the mean over a period of 20 secs, but the difference
> between hpet and dynticks is well inside the variability of the test.
>
> KVM
> hpet            2.2%
> dynticks        1.0%
>
> Hum... here the numbers jump a bit, but dynticks is always below hpet.
>   

The differences here are small, so I'll focus on the 1000Hz case.

> At 1000Hz:
>
> QEMU
> hpet            5.5%
> dynticks       11.7%
>
> KVM
> hpet            3.4%
> dynticks        7.3%
>
> No surprises here, you can see the additional 1k syscalls per second. 

This is very surprising to me.  The 6.2% difference for the qemu case 
translates to 62ms per second, or 62us per tick at 1000Hz.  That's more 
than a hundred simple syscalls on modern processors.  We shouldn't have 
to issue a hundred syscalls per guest clock tick.

The difference with kvm is smaller (just 3.9%), which is not easily 
explained as the time for the extra syscalls should be about the same.  
My guess is that guest behavior is different; with dynticks the guest 
does about twice as much work as with hpet.

Can you verify this by running

    strace -c -p `pgrep qemu` & sleep 10; pkill strace

for all 4 cases, and posting the results?


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-17 23:11 ` [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer Luca Tettamanti
@ 2007-08-21 19:24   ` Matthew Kent
  2007-08-21 19:40     ` Luca
  0 siblings, 1 reply; 53+ messages in thread
From: Matthew Kent @ 2007-08-21 19:24 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm-devel, Luca Tettamanti

On Sat, 2007-18-08 at 01:11 +0200, Luca Tettamanti wrote:
> plain text document attachment (clock-hpet)
> Linux operates the HPET timer in legacy replacement mode, which means that
> the periodic interrupt of the CMOS RTC is not delivered (qemu won't be able
> to use /dev/rtc). Add support for HPET (/dev/hpet) as a replacement for the
> RTC; the periodic interrupt is delivered via SIGIO and is handled in the
> same way as the RTC timer.
> 
> Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>

I must be missing something silly here.. should I be able to open more
than one instance of qemu with -clock hpet? Because upon invoking a
second instance of qemu HPET_IE_ON fails.

I also tried running the example in the kernel docs under
Documentation/hpet.txt

mkent@localhost [/home/mkent]# ./demo poll /dev/hpet 1 1000
-hpet: executing poll
hpet_poll: info.hi_flags 0x0
hpet_poll: expired time = 0x8
hpet_poll: revents = 0x1
hpet_poll: data 0x1

<in another term>
mkent@localhost [/home/mkent]# ./demo poll /dev/hpet 1 1000
-hpet: executing poll
hpet_poll: info.hi_flags 0x0
hpet_poll, HPET_IE_ON failed

This is on 2.6.23-rc3 x86_64 with the patch-2.6.23-rc3-hrt2.patch
hrtimers patch.
-- 
Matthew Kent <mkent@magoazul.com>
http://magoazul.com

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-21 12:09             ` [Qemu-devel] Re: [kvm-devel] " Avi Kivity
@ 2007-08-21 19:38               ` Luca Tettamanti
  2007-08-21 19:44                 ` malc
  2007-08-22  5:02                 ` Avi Kivity
  0 siblings, 2 replies; 53+ messages in thread
From: Luca Tettamanti @ 2007-08-21 19:38 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel, qemu-devel

Avi Kivity wrote:
> Luca Tettamanti wrote:
>> At 1000Hz:
>>
>> QEMU
>> hpet            5.5%
>> dynticks       11.7%
>>
>> KVM
>> hpet            3.4%
>> dynticks        7.3%
>>
>> No surprises here, you can see the additional 1k syscalls per second. 
>
> This is very surprising to me.  The 6.2% difference for the qemu case 
> translates to 62ms per second, or 62us per tick at 1000Hz.  That's more 
> than a hundred simple syscalls on modern processors.  We shouldn't have to 
> issue a hundred syscalls per guest clock tick.

APIC or PIT interrupts are delivered using the timer, which will be
re-armed after each tick, so I'd expect 1k timer_settime per second. But
according to strace it's not happening, maybe I'm misreading the code?

> The difference with kvm is smaller (just 3.9%), which is not easily 
> explained as the time for the extra syscalls should be about the same.  My 
> guess is that guest behavior is different; with dynticks the guest does 
> about twice as much work as with hpet.

Actually I'm having trouble with cyclesoak (probably its calibration);
numbers are not very stable across multiple runs...

I've also tried APC which was suggested by malc[1] and:
- readings are far more stable
- the gap between dynticks and non-dynticks seems not significant

> Can you verify this by running
>
>    strace -c -p `pgrep qemu` & sleep 10; pkill strace
>
> for all 4 cases, and posting the results?

Plain QEMU:

With dynticks:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 57.97    0.000469           0     13795           clock_gettime
 32.88    0.000266           0      1350           gettimeofday
  7.42    0.000060           0      1423      1072 sigreturn
  1.73    0.000014           0      5049           timer_gettime
  0.00    0.000000           0      1683      1072 select
  0.00    0.000000           0      2978           timer_settime
------ ----------- ----------- --------- --------- ----------------
100.00    0.000809                 26278      2144 total

HPET:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 87.48    0.010459           1     10381     10050 select
  8.45    0.001010           0     40736           clock_gettime
  2.73    0.000326           0     10049           gettimeofday
  1.35    0.000161           0     10086     10064 sigreturn
------ ----------- ----------- --------- --------- ----------------
100.00    0.011956                 71252     20114 total

Unix (SIGALRM):

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 90.36    0.011663           1     10291      9959 select
  7.38    0.000953           0     40355           clock_gettime
  2.05    0.000264           0      9960           gettimeofday
  0.21    0.000027           0      9985      9969 sigreturn
------ ----------- ----------- --------- --------- ----------------
100.00    0.012907                 70591     19928 total

And KVM:

dynticks:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 78.90    0.004001           1      6681      5088 rt_sigtimedwait
 10.87    0.000551           0     27901           clock_gettime
  4.93    0.000250           0      7622           timer_settime
  4.30    0.000218           0     10078           timer_gettime
  0.39    0.000020           0      3863           gettimeofday
  0.35    0.000018           0      6054           ioctl
  0.26    0.000013           0      4196           select
  0.00    0.000000           0      1593           rt_sigaction
------ ----------- ----------- --------- --------- ----------------
100.00    0.005071                 67988      5088 total

HPET:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 90.20    0.011029           0     32437     22244 rt_sigtimedwait
  4.46    0.000545           0     44164           clock_gettime
  2.59    0.000317           0     12128           gettimeofday
  1.50    0.000184           0     10193           rt_sigaction
  1.10    0.000134           0     12461           select
  0.15    0.000018           0      6060           ioctl
------ ----------- ----------- --------- --------- ----------------
100.00    0.012227                117443     22244 total

Unix:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 83.29    0.012522           0     31652     21709 rt_sigtimedwait
  6.91    0.001039           0     43125           clock_gettime
  3.50    0.000526           0      6042           ioctl
  2.74    0.000412           0      9943           rt_sigaction
  1.98    0.000298           0     12183           select
  1.58    0.000238           0     11850           gettimeofday
------ ----------- ----------- --------- --------- ----------------
100.00    0.015035                114795     21709 total

The guest is an idle kernel with HZ=1000.

Luca
[1] copy_to_user inside spinlock is a big no-no ;)
-- 
The sum of intelligence on Earth is a constant.
The population is growing.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-21 19:24   ` Matthew Kent
@ 2007-08-21 19:40     ` Luca
  2007-08-21 20:15       ` Matthew Kent
  2007-09-03  8:40       ` GUERRAZ Francois
  0 siblings, 2 replies; 53+ messages in thread
From: Luca @ 2007-08-21 19:40 UTC (permalink / raw)
  To: Matthew Kent; +Cc: kvm-devel, qemu-devel

On 8/21/07, Matthew Kent <mkent@magoazul.com> wrote:
> On Sat, 2007-18-08 at 01:11 +0200, Luca Tettamanti wrote:
> > plain text document attachment (clock-hpet)
> > Linux operates the HPET timer in legacy replacement mode, which means that
> > the periodic interrupt of the CMOS RTC is not delivered (qemu won't be able
> > to use /dev/rtc). Add support for HPET (/dev/hpet) as a replacement for the
> > RTC; the periodic interrupt is delivered via SIGIO and is handled in the
> > same way as the RTC timer.
> >
> > Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>
>
> I must be missing something silly here.. should I be able to open more
> than one instance of qemu with -clock hpet? Because upon invoking a
> second instance of qemu HPET_IE_ON fails.

It depends on your hardware. Theoretically it's possible, but I've yet
to see a motherboard with more than one periodic timer.

"dmesg | grep hpet" should tell you something like:

hpet0: 3 64-bit timers, 14318180 Hz
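
For reference, the periodic setup boils down to this (a minimal sketch
along the lines of the Documentation/hpet.txt demo; error handling
trimmed):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/hpet.h>

int main(void)
{
    unsigned long hits;
    int fd = open("/dev/hpet", O_RDONLY);

    if (fd < 0)
        return 1;
    ioctl(fd, HPET_IRQFREQ, 1000);      /* request a 1000Hz periodic irq */
    if (ioctl(fd, HPET_IE_ON, 0) < 0) { /* fails if no free periodic timer */
        perror("HPET_IE_ON");
        return 1;
    }
    read(fd, &hits, sizeof(hits));      /* blocks until an interrupt fires */
    printf("interrupts: %lu\n", hits);
    ioctl(fd, HPET_IE_OFF, 0);
    close(fd);
    return 0;
}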

Luca

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-21 19:38               ` Luca Tettamanti
@ 2007-08-21 19:44                 ` malc
  2007-08-22  5:02                 ` Avi Kivity
  1 sibling, 0 replies; 53+ messages in thread
From: malc @ 2007-08-21 19:44 UTC (permalink / raw)
  To: Luca Tettamanti; +Cc: kvm-devel, qemu-devel

On Tue, 21 Aug 2007, Luca Tettamanti wrote:

> Avi Kivity wrote:
>> Luca Tettamanti wrote:
>>> At 1000Hz:
>>>
>>> QEMU
>>> hpet            5.5%
>>> dynticks       11.7%
>>>
>>> KVM
>>> hpet            3.4%
>>> dynticks        7.3%
>>>
>>> No surprises here, you can see the additional 1k syscalls per second.
>>
>> This is very surprising to me.  The 6.2% difference for the qemu case
>> translates to 62ms per second, or 62us per tick at 1000Hz.  That's more
>> than a hundred simple syscalls on modern processors.  We shouldn't have to
>> issue a hundred syscalls per guest clock tick.
>
[..snip prelude..]

> I've also tried APC which was suggested by malc[1] and:
> - readings are far more stable
> - the gap between dynticks and non-dynticks seems not significant

[..don't snip the obvious fact and snip the numbers..]

>
> Luca
> [1] copy_to_user inside spinlock is a big no-no ;)
>

[..notice a projectile targeting at you and rush to see the code..]

Mixed feelings about this... But in principle the code of course is
dangerous; thank you kindly for pointing this out.

I see two ways out of this:

a. moving the lock/unlock inside the loop, with the unlock preceding
    the (possibly sleeping) copy_to_user

b. filling temporaries and, after the loop is done, copying them to
   user space in one go (see the sketch below)
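
A minimal sketch of (b), with hypothetical types and names:

	struct sample snap[NR_SAMPLES];		/* temporary buffer */

	spin_lock_irqsave(&lock, flags);
	memcpy(snap, samples, sizeof(snap));	/* no sleeping while locked */
	spin_unlock_irqrestore(&lock, flags);

	if (copy_to_user(ubuf, snap, sizeof(snap)))	/* may sleep, now safely */
		return -EFAULT;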

Too late, too hot; I wouldn't mind being on the receiving side of
some good advice.

-- 
vale

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-21 19:40     ` Luca
@ 2007-08-21 20:15       ` Matthew Kent
  2007-08-22  6:48         ` [kvm-devel] " Dan Kenigsberg
  2007-09-03  8:40       ` GUERRAZ Francois
  1 sibling, 1 reply; 53+ messages in thread
From: Matthew Kent @ 2007-08-21 20:15 UTC (permalink / raw)
  To: Luca; +Cc: kvm-devel, qemu-devel

On Tue, 2007-21-08 at 21:40 +0200, Luca wrote:
> On 8/21/07, Matthew Kent <mkent@magoazul.com> wrote:
> > On Sat, 2007-18-08 at 01:11 +0200, Luca Tettamanti wrote:
> > > plain text document attachment (clock-hpet)
> > > Linux operates the HPET timer in legacy replacement mode, which means that
> > > the periodic interrupt of the CMOS RTC is not delivered (qemu won't be able
> > > to use /dev/rtc). Add support for HPET (/dev/hpet) as a replacement for the
> > > RTC; the periodic interrupt is delivered via SIGIO and is handled in the
> > > same way as the RTC timer.
> > >
> > > Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>
> >
> > I must be missing something silly here.. should I be able to open more
> > than one instance of qemu with -clock hpet? Because upon invoking a
> > second instance of qemu HPET_IE_ON fails.
> 
> It depends on your hardware. Theoretically it's possible, but I've yet
> to see a motherboard with more than one periodic timer.

Ah thank you, after re-reading the docs I think I better understand
this.
-- 
Matthew Kent <mkent@magoazul.com>
http://magoazul.com

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-21 19:38               ` Luca Tettamanti
  2007-08-21 19:44                 ` malc
@ 2007-08-22  5:02                 ` Avi Kivity
  2007-08-22 16:12                   ` Luca Tettamanti
  1 sibling, 1 reply; 53+ messages in thread
From: Avi Kivity @ 2007-08-22  5:02 UTC (permalink / raw)
  To: Luca Tettamanti; +Cc: kvm-devel, qemu-devel

Luca Tettamanti wrote:

> Actually I'm having trouble with cyclesoak (probably its calibration);
> numbers are not very stable across multiple runs...
>   

I've had good results with cyclesoak; maybe you need to run it in
runlevel 3 so the load generated by moving the mouse or breathing
doesn't affect measurements.

> I've also tried APC which was suggested by malc[1] and:
> - readings are far more stable
> - the gap between dynticks and non-dynticks seems not significant
>
>   
>> Can you verify this by running
>>
>>    strace -c -p `pgrep qemu` & sleep 10; pkill strace
>>
>> for all 4 cases, and posting the results?
>>     
>
> Plain QEMU:
>
> With dynticks:
>
> % time     seconds  usecs/call     calls    errors syscall
> ------ ----------- ----------- --------- --------- ----------------
>  57.97    0.000469           0     13795           clock_gettime
>  32.88    0.000266           0      1350           gettimeofday
>   7.42    0.000060           0      1423      1072 sigreturn
>   1.73    0.000014           0      5049           timer_gettime
>   0.00    0.000000           0      1683      1072 select
>   0.00    0.000000           0      2978           timer_settime
> ------ ----------- ----------- --------- --------- ----------------
> 100.00    0.000809                 26278      2144 total
>   

The 1072 select() errors are the delivered ticks (EINTR).  But why only
~1000?  I would have expected 10000 for a 1000Hz guest over a 10 sec period.
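
For reference, the pattern being counted is roughly this (a sketch, not
qemu's actual main loop):

#include <errno.h>
#include <sys/select.h>

static void wait_for_io_or_tick(int nfds, fd_set *rfds)
{
    struct timeval tv = { 0, 10000 };	/* finite poll timeout */

    /* A SIGALRM interrupting select() shows up in strace as
     * select(...) = -1 EINTR, so the error count above tracks the
     * ticks that were actually delivered. */
    if (select(nfds, rfds, NULL, NULL, &tv) < 0 && errno == EINTR) {
        /* the alarm fired: run expired qemu timers, then loop */
    }
}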

> HPET:
>
> % time     seconds  usecs/call     calls    errors syscall
> ------ ----------- ----------- --------- --------- ----------------
>  87.48    0.010459           1     10381     10050 select
>   8.45    0.001010           0     40736           clock_gettime
>   2.73    0.000326           0     10049           gettimeofday
>   1.35    0.000161           0     10086     10064 sigreturn
> ------ ----------- ----------- --------- --------- ----------------
> 100.00    0.011956                 71252     20114 total
>   

This is expected.  1 tick per millisecond.

> Unix (SIGALRM):
>
> % time     seconds  usecs/call     calls    errors syscall
> ------ ----------- ----------- --------- --------- ----------------
>  90.36    0.011663           1     10291      9959 select
>   7.38    0.000953           0     40355           clock_gettime
>   2.05    0.000264           0      9960           gettimeofday
>   0.21    0.000027           0      9985      9969 sigreturn
> ------ ----------- ----------- --------- --------- ----------------
> 100.00    0.012907                 70591     19928 total
>   

Same here.

> And KVM:
>
> dynticks:
>
> % time     seconds  usecs/call     calls    errors syscall
> ------ ----------- ----------- --------- --------- ----------------
>  78.90    0.004001           1      6681      5088 rt_sigtimedwait
>  10.87    0.000551           0     27901           clock_gettime
>   4.93    0.000250           0      7622           timer_settime
>   4.30    0.000218           0     10078           timer_gettime
>   0.39    0.000020           0      3863           gettimeofday
>   0.35    0.000018           0      6054           ioctl
>   0.26    0.000013           0      4196           select
>   0.00    0.000000           0      1593           rt_sigaction
> ------ ----------- ----------- --------- --------- ----------------
> 100.00    0.005071                 67988      5088 total
>   

kvm uses sigtimedwait() to wait for signals.  Here, an error (EAGAIN,
the timeout case) indicates we did _not_ get a wakeup, so there are
only about 1600 wakeups in a 10 second period.  Strange.  Some
calibration error?
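
Again for reference, roughly the wait pattern (a sketch, not kvm's
actual code):

#include <errno.h>
#include <signal.h>
#include <time.h>

static int wait_for_tick(const struct timespec *timeout)
{
    sigset_t set;
    siginfo_t info;

    sigemptyset(&set);
    sigaddset(&set, SIGALRM);
    /* Timeout -> -1 with errno == EAGAIN (counted as an 'error' in the
     * strace table); a delivered signal -> success (a real wakeup). */
    return sigtimedwait(&set, &info, timeout);
}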

> HPET:
>
> % time     seconds  usecs/call     calls    errors syscall
> ------ ----------- ----------- --------- --------- ----------------
>  90.20    0.011029           0     32437     22244 rt_sigtimedwait
>   4.46    0.000545           0     44164           clock_gettime
>   2.59    0.000317           0     12128           gettimeofday
>   1.50    0.000184           0     10193           rt_sigaction
>   1.10    0.000134           0     12461           select
>   0.15    0.000018           0      6060           ioctl
> ------ ----------- ----------- --------- --------- ----------------
> 100.00    0.012227                117443     22244 total
>   

10K wakeups per second.  The code is not particularly efficient (11
syscalls per tick), but overhead is still low.

> Unix:
>
> % time     seconds  usecs/call     calls    errors syscall
> ------ ----------- ----------- --------- --------- ----------------
>  83.29    0.012522           0     31652     21709 rt_sigtimedwait
>   6.91    0.001039           0     43125           clock_gettime
>   3.50    0.000526           0      6042           ioctl
>   2.74    0.000412           0      9943           rt_sigaction
>   1.98    0.000298           0     12183           select
>   1.58    0.000238           0     11850           gettimeofday
> ------ ----------- ----------- --------- --------- ----------------
> 100.00    0.015035                114795     21709 total
>   

Same thing.

> The guest is an idle kernel with HZ=1000.
>   

Can you double check this?  The dyntick results show that this is either
a 100Hz kernel, or that there is a serious bug in dynticks.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-21 20:15       ` Matthew Kent
@ 2007-08-22  6:48         ` Dan Kenigsberg
  2007-08-22  7:03           ` Avi Kivity
  0 siblings, 1 reply; 53+ messages in thread
From: Dan Kenigsberg @ 2007-08-22  6:48 UTC (permalink / raw)
  To: Matthew Kent; +Cc: kvm-devel, Luca, qemu-devel

On Tue, Aug 21, 2007 at 01:15:22PM -0700, Matthew Kent wrote:
> On Tue, 2007-21-08 at 21:40 +0200, Luca wrote:
> > On 8/21/07, Matthew Kent <mkent@magoazul.com> wrote:
> > > On Sat, 2007-18-08 at 01:11 +0200, Luca Tettamanti wrote:
> > > > plain text document attachment (clock-hpet)
> > > > Linux operates the HPET timer in legacy replacement mode, which means that
> > > > the periodic interrupt of the CMOS RTC is not delivered (qemu won't be able
> > > > to use /dev/rtc). Add support for HPET (/dev/hpet) as a replacement for the
> > > > RTC; the periodic interrupt is delivered via SIGIO and is handled in the
> > > > same way as the RTC timer.
> > > >
> > > > Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>
> > >
> > > I must be missing something silly here.. should I be able to open more
> > > than one instance of qemu with -clock hpet? Because upon invoking a
> > > second instance of qemu HPET_IE_ON fails.
> > 
> > It depends on your hardware. Theoretically it's possible, but I've yet
> > to see a motherboard with more than one periodic timer.
> 
> Ah thank you, after re-reading the docs I think I better understand
> this.

At the risk of being off-topic, maybe you can help me try the hpet support.
When I try the hpet Documentation demo I get

# ./hpet poll /dev/hpet 1 1000
-hpet: executing poll
hpet_poll: info.hi_flags 0x0
hpet_poll, HPET_IE_ON failed

while I have

$ dmesg|grep -i HPET
ACPI: HPET 7D5B6AE0, 0038 (r1 A M I  OEMHPET   5000708 MSFT       97)
ACPI: HPET id: 0x8086a301 base: 0xfed00000
hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0, 0
hpet0: 4 64-bit timers, 14318180 Hz
hpet_resources: 0xfed00000 is busy
Time: hpet clocksource has been installed.

Any idea what I am misconfiguring?

Thanks,

Dan.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-22  6:48         ` [kvm-devel] " Dan Kenigsberg
@ 2007-08-22  7:03           ` Avi Kivity
  2007-08-22 12:34             ` Andi Kleen
  0 siblings, 1 reply; 53+ messages in thread
From: Avi Kivity @ 2007-08-22  7:03 UTC (permalink / raw)
  To: Dan Kenigsberg; +Cc: Matthew Kent, kvm-devel, qemu-devel

Dan Kenigsberg wrote:
> On Tue, Aug 21, 2007 at 01:15:22PM -0700, Matthew Kent wrote:
>   
>> On Tue, 2007-21-08 at 21:40 +0200, Luca wrote:
>>     
>>> On 8/21/07, Matthew Kent <mkent@magoazul.com> wrote:
>>>       
>>>> On Sat, 2007-18-08 at 01:11 +0200, Luca Tettamanti wrote:
>>>>         
>>>>> plain text document attachment (clock-hpet)
>>>>> Linux operates the HPET timer in legacy replacement mode, which means that
>>>>> the periodic interrupt of the CMOS RTC is not delivered (qemu won't be able
>>>>> to use /dev/rtc). Add support for HPET (/dev/hpet) as a replacement for the
>>>>> RTC; the periodic interrupt is delivered via SIGIO and is handled in the
>>>>> same way as the RTC timer.
>>>>>
>>>>> Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>
>>>>>           
>>>> I must be missing something silly here.. should I be able to open more
>>>> than one instance of qemu with -clock hpet? Because upon invoking a
>>>> second instance of qemu HPET_IE_ON fails.
>>>>         
>>> It depends on your hardware. Theoretically it's possible, but I've yet
>>> to see a motherboard with more than one periodic timer.
>>>       
>> Ah thank you, after re-reading the docs I think I better understand
>> this.
>>     
>
> In a risk of being off-topic, maybe you can help me try the hpet support.
> When I try the hpet Documentation demo I get
>
> # ./hpet poll /dev/hpet 1 1000
> -hpet: executing poll
> hpet_poll: info.hi_flags 0x0
> hpet_poll, HPET_IE_ON failed
>
> while I have
>
> $ dmesg|grep -i HPET
> ACPI: HPET 7D5B6AE0, 0038 (r1 A M I  OEMHPET   5000708 MSFT       97)
> ACPI: HPET id: 0x8086a301 base: 0xfed00000
> hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0, 0
> hpet0: 4 64-bit timers, 14318180 Hz
> hpet_resources: 0xfed00000 is busy
> Time: hpet clocksource has been installed.
>
> Any idea what I am misconfiguring?
>   

Maybe the kernel is using the timer, so userspace can't.  Just a guess.


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-22  7:03           ` Avi Kivity
@ 2007-08-22 12:34             ` Andi Kleen
  2007-08-22 21:11               ` Dan Kenigsberg
  0 siblings, 1 reply; 53+ messages in thread
From: Andi Kleen @ 2007-08-22 12:34 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Dan Kenigsberg, kvm-devel, qemu-devel

On Wed, Aug 22, 2007 at 10:03:32AM +0300, Avi Kivity wrote:
> Maybe the kernel is using the timer, so userspace can't.  Just a guess.

HPET has multiple timers (variable, but typically 2 or 4). The kernel
only uses timer 0. It's possible someone else in user space is using
it though. Try lsof /dev/hpet

-Andi

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-22  5:02                 ` Avi Kivity
@ 2007-08-22 16:12                   ` Luca Tettamanti
  2007-08-22 16:21                     ` Avi Kivity
  0 siblings, 1 reply; 53+ messages in thread
From: Luca Tettamanti @ 2007-08-22 16:12 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel, qemu-devel

On Wed, Aug 22, 2007 at 08:02:07AM +0300, Avi Kivity wrote:
> Luca Tettamanti wrote:
> 
> > Actually I'm having trouble with cyclesoak (probably its calibration);
> > numbers are not very stable across multiple runs...
> >   
> 
> I've had good results with cyclesoak; maybe you need to run it in
> runlevel 3 so the load generated by moving the mouse or breathing
> doesn't affect measurements.

This is what I did; I tested with -nographic in a text console.

> > The guest is an idle kernel with HZ=1000.
> >   
> 
> Can you double check this?  The dyntick results show that this is either
> a 100Hz kernel, or that there is a serious bug in dynticks.

Oops, I sent the wrong files, sorry.

This is QEMU, with dynticks and HPET:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 52.10    0.002966           0     96840           clock_gettime
 19.50    0.001110           0     37050           timer_gettime
 10.66    0.000607           0     20086           timer_settime
 10.40    0.000592           0      8985      2539 sigreturn
  4.94    0.000281           0      8361      2485 select
  2.41    0.000137           0      8362           gettimeofday
------ ----------- ----------- --------- --------- ----------------
100.00    0.005693                179684      5024 total

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 93.37    0.025541           3     10194     10193 select
  4.82    0.001319           0     33259           clock_gettime
  1.10    0.000301           0     10195           gettimeofday
  0.71    0.000195           0     10196     10194 sigreturn
------ ----------- ----------- --------- --------- ----------------
100.00    0.027356                 63844     20387 total

And this KVM:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 42.66    0.002885           0     45527        24 ioctl
 25.62    0.001733           0     89305           clock_gettime
 13.12    0.000887           0     34894           timer_gettime
  7.97    0.000539           0     18016           timer_settime
  4.70    0.000318           0     12224      7270 rt_sigtimedwait
  2.79    0.000189           0      7271           select
  1.86    0.000126           0      7271           gettimeofday
  1.27    0.000086           0      4954           rt_sigaction
------ ----------- ----------- --------- --------- ----------------
100.00    0.006763                219462      7294 total

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 49.41    0.004606           0     59900        27 ioctl
 24.14    0.002250           0     31252     21082 rt_sigtimedwait
  9.65    0.000900           0     51856           clock_gettime
  8.44    0.000787           0     17819           select
  4.42    0.000412           0     17819           gettimeofday
  3.94    0.000367           0     10170           rt_sigaction
------ ----------- ----------- --------- --------- ----------------
100.00    0.009322                188816     21109 total


Luca
-- 
Runtime error 6D at f000:a12f : incompetent user

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-22 16:12                   ` Luca Tettamanti
@ 2007-08-22 16:21                     ` Avi Kivity
  2007-08-22 16:38                       ` Luca
  0 siblings, 1 reply; 53+ messages in thread
From: Avi Kivity @ 2007-08-22 16:21 UTC (permalink / raw)
  To: Luca Tettamanti; +Cc: kvm-devel, qemu-devel

Luca Tettamanti wrote:
> On Wed, Aug 22, 2007 at 08:02:07AM +0300, Avi Kivity wrote:
>   
>> Luca Tettamanti wrote:
>>
>>     
>>> Actually I'm having trouble with cyclesoak (probably its calibration);
>>> numbers are not very stable across multiple runs...
>>>   
>>>       
>> I've had good results with cyclesoak; maybe you need to run it in
>> runlevel 3 so the load generated by moving the mouse or breathing
>> doesn't affect measurements.
>>     
>
> This is what I did; I tested with -nographic in a text console.
>
>   

Okay.  Maybe cpu frequency scaling confused it then.  Or something else?

>>> The guest is an idle kernel with HZ=1000.
>>>   
>>>       
>> Can you double check this?  The dyntick results show that this is either
>> a 100Hz kernel, or that there is a serious bug in dynticks.
>>     
>
> Oops, I sent the wrong files, sorry.
>
> This is QEMU, with dynticks and HPET:
>
> % time     seconds  usecs/call     calls    errors syscall
> ------ ----------- ----------- --------- --------- ----------------
>  52.10    0.002966           0     96840           clock_gettime
>  19.50    0.001110           0     37050           timer_gettime
>  10.66    0.000607           0     20086           timer_settime
>  10.40    0.000592           0      8985      2539 sigreturn
>   4.94    0.000281           0      8361      2485 select
>   2.41    0.000137           0      8362           gettimeofday
> ------ ----------- ----------- --------- --------- ----------------
> 100.00    0.005693                179684      5024 total
>   

This looks like 250 Hz?  And a huge number of settime calls?

Something's broken with dynticks.

> % time     seconds  usecs/call     calls    errors syscall
> ------ ----------- ----------- --------- --------- ----------------
>  93.37    0.025541           3     10194     10193 select
>   4.82    0.001319           0     33259           clock_gettime
>   1.10    0.000301           0     10195           gettimeofday
>   0.71    0.000195           0     10196     10194 sigreturn
> ------ ----------- ----------- --------- --------- ----------------
> 100.00    0.027356                 63844     20387 total
>   

This is expected and sane.

> And this is KVM:
>
> % time     seconds  usecs/call     calls    errors syscall
> ------ ----------- ----------- --------- --------- ----------------
>  42.66    0.002885           0     45527        24 ioctl
>  25.62    0.001733           0     89305           clock_gettime
>  13.12    0.000887           0     34894           timer_gettime
>   7.97    0.000539           0     18016           timer_settime
>   4.70    0.000318           0     12224      7270 rt_sigtimedwait
>   2.79    0.000189           0      7271           select
>   1.86    0.000126           0      7271           gettimeofday
>   1.27    0.000086           0      4954           rt_sigaction
> ------ ----------- ----------- --------- --------- ----------------
> 100.00    0.006763                219462      7294 total
>   

Similarly broken.  The effective frequency is twice qemu's.  I think we
had the same effect last time.

> % time     seconds  usecs/call     calls    errors syscall
> ------ ----------- ----------- --------- --------- ----------------
>  49.41    0.004606           0     59900        27 ioctl
>  24.14    0.002250           0     31252     21082 rt_sigtimedwait
>   9.65    0.000900           0     51856           clock_gettime
>   8.44    0.000787           0     17819           select
>   4.42    0.000412           0     17819           gettimeofday
>   3.94    0.000367           0     10170           rt_sigaction
> ------ ----------- ----------- --------- --------- ----------------
> 100.00    0.009322                188816     21109 total
>
>   

Similarly sane.


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-22 16:21                     ` Avi Kivity
@ 2007-08-22 16:38                       ` Luca
  2007-08-22 16:45                         ` Avi Kivity
  2007-08-22 20:42                         ` Dan Kenigsberg
  0 siblings, 2 replies; 53+ messages in thread
From: Luca @ 2007-08-22 16:38 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Dan Kenigsberg, kvm-devel, qemu-devel

On 8/22/07, Avi Kivity <avi@qumranet.com> wrote:
> Luca Tettamanti wrote:
> > On Wed, Aug 22, 2007 at 08:02:07AM +0300, Avi Kivity wrote:
> >
> >> Luca Tettamanti wrote:
> >>
> >>
> >>> Actually I'm having trouble with cyclesoak (probably its calibration);
> >>> numbers are not very stable across multiple runs...
> >>>
> >>>
> >> I've had good results with cyclesoak; maybe you need to run it in
> >> runlevel 3 so the load generated by moving the mouse or breathing
> >> doesn't affect measurements.
> >>
> >
> > This is what I did; I tested with -nographic in a text console.
>
> Okay.  Maybe cpu frequency scaling confused it then. Or something else?

I set it to performance; the frequency was locked at 2.1GHz.

> >>> The guest is an idle kernel with HZ=1000.
> >>>
> >>>
> >> Can you double check this?  The dyntick results show that this is either
> >> a 100Hz kernel, or that there is a serious bug in dynticks.
> >>
> >
> > Oops, I sent the wrong files, sorry.
> >
> > This is QEMU, with dynticks and HPET:
> >
> > % time     seconds  usecs/call     calls    errors syscall
> > ------ ----------- ----------- --------- --------- ----------------
> >  52.10    0.002966           0     96840           clock_gettime
> >  19.50    0.001110           0     37050           timer_gettime
> >  10.66    0.000607           0     20086           timer_settime
> >  10.40    0.000592           0      8985      2539 sigreturn
> >   4.94    0.000281           0      8361      2485 select
> >   2.41    0.000137           0      8362           gettimeofday
> > ------ ----------- ----------- --------- --------- ----------------
> > 100.00    0.005693                179684      5024 total
> >
>
> This looks like 250 Hz?

Nope:

# CONFIG_NO_HZ is not set
# CONFIG_HZ_100 is not set
# CONFIG_HZ_250 is not set
# CONFIG_HZ_300 is not set
CONFIG_HZ_1000=y
CONFIG_HZ=1000

and I'm reading it from /proc/config.gz on the guest.

> And a huge number of settime calls?

Yes, maybe some QEMU timer is using an interval < 1ms?
Dan, do you have any idea what's going on?

Luca

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-22 16:38                       ` Luca
@ 2007-08-22 16:45                         ` Avi Kivity
  2007-08-22 17:23                           ` Luca
  2007-08-22 20:42                         ` Dan Kenigsberg
  1 sibling, 1 reply; 53+ messages in thread
From: Avi Kivity @ 2007-08-22 16:45 UTC (permalink / raw)
  To: Luca; +Cc: Dan Kenigsberg, kvm-devel, qemu-devel

Luca wrote:
>>> This is QEMU, with dynticks and HPET:
>>>
>>> % time     seconds  usecs/call     calls    errors syscall
>>> ------ ----------- ----------- --------- --------- ----------------
>>>  52.10    0.002966           0     96840           clock_gettime
>>>  19.50    0.001110           0     37050           timer_gettime
>>>  10.66    0.000607           0     20086           timer_settime
>>>  10.40    0.000592           0      8985      2539 sigreturn
>>>   4.94    0.000281           0      8361      2485 select
>>>   2.41    0.000137           0      8362           gettimeofday
>>> ------ ----------- ----------- --------- --------- ----------------
>>> 100.00    0.005693                179684      5024 total
>>>
>>>       
>> This looks like 250 Hz?
>>     
>
> Nope:
>
> # CONFIG_NO_HZ is not set
> # CONFIG_HZ_100 is not set
> # CONFIG_HZ_250 is not set
> # CONFIG_HZ_300 is not set
> CONFIG_HZ_1000=y
> CONFIG_HZ=1000
>
> and I'm reading it from /proc/config.gz on the guest.
>   

Yeah, thought so -- so dyntick is broken at present.

Or maybe your host kernel can't support such a high rate.  It probably
needs hrtimers, or qemu's dynticks running over HPET one-shot support.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-22 16:45                         ` Avi Kivity
@ 2007-08-22 17:23                           ` Luca
  2007-08-22 17:39                             ` Luca
  2007-08-22 19:21                             ` Luca
  0 siblings, 2 replies; 53+ messages in thread
From: Luca @ 2007-08-22 17:23 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Dan Kenigsberg, kvm-devel, qemu-devel

On 8/22/07, Avi Kivity <avi@qumranet.com> wrote:
> Luca wrote:
> >>> This is QEMU, with dynticks and HPET:
> >>>
> >>> % time     seconds  usecs/call     calls    errors syscall
> >>> ------ ----------- ----------- --------- --------- ----------------
> >>>  52.10    0.002966           0     96840           clock_gettime
> >>>  19.50    0.001110           0     37050           timer_gettime
> >>>  10.66    0.000607           0     20086           timer_settime
> >>>  10.40    0.000592           0      8985      2539 sigreturn
> >>>   4.94    0.000281           0      8361      2485 select
> >>>   2.41    0.000137           0      8362           gettimeofday
> >>> ------ ----------- ----------- --------- --------- ----------------
> >>> 100.00    0.005693                179684      5024 total
> >>>
> >>>
> >> This looks like 250 Hz?
> >>
> >
> > Nope:
> >
> > # CONFIG_NO_HZ is not set
> > # CONFIG_HZ_100 is not set
> > # CONFIG_HZ_250 is not set
> > # CONFIG_HZ_300 is not set
> > CONFIG_HZ_1000=y
> > CONFIG_HZ=1000
> >
> > and I'm reading it from /proc/config.gz on the guest.
> >
>
> Yeah, thought so -- so dyntick is broken at present.

I see a lot of sub-ms timer_settime() calls. Many of them are the result of
->expire_time being less than the current qemu_get_clock(). This
results in a 250us timer due to MIN_TIMER_REARM_US; this happens only
for the REALTIME timer. Other sub-ms timers are generated by the
VIRTUAL timer.

This first issue is easily fixed: if expire_time < current time, then
the timer has already expired and hasn't been reprogrammed (and thus can be
ignored).
VIRTUAL just becomes more accurate with dynticks; before, multiple
timers were batched together.
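
A sketch of the fix just described, with illustrative names (the rearm
hook is an assumption; it is not part of the struct as posted):

    /* Before re-arming the host timer: ignore deadlines that have
     * already passed and were not reprogrammed, instead of clamping
     * them to MIN_TIMER_REARM_US. Illustrative only. */
    static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t,
                                       int64_t nearest_expire)
    {
        int64_t now = qemu_get_clock(rt_clock);

        if (nearest_expire <= now)
            return; /* expired and not re-armed: nothing to program */

        t->rearm(t, nearest_expire - now); /* assumed per-timer hook */
    }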

> Or maybe your host kernel can't support such a high rate.

I don't know... a simple printf tells me that the signal handler is
called about 1050 times per second, which sounds about right.
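
Roughly what I mean by "a simple printf" (illustrative debug code, not
what is in the tree):

    /* needs <signal.h>, <stdio.h>, <time.h> */
    static volatile sig_atomic_t alarm_hits;

    static void host_alarm_handler(int host_signum)
    {
        alarm_hits++;
        /* ... usual alarm handling ... */
    }

    /* called from the main loop; prints the rate once per second */
    static void report_alarm_rate(void)
    {
        static time_t last;
        time_t now = time(NULL);

        if (now != last) {
            fprintf(stderr, "alarm signals/sec: %d\n", (int)alarm_hits);
            alarm_hits = 0;
            last = now;
        }
    }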

Luca

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-22 17:23                           ` Luca
@ 2007-08-22 17:39                             ` Luca
  2007-08-22 19:21                             ` Luca
  1 sibling, 0 replies; 53+ messages in thread
From: Luca @ 2007-08-22 17:39 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Dan Kenigsberg, kvm-devel, qemu-devel

On 8/22/07, Luca <kronos.it@gmail.com> wrote:
> I see a lot of sub-ms timer_settime() calls. Many of them are the result of
> ->expire_time being less than the current qemu_get_clock().

False alarm, this was a bug in the debug code :D

Luca

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-22 17:23                           ` Luca
  2007-08-22 17:39                             ` Luca
@ 2007-08-22 19:21                             ` Luca
  2007-08-22 21:35                               ` [Qemu-devel] " Dor Laor
  1 sibling, 1 reply; 53+ messages in thread
From: Luca @ 2007-08-22 19:21 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Dan Kenigsberg, kvm-devel, qemu-devel

On 8/22/07, Luca <kronos.it@gmail.com> wrote:
> On 8/22/07, Avi Kivity <avi@qumranet.com> wrote:
> > Luca wrote:
> > >>> This is QEMU, with dynticks and HPET:
> > >>>
> > >>> % time     seconds  usecs/call     calls    errors syscall
> > >>> ------ ----------- ----------- --------- --------- ----------------
> > >>>  52.10    0.002966           0     96840           clock_gettime
> > >>>  19.50    0.001110           0     37050           timer_gettime
> > >>>  10.66    0.000607           0     20086           timer_settime
> > >>>  10.40    0.000592           0      8985      2539 sigreturn
> > >>>   4.94    0.000281           0      8361      2485 select
> > >>>   2.41    0.000137           0      8362           gettimeofday
> > >>> ------ ----------- ----------- --------- --------- ----------------
> > >>> 100.00    0.005693                179684      5024 total
> > >>>
> > >>>
> > >> This looks like 250 Hz?
> > >>
> > >
> > > Nope:
> > >
> > > # CONFIG_NO_HZ is not set
> > > # CONFIG_HZ_100 is not set
> > > # CONFIG_HZ_250 is not set
> > > # CONFIG_HZ_300 is not set
> > > CONFIG_HZ_1000=y
> > > CONFIG_HZ=1000
> > >
> > > and I'm reading it from /proc/config.gz on the guest.
> > >
> >
> > Yeah, thought so -- so dyntick is broken at present.
>
> I see a lot of sub-ms timer_settime() calls. Many of them are the result of
> ->expire_time being less than the current qemu_get_clock(). This
> results in a 250us timer due to MIN_TIMER_REARM_US; this happens only
> for the REALTIME timer. Other sub-ms timers are generated by the
> VIRTUAL timer.
>
> This first issue is easily fixed: if expire_time < current time, then
> the timer has already expired and hasn't been reprogrammed (and thus can be
> ignored).
> VIRTUAL just becomes more accurate with dynticks; before, multiple
> timers were batched together.
>
> > Or maybe your host kernel can't support such a high rate.
>
> I don't know... a simple printf tells me that the signal handler is
> called about 1050 times per second, which sounds about right.

...unless strace is attached. ptrace()'ing the process really screws up
the timing with dynticks; HPET is also affected, but the performance
hit is not as severe.

Luca

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-22 16:38                       ` Luca
  2007-08-22 16:45                         ` Avi Kivity
@ 2007-08-22 20:42                         ` Dan Kenigsberg
  1 sibling, 0 replies; 53+ messages in thread
From: Dan Kenigsberg @ 2007-08-22 20:42 UTC (permalink / raw)
  To: Luca; +Cc: kvm-devel, qemu-devel

On Wed, Aug 22, 2007 at 06:38:18PM +0200, Luca wrote:
> and I'm reading it from /proc/config.gz on the guest.
> 
> > And a huge number of settime calls?
> 
> Yes, maybe some QEMU timer is using an interval < 1ms?
> Dan, do you have any idea what's going on?

Not really...

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-22 12:34             ` Andi Kleen
@ 2007-08-22 21:11               ` Dan Kenigsberg
  2007-08-22 22:09                 ` Andi Kleen
  0 siblings, 1 reply; 53+ messages in thread
From: Dan Kenigsberg @ 2007-08-22 21:11 UTC (permalink / raw)
  To: Andi Kleen; +Cc: kvm-devel, qemu-devel

On Wed, Aug 22, 2007 at 02:34:24PM +0200, Andi Kleen wrote:
> On Wed, Aug 22, 2007 at 10:03:32AM +0300, Avi Kivity wrote:
> > Maybe the kernel is using the timer, so userspace can't.  Just a guess.
> 
> HPET has multiple timers (variable, but typically 2 or 4). The kernel
> only uses timer 0. It's possible someone else in user space is using
> it though. Try lsof /dev/hpet

Thanks for the ideas; however, even after I made the kernel use tsc as
the time source and made sure that nothing opens /dev/hpet, I still fail
to use HPET (with the same errors as before).
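
(For reference, the enable path that fails is the /dev/hpet sequence;
a sketch assuming the standard linux/hpet.h ioctls, with an arbitrary
frequency:)

    /* needs <fcntl.h>, <unistd.h>, <sys/ioctl.h>, <linux/hpet.h> */
    int fd = open("/dev/hpet", O_RDONLY);

    ioctl(fd, HPET_IRQFREQ, 1024);  /* ask for a 1024 Hz periodic irq */
    ioctl(fd, HPET_IE_ON, 0);       /* enable the interrupt: the step
                                       that fails here */
    fcntl(fd, F_SETOWN, getpid());  /* route SIGIO to this process */
    fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);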

I now have

$ dmesg |grep -i hpet
ACPI: HPET 7D5B6AE0, 0038 (r1 A M I  OEMHPET   5000708 MSFT       97)
ACPI: HPET id: 0x8086a301 base: 0xfed00000
hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0, 0
hpet0: 4 64-bit timers, 14318180 Hz
hpet_resources: 0xfed00000 is busy

Any other ideas?

Dan.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] RE: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-22 19:21                             ` Luca
@ 2007-08-22 21:35                               ` Dor Laor
  2007-08-22 22:07                                 ` [Qemu-devel] " Luca
  0 siblings, 1 reply; 53+ messages in thread
From: Dor Laor @ 2007-08-22 21:35 UTC (permalink / raw)
  To: Luca, Avi Kivity; +Cc: Dan Kenigsberg, kvm-devel, qemu-devel

>> > >>> This is QEMU, with dynticks and HPET:
>> > >>>
>> > >>> % time     seconds  usecs/call     calls    errors syscall
>> > >>> ------ ----------- ----------- --------- --------- ----------------
>> > >>>  52.10    0.002966           0     96840           clock_gettime
>> > >>>  19.50    0.001110           0     37050           timer_gettime
>> > >>>  10.66    0.000607           0     20086           timer_settime
>> > >>>  10.40    0.000592           0      8985      2539 sigreturn
>> > >>>   4.94    0.000281           0      8361      2485 select
>> > >>>   2.41    0.000137           0      8362           gettimeofday
>> > >>> ------ ----------- ----------- --------- --------- ----------------
>> > >>> 100.00    0.005693                179684      5024 total
>> > >>>
>> > >>>
>> > >> This looks like 250 Hz?
>> > >>
>> > >
>> > > Nope:
>> > >
>> > > # CONFIG_NO_HZ is not set
>> > > # CONFIG_HZ_100 is not set
>> > > # CONFIG_HZ_250 is not set
>> > > # CONFIG_HZ_300 is not set
>> > > CONFIG_HZ_1000=y
>> > > CONFIG_HZ=1000
>> > >
>> > > and I'm reading it from /proc/config.gz on the guest.
>> > >
>> >
>> > Yeah, thought so -- so dyntick is broken at present.
>>
>> I see a lot of sub-ms timer_settime() calls. Many of them are the result of
>> ->expire_time being less than the current qemu_get_clock(). This
>> results in a 250us timer due to MIN_TIMER_REARM_US; this happens only
>> for the REALTIME timer. Other sub-ms timers are generated by the
>> VIRTUAL timer.
>>
>> This first issue is easily fixed: if expire_time < current time, then
>> the timer has already expired and hasn't been reprogrammed (and thus can be
>> ignored).
>> VIRTUAL just becomes more accurate with dynticks; before, multiple
>> timers were batched together.
>>
>> > Or maybe your host kernel can't support such a high rate.
>>
>> I don't know... a simple printf tells me that the signal handler is
>> called about 1050 times per second, which sounds about right.
>
>...unless strace is attached. ptrace()'ing the process really screws up
>the timing with dynticks; HPET is also affected, but the performance
>hit is not as severe.
>
>Luca

I didn't figure out how you use both HPET and dynticks together.
HPET has a periodic timer while dynticks re-arms a one-shot timer each time.
Is there a chance that both are active and that's the source of our
problems?
Dor

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2
  2007-08-22 21:35                               ` [Qemu-devel] " Dor Laor
@ 2007-08-22 22:07                                 ` Luca
  0 siblings, 0 replies; 53+ messages in thread
From: Luca @ 2007-08-22 22:07 UTC (permalink / raw)
  To: Dor Laor; +Cc: Dan Kenigsberg, Avi Kivity, kvm-devel, qemu-devel

On 8/22/07, Dor Laor <dor.laor@qumranet.com> wrote:
> >> > >>> This is QEMU, with dynticks and HPET:
> >> > >>>
> >> > >>> % time     seconds  usecs/call     calls    errors syscall
> >> > >>> ------ ----------- ----------- --------- --------- ----------------
> >> > >>>  52.10    0.002966           0     96840           clock_gettime
> >> > >>>  19.50    0.001110           0     37050           timer_gettime
> >> > >>>  10.66    0.000607           0     20086           timer_settime
> >> > >>>  10.40    0.000592           0      8985      2539 sigreturn
> >> > >>>   4.94    0.000281           0      8361      2485 select
> >> > >>>   2.41    0.000137           0      8362           gettimeofday
> >> > >>> ------ ----------- ----------- --------- --------- ----------------
> >> > >>> 100.00    0.005693                179684      5024 total
> >> > >>>
> >> > >>>
> >> > >> This looks like 250 Hz?
> >> > >>
> >> > >
> >> > > Nope:
> >> > >
> >> > > # CONFIG_NO_HZ is not set
> >> > > # CONFIG_HZ_100 is not set
> >> > > # CONFIG_HZ_250 is not set
> >> > > # CONFIG_HZ_300 is not set
> >> > > CONFIG_HZ_1000=y
> >> > > CONFIG_HZ=1000
> >> > >
> >> > > and I'm reading it from /proc/config.gz on the guest.
> >> > >
> >> >
> >> > Yeah, thought so -- so dyntick is broken at present.
> >>
> >> I see a lot of sub-ms timer_settime() calls. Many of them are the result of
> >> ->expire_time being less than the current qemu_get_clock(). This
> >> results in a 250us timer due to MIN_TIMER_REARM_US; this happens only
> >> for the REALTIME timer. Other sub-ms timers are generated by the
> >> VIRTUAL timer.
> >>
> >> This first issue is easily fixed: if expire_time < current time, then
> >> the timer has already expired and hasn't been reprogrammed (and thus can be
> >> ignored).
> >> VIRTUAL just becomes more accurate with dynticks; before, multiple
> >> timers were batched together.
> >>
> >> > Or maybe your host kernel can't support such a high rate.
> >>
> >> I don't know... a simple printf tells me that the signal handler is
> >> called about 1050 times per second, which sounds about right.
> >
> >...unless strace is attached. ptrace()'ing the process really screws up
> >the timing with dynticks; HPET is also affected, but the performance
> >hit is not as severe.
> >
> I didn't figure out how you use both HPET and dynticks together.

I don't. Only one timer source is active at any time; the selection is
done at startup with the -clock option.

> HPET has a periodic timer while dynticks re-arms a one-shot timer each time.
> Is there a chance that both are active and that's the source of our
> problems?

No, the various sources are exclusive (though it might be possible to
use HPET in one-shot mode).
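
For example (illustrative invocations; disk.img is a placeholder):

    qemu -clock hpet disk.img        # use only the HPET source
    qemu -clock dynticks disk.img    # use only dynticks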

Luca

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-22 21:11               ` Dan Kenigsberg
@ 2007-08-22 22:09                 ` Andi Kleen
  2007-08-23  7:02                   ` Dan Kenigsberg
  0 siblings, 1 reply; 53+ messages in thread
From: Andi Kleen @ 2007-08-22 22:09 UTC (permalink / raw)
  To: Dan Kenigsberg; +Cc: kvm-devel, Andi Kleen, qemu-devel

> $ dmesg |grep -i hpet
> ACPI: HPET 7D5B6AE0, 0038 (r1 A M I  OEMHPET   5000708 MSFT       97)
> ACPI: HPET id: 0x8086a301 base: 0xfed00000
> hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0, 0
> hpet0: 4 64-bit timers, 14318180 Hz
> hpet_resources: 0xfed00000 is busy

What kernel version was that? There was a bug that caused this before 2.6.22.

-Andi

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-22 22:09                 ` Andi Kleen
@ 2007-08-23  7:02                   ` Dan Kenigsberg
  2007-08-24 20:18                     ` Luca
  0 siblings, 1 reply; 53+ messages in thread
From: Dan Kenigsberg @ 2007-08-23  7:02 UTC (permalink / raw)
  To: qemu-devel; +Cc: kvm-devel, Andi Kleen

On Thu, Aug 23, 2007 at 12:09:47AM +0200, Andi Kleen wrote:
> > $ dmesg |grep -i hpet
> > ACPI: HPET 7D5B6AE0, 0038 (r1 A M I  OEMHPET   5000708 MSFT       97)
> > ACPI: HPET id: 0x8086a301 base: 0xfed00000
> > hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0, 0
> > hpet0: 4 64-bit timers, 14318180 Hz
> > hpet_resources: 0xfed00000 is busy
> 
> What kernel version was that? There was a bug that caused this pre .22
> 

I have vanilla 2.6.22.3 on that machine.

Thanks,
    Dan.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-23  7:02                   ` Dan Kenigsberg
@ 2007-08-24 20:18                     ` Luca
  2007-08-25  8:24                       ` Dan Kenigsberg
  0 siblings, 1 reply; 53+ messages in thread
From: Luca @ 2007-08-24 20:18 UTC (permalink / raw)
  To: Dan Kenigsberg; +Cc: kvm-devel, qemu-devel, Andi Kleen

On 8/23/07, Dan Kenigsberg <danken@qumranet.com> wrote:
> On Thu, Aug 23, 2007 at 12:09:47AM +0200, Andi Kleen wrote:
> > > $ dmesg |grep -i hpet
> > > ACPI: HPET 7D5B6AE0, 0038 (r1 A M I  OEMHPET   5000708 MSFT       97)
> > > ACPI: HPET id: 0x8086a301 base: 0xfed00000
> > > hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0, 0
> > > hpet0: 4 64-bit timers, 14318180 Hz
> > > hpet_resources: 0xfed00000 is busy
> >
> > What kernel version was that? There was a bug that caused this pre .22
> >
>
> I have vanilla 2.6.22.3 on that machine.

Try:
cat /sys/devices/system/clocksource/clocksource0/available_clocksource

do you see HPET listed twice?

Luca

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [kvm-devel] [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-24 20:18                     ` Luca
@ 2007-08-25  8:24                       ` Dan Kenigsberg
  0 siblings, 0 replies; 53+ messages in thread
From: Dan Kenigsberg @ 2007-08-25  8:24 UTC (permalink / raw)
  To: Luca; +Cc: kvm-devel, qemu-devel, Andi Kleen

On Fri, Aug 24, 2007 at 10:18:47PM +0200, Luca wrote:
> On 8/23/07, Dan Kenigsberg <danken@qumranet.com> wrote:
> > On Thu, Aug 23, 2007 at 12:09:47AM +0200, Andi Kleen wrote:
> > > > $ dmesg |grep -i hpet
> > > > ACPI: HPET 7D5B6AE0, 0038 (r1 A M I  OEMHPET   5000708 MSFT       97)
> > > > ACPI: HPET id: 0x8086a301 base: 0xfed00000
> > > > hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0, 0
> > > > hpet0: 4 64-bit timers, 14318180 Hz
> > > > hpet_resources: 0xfed00000 is busy
> > >
> > > What kernel version was that? There was a bug that caused this pre .22
> > >
> >
> > I have vanilla 2.6.22.3 on that machine.
> 
> Try:
> cat /sys/devices/system/clocksource/clocksource0/available_clocksource
> 
> do you see HPET listed twice?
> 
No, only once. Is that wrong?

# cat /sys/devices/system/clocksource/clocksource0/available_clocksource
hpet acpi_pm jiffies tsc 

# cat /sys/devices/system/clocksource/clocksource0/current_clocksource 
tsc 

Dan.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer.
  2007-08-21 19:40     ` Luca
  2007-08-21 20:15       ` Matthew Kent
@ 2007-09-03  8:40       ` GUERRAZ Francois
  1 sibling, 0 replies; 53+ messages in thread
From: GUERRAZ Francois @ 2007-09-03  8:40 UTC (permalink / raw)
  To: qemu-devel

Hello,

Can you confirm that if I have only one periodic timer on my
motherboard, I can only have one QEMU instance running using HPET?

Thanks.

François.

On Tuesday, 21 August 2007 at 21:40 +0200, Luca wrote:
> On 8/21/07, Matthew Kent <mkent@magoazul.com> wrote:
> > On Sat, 2007-18-08 at 01:11 +0200, Luca Tettamanti wrote:
> > > plain text document attachment (clock-hpet)
> > > Linux operates the HPET timer in legacy replacement mode, which means that
> > > the periodic interrupt of the CMOS RTC is not delivered (qemu won't be able
> > > to use /dev/rtc). Add support for HPET (/dev/hpet) as a replacement for the
> > > RTC; the periodic interrupt is delivered via SIGIO and is handled in the
> > > same way as the RTC timer.
> > >
> > > Signed-off-by: Luca Tettamanti <kronos.it@gmail.com>
> >
> > I must be missing something silly here... should I be able to open more
> > than one instance of qemu with -clock hpet? Upon invoking a
> > second instance of qemu, HPET_IE_ON fails.
> 
> It depends on your hardware. Theoretically it's possible, but I've yet
> to see a motherboard with more than one periodic timer.
> 
> "dmesg | grep hpet" should tell you something like:
> 
> hpet0: 3 64-bit timers, 14318180 Hz
> 
> Luca
> 
> 
> 

^ permalink raw reply	[flat|nested] 53+ messages in thread

end of thread

Thread overview: 53+ messages
2007-08-17 23:11 [Qemu-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take 2 Luca Tettamanti
2007-08-17 23:11 ` [Qemu-devel] [PATCH 1/4] Rework alarm timer infrastrucure Luca Tettamanti
2007-08-17 23:11 ` [Qemu-devel] [PATCH 2/4] Add -clock option Luca Tettamanti
2007-08-17 23:11 ` [Qemu-devel] [PATCH 3/4] Add support for HPET periodic timer Luca Tettamanti
2007-08-21 19:24   ` Matthew Kent
2007-08-21 19:40     ` Luca
2007-08-21 20:15       ` Matthew Kent
2007-08-22  6:48         ` [kvm-devel] " Dan Kenigsberg
2007-08-22  7:03           ` Avi Kivity
2007-08-22 12:34             ` Andi Kleen
2007-08-22 21:11               ` Dan Kenigsberg
2007-08-22 22:09                 ` Andi Kleen
2007-08-23  7:02                   ` Dan Kenigsberg
2007-08-24 20:18                     ` Luca
2007-08-25  8:24                       ` Dan Kenigsberg
2007-09-03  8:40       ` GUERRAZ Francois
2007-08-17 23:11 ` [Qemu-devel] [PATCH 4/4] Add support for dynamic ticks Luca Tettamanti
2007-08-17 23:48 ` [Qemu-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take 2 Christian MICHON
2007-08-18  0:10   ` [kvm-devel] " Luca
2007-08-18 15:17 ` [Qemu-devel] Re: [kvm-devel] " Anthony Liguori
2007-08-18 16:53   ` [Qemu-devel] RE: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2 Dor Laor
2007-08-18 22:02     ` [Qemu-devel] " Luca Tettamanti
2007-08-18 23:58       ` Anthony Liguori
2007-08-19  7:36         ` [Qemu-devel] RE: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure -take2 Dor Laor
2007-08-19  8:24         ` [Qemu-devel] Re: [kvm-devel] [PATCH 0/4] Rework alarm timer infrastrucure - take2 Avi Kivity
2007-08-19 13:10           ` Jamie Lokier
2007-08-19 13:48             ` [kvm-devel] [Qemu-devel] " Avi Kivity
2007-08-19 13:57               ` Paul Brook
2007-08-19 14:07                 ` Avi Kivity
2007-08-19 14:27                   ` Dor Laor
2007-08-20  9:25                   ` Avi Kivity
2007-08-19 17:15                 ` Jamie Lokier
2007-08-19 19:29                   ` [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarmtimer " Dor Laor
2007-08-19 19:30                   ` [kvm-devel] [Qemu-devel] Re: [PATCH 0/4] Rework alarm timer " Avi Kivity
2007-08-19 16:52       ` [Qemu-devel] Re: [kvm-devel] " Luca
2007-08-19 19:31         ` Avi Kivity
2007-08-20 21:20           ` Luca Tettamanti
2007-08-20 21:55             ` malc
2007-08-20 22:49               ` [kvm-devel] [Qemu-devel] " Luca
2007-08-21 12:09             ` [Qemu-devel] Re: [kvm-devel] " Avi Kivity
2007-08-21 19:38               ` Luca Tettamanti
2007-08-21 19:44                 ` malc
2007-08-22  5:02                 ` Avi Kivity
2007-08-22 16:12                   ` Luca Tettamanti
2007-08-22 16:21                     ` Avi Kivity
2007-08-22 16:38                       ` Luca
2007-08-22 16:45                         ` Avi Kivity
2007-08-22 17:23                           ` Luca
2007-08-22 17:39                             ` Luca
2007-08-22 19:21                             ` Luca
2007-08-22 21:35                               ` [Qemu-devel] " Dor Laor
2007-08-22 22:07                                 ` [Qemu-devel] " Luca
2007-08-22 20:42                         ` Dan Kenigsberg
