qemu-devel.nongnu.org archive mirror
* [PATCH] softmmu/dirtylimit: Fix usleep early return on signal
@ 2023-09-01  2:18 alloc.young
  2023-09-04 13:27 ` Yong Huang
  0 siblings, 1 reply; 5+ messages in thread
From: alloc.young @ 2023-09-01  2:18 UTC (permalink / raw)
  To: yong.huang; +Cc: qemu-devel, alloc

From: alloc <alloc.young@outlook.com>

Timeout functions like usleep() can return early when a signal is
delivered, cutting the throttle sleep short so the vCPU dirties more
pages than expected. In the dirtylimit case, the dirty rate meter
thread needs to kick all vcpus out to sync. The call chain:

vcpu_calculate_dirtyrate
    global_dirty_log_sync
        memory_global_dirty_log_sync
            kvm_log_sync_global
                kvm_dirty_ring_flush
                    kvm_cpu_synchronize_kick_all <---- send vcpu signal

For long sleeps, use qemu_cond_timedwait_iothread() so the vCPU can
still handle the cpu stop event.
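
As an illustration of the idea (a minimal standalone sketch in plain
POSIX C, not QEMU code; the function names below are hypothetical), a
single usleep() can be cut short by the kick signal, while re-sleeping
against an absolute deadline preserves the intended throttle time. The
actual patch additionally bounds each wait with
qemu_cond_timedwait_iothread() and checks cpu->stop so a stopped vCPU
is not left sleeping:

#include <inttypes.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static int64_t now_us(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
}

/* Sleep for sleep_us even if signals interrupt individual usleep() calls. */
static void throttle_sleep(int64_t sleep_us)
{
    int64_t deadline_us = now_us() + sleep_us;

    while (sleep_us > 0) {
        usleep(sleep_us);                  /* may return early on a signal */
        sleep_us = deadline_us - now_us(); /* re-sleep for the remainder */
    }
}

int main(void)
{
    int64_t start = now_us();
    throttle_sleep(200000);                /* 200 ms throttle window */
    printf("slept %" PRId64 " us\n", now_us() - start);
    return 0;
}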

Signed-off-by: alloc <alloc.young@outlook.com>
---
 softmmu/dirtylimit.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
index fa959d7743..ee938c636d 100644
--- a/softmmu/dirtylimit.c
+++ b/softmmu/dirtylimit.c
@@ -411,13 +411,28 @@ void dirtylimit_set_all(uint64_t quota,
 
 void dirtylimit_vcpu_execute(CPUState *cpu)
 {
+    int64_t sleep_us, endtime_us;
+
+    dirtylimit_state_lock();
     if (dirtylimit_in_service() &&
         dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
         cpu->throttle_us_per_full) {
         trace_dirtylimit_vcpu_execute(cpu->cpu_index,
                 cpu->throttle_us_per_full);
-        usleep(cpu->throttle_us_per_full);
-    }
+        sleep_us = cpu->throttle_us_per_full;
+        dirtylimit_state_unlock();
+        endtime_us = qemu_clock_get_us(QEMU_CLOCK_REALTIME) + sleep_us;
+        while (sleep_us > 0 && !cpu->stop) {
+            if (sleep_us > SCALE_US) {
+                qemu_mutex_lock_iothread();
+                qemu_cond_timedwait_iothread(cpu->halt_cond, sleep_us / SCALE_US);
+                qemu_mutex_unlock_iothread();
+            } else
+                g_usleep(sleep_us);
+            sleep_us = endtime_us - qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+        }
+    } else
+        dirtylimit_state_unlock();
 }
 
 static void dirtylimit_init(void)
-- 
2.39.3




* Re: [PATCH] softmmu/dirtylimit: Fix usleep early return on signal
  2023-09-01  2:18 [PATCH] softmmu/dirtylimit: Fix usleep early return on signal alloc.young
@ 2023-09-04 13:27 ` Yong Huang
  2023-09-05  2:17   ` alloc young
  2023-09-08  1:11   ` alloc young
  0 siblings, 2 replies; 5+ messages in thread
From: Yong Huang @ 2023-09-04 13:27 UTC (permalink / raw)
  To: alloc.young; +Cc: qemu-devel


On Fri, Sep 1, 2023 at 10:19 AM <alloc.young@outlook.com> wrote:

> From: alloc <alloc.young@outlook.com>
>
> Timeout functions like usleep can return early on signal, which reduces
> more dirty pages than expected. In dirtylimit case, dirtyrate meter
> thread needs to kick all vcpus out to sync. The callchain:
>
> vcpu_calculate_dirtyrate
>     global_dirty_log_sync
>         memory_global_dirty_log_sync
>             kvm_log_sync_global
>                 kvm_dirty_ring_flush
>                     kvm_cpu_synchronize_kick_all <---- send vcpu signal
>
> For long time sleep, use qemu_cond_timedwait_iothread to handle cpu stop
> event.
>
>
The Dirty Limit algorithm seeks to keep the vCPU dirty page rate within
the set limit; since it places more emphasis on processing time and
precision, I feel that the improvement should strive for the same result.
Could you please provide the final test results showing the impact of
this improvement?


> Signed-off-by: alloc <alloc.young@outlook.com>
> ---
>  softmmu/dirtylimit.c | 19 +++++++++++++++++--
>  1 file changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
> index fa959d7743..ee938c636d 100644
> --- a/softmmu/dirtylimit.c
> +++ b/softmmu/dirtylimit.c
> @@ -411,13 +411,28 @@ void dirtylimit_set_all(uint64_t quota,
>
>  void dirtylimit_vcpu_execute(CPUState *cpu)
>  {
> +    int64_t sleep_us, endtime_us;
> +
> +    dirtylimit_state_lock();
>      if (dirtylimit_in_service() &&
>          dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
>          cpu->throttle_us_per_full) {
>          trace_dirtylimit_vcpu_execute(cpu->cpu_index,
>                  cpu->throttle_us_per_full);
> -        usleep(cpu->throttle_us_per_full);
> -    }
> +        sleep_us = cpu->throttle_us_per_full;
> +        dirtylimit_state_unlock();
> +        endtime_us = qemu_clock_get_us(QEMU_CLOCK_REALTIME) + sleep_us;
> +        while (sleep_us > 0 && !cpu->stop) {
> +            if (sleep_us > SCALE_US) {
> +                qemu_mutex_lock_iothread();
> +                qemu_cond_timedwait_iothread(cpu->halt_cond, sleep_us /
> SCALE_US);
> +                qemu_mutex_unlock_iothread();
> +            } else
> +                g_usleep(sleep_us);
> +            sleep_us = endtime_us -
> qemu_clock_get_us(QEMU_CLOCK_REALTIME);
> +        }
> +    } else
> +        dirtylimit_state_unlock();
>  }
>
>  static void dirtylimit_init(void)
> --
> 2.39.3
>
>

-- 
Best regards



* Re: [PATCH] softmmu/dirtylimit: Fix usleep early return on signal
  2023-09-04 13:27 ` Yong Huang
@ 2023-09-05  2:17   ` alloc young
  2023-10-16 15:37     ` Hyman Huang
  2023-09-08  1:11   ` alloc young
  1 sibling, 1 reply; 5+ messages in thread
From: alloc young @ 2023-09-05  2:17 UTC (permalink / raw)
  To: Yong Huang; +Cc: qemu-devel



On 2023/9/4 21:27, Yong Huang wrote:
> 
> 
> On Fri, Sep 1, 2023 at 10:19 AM <alloc.young@outlook.com 
> <mailto:alloc.young@outlook.com>> wrote:
> 
>     From: alloc <alloc.young@outlook.com <mailto:alloc.young@outlook.com>>
> 
>     Timeout functions like usleep can return early on signal, which reduces
>     more dirty pages than expected. In dirtylimit case, dirtyrate meter
>     thread needs to kick all vcpus out to sync. The callchain:
> 
>     vcpu_calculate_dirtyrate
>          global_dirty_log_sync
>              memory_global_dirty_log_sync
>                  kvm_log_sync_global
>                      kvm_dirty_ring_flush
>                          kvm_cpu_synchronize_kick_all <---- send vcpu signal
> 
>     For long time sleep, use qemu_cond_timedwait_iothread to handle cpu stop
>     event.
> 
> 
> The Dirty Limit algorithm seeks to keep the vCPU dirty page rate within
> the set limit; since it focuses more emphasis on processing time and
> precision, I feel that improvement should strive for the same result.
> Could you please provide the final test results showing the impact of
> that improvement?
Use the command line below with the initrd built in tests/migration; the
guest runs stress at 5 GiB/s. With dirty limit enabled and default
migration parameters, migration can't finish within 1 hour. The default
vCPU dirty limit is set to 1 MB/s, however mig_src.log shows a copy rate
of 128 MiB/s. With this patch, migration finishes in 39s.

/usr/libexec/qemu-kvm -display none -vga none -name 
mig_src,debug-threads=on -monitor stdio -accel kvm,dirty-ring-size=4096 
-cpu host -kernel /boot/vmlinuz-5.14.0-70.22.1.el9_0.x86_64 -initrd 
/root/initrd-stress.img -append noapic edd=off printk.time=1 
noreplace-smp cgroup_disable=memory pci=noearly console=ttyS0 debug 
ramsize=1 ncpus=1 -chardev file,id=charserial0,path=/var/log/mig_src.log 
-serial chardev:charserial0 -m 1536 -smp 1


> 
>     Signed-off-by: alloc <alloc.young@outlook.com
>     <mailto:alloc.young@outlook.com>>
>     ---
>       softmmu/dirtylimit.c | 19 +++++++++++++++++--
>       1 file changed, 17 insertions(+), 2 deletions(-)
> 
>     diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
>     index fa959d7743..ee938c636d 100644
>     --- a/softmmu/dirtylimit.c
>     +++ b/softmmu/dirtylimit.c
>     @@ -411,13 +411,28 @@ void dirtylimit_set_all(uint64_t quota,
> 
>       void dirtylimit_vcpu_execute(CPUState *cpu)
>       {
>     +    int64_t sleep_us, endtime_us;
>     +
>     +    dirtylimit_state_lock();
>           if (dirtylimit_in_service() &&
>               dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
>               cpu->throttle_us_per_full) {
>               trace_dirtylimit_vcpu_execute(cpu->cpu_index,
>                       cpu->throttle_us_per_full);
>     -        usleep(cpu->throttle_us_per_full);
>     -    }
>     +        sleep_us = cpu->throttle_us_per_full;
>     +        dirtylimit_state_unlock();
>     +        endtime_us = qemu_clock_get_us(QEMU_CLOCK_REALTIME) + sleep_us;
>     +        while (sleep_us > 0 && !cpu->stop) {
>     +            if (sleep_us > SCALE_US) {
>     +                qemu_mutex_lock_iothread();
>     +                qemu_cond_timedwait_iothread(cpu->halt_cond,
>     sleep_us / SCALE_US);
>     +                qemu_mutex_unlock_iothread();
>     +            } else
>     +                g_usleep(sleep_us);
>     +            sleep_us = endtime_us -
>     qemu_clock_get_us(QEMU_CLOCK_REALTIME);
>     +        }
>     +    } else
>     +        dirtylimit_state_unlock();
>       }
> 
>       static void dirtylimit_init(void)
>     -- 
>     2.39.3
> 
> 
> 
> -- 
> Best regards



* Re: [PATCH] softmmu/dirtylimit: Fix usleep early return on signal
  2023-09-04 13:27 ` Yong Huang
  2023-09-05  2:17   ` alloc young
@ 2023-09-08  1:11   ` alloc young
  1 sibling, 0 replies; 5+ messages in thread
From: alloc young @ 2023-09-08  1:11 UTC (permalink / raw)
  To: Yong Huang; +Cc: qemu-devel



On 2023/9/4 21:27, Yong Huang wrote:
> 
> 
> On Fri, Sep 1, 2023 at 10:19 AM <alloc.young@outlook.com 
> <mailto:alloc.young@outlook.com>> wrote:
> 
>     From: alloc <alloc.young@outlook.com <mailto:alloc.young@outlook.com>>
> 
>     Timeout functions like usleep can return early on signal, which reduces
>     more dirty pages than expected. In dirtylimit case, dirtyrate meter
>     thread needs to kick all vcpus out to sync. The callchain:
> 
>     vcpu_calculate_dirtyrate
>          global_dirty_log_sync
>              memory_global_dirty_log_sync
>                  kvm_log_sync_global
>                      kvm_dirty_ring_flush
>                          kvm_cpu_synchronize_kick_all <---- send vcpu signal
> 
>     For long time sleep, use qemu_cond_timedwait_iothread to handle cpu stop
>     event.
> 
> 
> The Dirty Limit algorithm seeks to keep the vCPU dirty page rate within
> the set limit; since it focuses more emphasis on processing time and
> precision, I feel that improvement should strive for the same result.
> Could you please provide the final test results showing the impact of
> that improvement?

The kvm_cpu_sync in the dirty ring flush has to wait for all vCPUs to
exit before running the sync action, while the vCPU sleep may block this.
Before this patch, the kick could make the vCPU return early and add more
dirty pages. It seems kvm_cpu_sync conflicts with the vCPU sleep; why not
measure the dirty rate when the dirty ring is full?

> 
>     Signed-off-by: alloc <alloc.young@outlook.com
>     <mailto:alloc.young@outlook.com>>
>     ---
>       softmmu/dirtylimit.c | 19 +++++++++++++++++--
>       1 file changed, 17 insertions(+), 2 deletions(-)
> 
>     diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
>     index fa959d7743..ee938c636d 100644
>     --- a/softmmu/dirtylimit.c
>     +++ b/softmmu/dirtylimit.c
>     @@ -411,13 +411,28 @@ void dirtylimit_set_all(uint64_t quota,
> 
>       void dirtylimit_vcpu_execute(CPUState *cpu)
>       {
>     +    int64_t sleep_us, endtime_us;
>     +
>     +    dirtylimit_state_lock();
>           if (dirtylimit_in_service() &&
>               dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
>               cpu->throttle_us_per_full) {
>               trace_dirtylimit_vcpu_execute(cpu->cpu_index,
>                       cpu->throttle_us_per_full);
>     -        usleep(cpu->throttle_us_per_full);
>     -    }
>     +        sleep_us = cpu->throttle_us_per_full;
>     +        dirtylimit_state_unlock();
>     +        endtime_us = qemu_clock_get_us(QEMU_CLOCK_REALTIME) + sleep_us;
>     +        while (sleep_us > 0 && !cpu->stop) {
>     +            if (sleep_us > SCALE_US) {
>     +                qemu_mutex_lock_iothread();
>     +                qemu_cond_timedwait_iothread(cpu->halt_cond,
>     sleep_us / SCALE_US);
>     +                qemu_mutex_unlock_iothread();
>     +            } else
>     +                g_usleep(sleep_us);
>     +            sleep_us = endtime_us -
>     qemu_clock_get_us(QEMU_CLOCK_REALTIME);
>     +        }
>     +    } else
>     +        dirtylimit_state_unlock();
>       }
> 
>       static void dirtylimit_init(void)
>     -- 
>     2.39.3
> 
> 
> 
> -- 
> Best regards



* Re: [PATCH] softmmu/dirtylimit: Fix usleep early return on signal
  2023-09-05  2:17   ` alloc young
@ 2023-10-16 15:37     ` Hyman Huang
  0 siblings, 0 replies; 5+ messages in thread
From: Hyman Huang @ 2023-10-16 15:37 UTC (permalink / raw)
  To: alloc young; +Cc: qemu-devel


在 2023/9/5 10:17, alloc young 写道:
>
>
> On 2023/9/4 21:27, Yong Huang wrote:
>>
>>
>> On Fri, Sep 1, 2023 at 10:19 AM <alloc.young@outlook.com 
>> <mailto:alloc.young@outlook.com>> wrote:
>>
>>     From: alloc <alloc.young@outlook.com 
>> <mailto:alloc.young@outlook.com>>
>>
>>     Timeout functions like usleep can return early on signal, which 
>> reduces
>>     more dirty pages than expected. In dirtylimit case, dirtyrate meter
>>     thread needs to kick all vcpus out to sync. The callchain:
>>
>>     vcpu_calculate_dirtyrate
>>          global_dirty_log_sync
>>              memory_global_dirty_log_sync
>>                  kvm_log_sync_global
>>                      kvm_dirty_ring_flush
>>                          kvm_cpu_synchronize_kick_all <---- send vcpu 
>> signal
>>
>>     For long time sleep, use qemu_cond_timedwait_iothread to handle 
>> cpu stop
>>     event.
>>
>>
>> The Dirty Limit algorithm seeks to keep the vCPU dirty page rate within
>> the set limit; since it focuses more emphasis on processing time and
>> precision, I feel that improvement should strive for the same result.
>> Could you please provide the final test results showing the impact of
>> that improvement?
> Use the command line below with initrd built in tests/migration, guest 
> runs stress at 5GiB/s. Migration with dirty limit on and default 
> parameters, migration can't finish within 1 hour. The default vcpu 
> dirty limit set is 1 MB/s, however, the mig_src.log show copy rate at 
> 128MiB/s. With this patch, migration finsih in 39s.
>
> /usr/libexec/qemu-kvm -display none -vga none -name 
> mig_src,debug-threads=on -monitor stdio -accel 
> kvm,dirty-ring-size=4096 -cpu host -kernel 
> /boot/vmlinuz-5.14.0-70.22.1.el9_0.x86_64 -initrd 
> /root/initrd-stress.img -append noapic edd=off printk.time=1 
> noreplace-smp cgroup_disable=memory pci=noearly console=ttyS0 debug 
> ramsize=1 ncpus=1 -chardev 
> file,id=charserial0,path=/var/log/mig_src.log -serial 
> chardev:charserial0 -m 1536 -smp 1
>
I calculated the migration time and success probability using the QEMU
test/migration/guestperf.py script. With the patch, performance does not
seem to increase. Would you kindly post the test case's specifics?

IMHO, conducting the tests more than once and obtaining probability
statistics could be more compelling.

Thanks, Yong

>
>>
>>     Signed-off-by: alloc <alloc.young@outlook.com
>>     <mailto:alloc.young@outlook.com>>
>>     ---
>>       softmmu/dirtylimit.c | 19 +++++++++++++++++--
>>       1 file changed, 17 insertions(+), 2 deletions(-)
>>
>>     diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
>>     index fa959d7743..ee938c636d 100644
>>     --- a/softmmu/dirtylimit.c
>>     +++ b/softmmu/dirtylimit.c
>>     @@ -411,13 +411,28 @@ void dirtylimit_set_all(uint64_t quota,
>>
>>       void dirtylimit_vcpu_execute(CPUState *cpu)
>>       {
>>     +    int64_t sleep_us, endtime_us;
>>     +
>>     +    dirtylimit_state_lock();
>>           if (dirtylimit_in_service() &&
>>  dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
>>               cpu->throttle_us_per_full) {
>>               trace_dirtylimit_vcpu_execute(cpu->cpu_index,
>>                       cpu->throttle_us_per_full);
>>     -        usleep(cpu->throttle_us_per_full);
>>     -    }
>>     +        sleep_us = cpu->throttle_us_per_full;
>>     +        dirtylimit_state_unlock();
>>     +        endtime_us = qemu_clock_get_us(QEMU_CLOCK_REALTIME) + 
>> sleep_us;
>>     +        while (sleep_us > 0 && !cpu->stop) {
>>     +            if (sleep_us > SCALE_US) {
>>     +                qemu_mutex_lock_iothread();
>>     + qemu_cond_timedwait_iothread(cpu->halt_cond,
>>     sleep_us / SCALE_US);
>>     +                qemu_mutex_unlock_iothread();
>>     +            } else
>>     +                g_usleep(sleep_us);
>>     +            sleep_us = endtime_us -
>>     qemu_clock_get_us(QEMU_CLOCK_REALTIME);
>>     +        }
>>     +    } else
>>     +        dirtylimit_state_unlock();
>>       }
>>
>>       static void dirtylimit_init(void)
>>     --     2.39.3
>>
>>
>>
>> -- 
>> Best regards




Thread overview: 5+ messages
-- links below jump to the message on this page --
2023-09-01  2:18 [PATCH] softmmu/dirtylimit: Fix usleep early return on signal alloc.young
2023-09-04 13:27 ` Yong Huang
2023-09-05  2:17   ` alloc young
2023-10-16 15:37     ` Hyman Huang
2023-09-08  1:11   ` alloc young
