* Re: [Qemu-devel] [RFC]Two ideas to optimize updating irq routing table
@ 2014-03-26 8:22 ` Gonglei (Arei)
0 siblings, 0 replies; 16+ messages in thread
From: Gonglei (Arei) @ 2014-03-26 8:22 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel@nongnu.org, kvm@vger.kernel.org
Cc: Huangweidong (C), gleb@redhat.com, mst@redhat.com,
Christian Borntraeger, avi.kivity@gmail.com,
Herongguang (Stephen)
> > Based on discussions in:
> > http://lists.gnu.org/archive/html/qemu-devel/2013-11/threads.html#03322
> >
> > About KVM_SET_GSI_ROUTING ioctl, I tested changing RCU to SRCU, but
> unfortunately
> > it looks like SRCU's grace period is no better than RCU.
>
> Really? This is not what Christian Borntraeger claimed at
> http://permalink.gmane.org/gmane.comp.emulators.kvm.devel/118083 -- in
> fact, Andrew Theurer is currently testing a variant of that patch and I
> was going to post it for 3.16.
>
> Have you tried synchronize_srcu_expedited? Unlike the RCU variant, you
> can use this function.
>
Yes, previously I was using synchronize_srcu, which is not good. When I
changed it to synchronize_srcu_expedited, grace period delay is much better
than synchronize_srcu. Though in our tests, we can still see some impact
of KVM_SET_GSI_ROUTING ioctl.
Our testing scenario is like this. In VM we run a script that sets smp_affinity
for each IRQ every 0.5s (this leads QEMU to do KVM_SET_GSI_ROUTING ioctl).
Outside the VM we ping that VM.
Without patches, ping time can jump from 0.3ms to 2ms-30ms. With synchronize_srcu
patch, ping time is worse. With synchronize_srcu_expedited patch, ping time is
overall good, though sometimes ping time jump to 1ms-3ms.
With following raw patch, ping time is like call_rcu patch, that not influenced
by setting IRQ affinity, keeps 0.3ms, and there is no vulnerability, frequent
intermidiate KVM_SET_GSI_ROUTING settings are just skipped, and always the newest
setting would take effect.
Would you think this patch is acceptable or has problem? Thanks in advance.
diff -urp kvm_kmod/include/linux/kvm_host.h kvm_kmod_new//include/linux/kvm_host.h
--- kvm_kmod/include/linux/kvm_host.h 2014-03-10 14:08:28.000000000 +0000
+++ kvm_kmod_new//include/linux/kvm_host.h 2014-03-26 15:07:48.000000000 +0000
@@ -337,6 +337,12 @@ struct kvm {
struct mutex irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
+ struct task_struct *kthread;
+ wait_queue_head_t wq;
+ struct mutex irq_routing_lock;
+ struct kvm_irq_routing to_update_routing;
+ struct kvm_irq_routing_entry *to_update_entries;
+ atomic_t have_new;
/*
* Update side is protected by irq_lock and,
* if configured, irqfds.lock.
diff -urp kvm_kmod/x86/assigned-dev.c kvm_kmod_new//x86/assigned-dev.c
--- kvm_kmod/x86/assigned-dev.c 2014-03-10 14:08:28.000000000 +0000
+++ kvm_kmod_new//x86/assigned-dev.c 2014-03-26 15:22:33.000000000 +0000
@@ -1056,12 +1056,23 @@ long kvm_vm_ioctl_assigned_device(struct
r = -EFAULT;
urouting = argp;
if (copy_from_user(entries, urouting->entries,
- routing.nr * sizeof(*entries)))
- goto out_free_irq_routing;
- r = kvm_set_irq_routing(kvm, entries, routing.nr,
- routing.flags);
- out_free_irq_routing:
- vfree(entries);
+ routing.nr * sizeof(*entries))) {
+ vfree(entries);
+ break;
+ }
+
+ mutex_lock(&kvm->irq_routing_lock);
+ if (kvm->to_update_entries) {
+ vfree(kvm->to_update_entries);
+ kvm->to_update_entries = NULL;
+ }
+ kvm->to_update_routing = routing;
+ kvm->to_update_entries = entries;
+ atomic_set(&kvm->have_new, 1);
+ mutex_unlock(&kvm->irq_routing_lock);
+
+ wake_up(&kvm->wq);
+ r = 0; /* parameter validity or memory alloc avalibity should be checked before return */
break;
}
#endif /* KVM_CAP_IRQ_ROUTING */
diff -urp kvm_kmod/x86/kvm_main.c kvm_kmod_new//x86/kvm_main.c
--- kvm_kmod/x86/kvm_main.c 2014-03-10 14:08:28.000000000 +0000
+++ kvm_kmod_new//x86/kvm_main.c 2014-03-26 15:27:02.000000000 +0000
@@ -83,6 +83,7 @@
#include <linux/slab.h>
#include <linux/sort.h>
#include <linux/bsearch.h>
+#include <linux/kthread.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -501,6 +502,49 @@ static void kvm_init_memslots_id(struct
slots->id_to_index[i] = slots->memslots[i].id = i;
}
+static int do_irq_routing_table_update(struct kvm *kvm)
+{
+ struct kvm_irq_routing_entry *entries;
+ unsigned nr;
+ unsigned flags;
+
+ mutex_lock(&kvm->irq_routing_lock);
+ BUG_ON(!kvm->to_update_entries);
+
+ nr = kvm->to_update_routing.nr;
+ flags = kvm->to_update_routing.flags;
+ entries = vmalloc(nr * sizeof(*entries));
+ if (!entries) {
+ mutex_unlock(&kvm->irq_routing_lock);
+ return 0;
+ }
+ /* this memcpy can be eliminated by doing set in mutex_lock and doing synchronize_rcu outside */
+ memcpy(entries, kvm->to_update_entries, nr * sizeof(*entries));
+
+ atomic_set(&kvm->have_new, 0);
+ mutex_unlock(&kvm->irq_routing_lock);
+
+ kvm_set_irq_routing(kvm, entries, nr, flags);
+
+ return 0;
+}
+
+static int do_irq_routing_rcu(void *data)
+{
+ struct kvm *kvm = (struct kvm *)data;
+
+ while (1) {
+ wait_event_interruptible(kvm->wq,
+ atomic_read(&kvm->have_new) || kthread_should_stop());
+
+ if (kthread_should_stop())
+ break;
+
+ do_irq_routing_table_update(kvm);
+ }
+
+ return 0;
+}
+
static struct kvm *kvm_create_vm(unsigned long type)
{
int r, i;
@@ -529,6 +573,12 @@ static struct kvm *kvm_create_vm(unsigne
kvm_init_memslots_id(kvm);
if (init_srcu_struct(&kvm->srcu))
goto out_err_nosrcu;
+
+ atomic_set(&kvm->have_new, 0);
+ init_waitqueue_head(&kvm->wq);
+ mutex_init(&kvm->irq_routing_lock);
+ kvm->kthread = kthread_run(do_irq_routing_rcu, kvm, "irq_routing");
+
for (i = 0; i < KVM_NR_BUSES; i++) {
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
GFP_KERNEL);
@@ -635,6 +685,11 @@ static void kvm_destroy_vm(struct kvm *k
list_del(&kvm->vm_list);
raw_spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
+
+ kthread_stop(kvm->kthread);
+ if (kvm->to_update_entries)
+ vfree(kvm->to_update_entries);
+
for (i = 0; i < KVM_NR_BUSES; i++)
kvm_io_bus_destroy(kvm->buses[i]);
kvm_coalesced_mmio_free(kvm);
Best regards,
-Gonglei
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [RFC]Two ideas to optimize updating irq routing table
2014-03-26 8:22 ` [Qemu-devel] " Gonglei (Arei)
@ 2014-03-26 9:37 ` Paolo Bonzini
-1 siblings, 0 replies; 16+ messages in thread
From: Paolo Bonzini @ 2014-03-26 9:37 UTC (permalink / raw)
To: Gonglei (Arei), qemu-devel@nongnu.org, kvm@vger.kernel.org
Cc: gleb@redhat.com, mst@redhat.com, avi.kivity@gmail.com,
Herongguang (Stephen), Huangweidong (C), Christian Borntraeger
Il 26/03/2014 09:22, Gonglei (Arei) ha scritto:
> Yes, previously I was using synchronize_srcu, which is not good. When I
> changed it to synchronize_srcu_expedited, grace period delay is much better
> than synchronize_srcu. Though in our tests, we can still see some impact
> of KVM_SET_GSI_ROUTING ioctl.
>
> Our testing scenario is like this. In VM we run a script that sets smp_affinity
> for each IRQ every 0.5s (this leads QEMU to do KVM_SET_GSI_ROUTING ioctl).
> Outside the VM we ping that VM.
Does the affinity actually change every 0.5s?
> Without patches, ping time can jump from 0.3ms to 2ms-30ms. With synchronize_srcu
> patch, ping time is worse. With synchronize_srcu_expedited patch, ping time is
> overall good, though sometimes ping time jump to 1ms-3ms.
>
> With following raw patch, ping time is like call_rcu patch, that not influenced
> by setting IRQ affinity, keeps 0.3ms, and there is no vulnerability, frequent
> intermidiate KVM_SET_GSI_ROUTING settings are just skipped, and always the newest
> setting would take effect.
Interesting, but it only works for assigned-dev.c which is deprecated.
If you used VFIO you'd see no improvement, and Christian's s390 usecase
would also see no improvement.
Paolo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [RFC]Two ideas to optimize updating irq routing table
@ 2014-03-26 9:37 ` Paolo Bonzini
0 siblings, 0 replies; 16+ messages in thread
From: Paolo Bonzini @ 2014-03-26 9:37 UTC (permalink / raw)
To: Gonglei (Arei), qemu-devel@nongnu.org, kvm@vger.kernel.org
Cc: Huangweidong (C), gleb@redhat.com, mst@redhat.com,
Christian Borntraeger, avi.kivity@gmail.com,
Herongguang (Stephen)
Il 26/03/2014 09:22, Gonglei (Arei) ha scritto:
> Yes, previously I was using synchronize_srcu, which is not good. When I
> changed it to synchronize_srcu_expedited, grace period delay is much better
> than synchronize_srcu. Though in our tests, we can still see some impact
> of KVM_SET_GSI_ROUTING ioctl.
>
> Our testing scenario is like this. In VM we run a script that sets smp_affinity
> for each IRQ every 0.5s (this leads QEMU to do KVM_SET_GSI_ROUTING ioctl).
> Outside the VM we ping that VM.
Does the affinity actually change every 0.5s?
> Without patches, ping time can jump from 0.3ms to 2ms-30ms. With synchronize_srcu
> patch, ping time is worse. With synchronize_srcu_expedited patch, ping time is
> overall good, though sometimes ping time jump to 1ms-3ms.
>
> With following raw patch, ping time is like call_rcu patch, that not influenced
> by setting IRQ affinity, keeps 0.3ms, and there is no vulnerability, frequent
> intermidiate KVM_SET_GSI_ROUTING settings are just skipped, and always the newest
> setting would take effect.
Interesting, but it only works for assigned-dev.c which is deprecated.
If you used VFIO you'd see no improvement, and Christian's s390 usecase
would also see no improvement.
Paolo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC]Two ideas to optimize updating irq routing table
2014-03-26 8:22 ` [Qemu-devel] " Gonglei (Arei)
@ 2014-03-26 11:14 ` Christian Borntraeger
-1 siblings, 0 replies; 16+ messages in thread
From: Christian Borntraeger @ 2014-03-26 11:14 UTC (permalink / raw)
To: Gonglei (Arei), Paolo Bonzini, qemu-devel@nongnu.org,
kvm@vger.kernel.org
Cc: gleb@redhat.com, mst@redhat.com, avi.kivity@gmail.com,
Herongguang (Stephen), Huangweidong (C)
On 26/03/14 09:22, Gonglei (Arei) wrote:
> Without patches, ping time can jump from 0.3ms to 2ms-30ms. With synchronize_srcu
> patch, ping time is worse. With synchronize_srcu_expedited patch, ping time is
> overall good, though sometimes ping time jump to 1ms-3ms.
Just to understand whats going on, does something like
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 3318d82..432c2a2 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -331,7 +331,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
*/
#define SRCU_RETRY_CHECK_DELAY 5
#define SYNCHRONIZE_SRCU_TRYCOUNT 2
-#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12
+#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 50
/*
* @@@ Wait until all pre-existing readers complete. Such readers
make the problem go away?
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [RFC]Two ideas to optimize updating irq routing table
@ 2014-03-26 11:14 ` Christian Borntraeger
0 siblings, 0 replies; 16+ messages in thread
From: Christian Borntraeger @ 2014-03-26 11:14 UTC (permalink / raw)
To: Gonglei (Arei), Paolo Bonzini, qemu-devel@nongnu.org,
kvm@vger.kernel.org
Cc: Herongguang (Stephen), Huangweidong (C), avi.kivity@gmail.com,
gleb@redhat.com, mst@redhat.com
On 26/03/14 09:22, Gonglei (Arei) wrote:
> Without patches, ping time can jump from 0.3ms to 2ms-30ms. With synchronize_srcu
> patch, ping time is worse. With synchronize_srcu_expedited patch, ping time is
> overall good, though sometimes ping time jump to 1ms-3ms.
Just to understand whats going on, does something like
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 3318d82..432c2a2 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -331,7 +331,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
*/
#define SRCU_RETRY_CHECK_DELAY 5
#define SYNCHRONIZE_SRCU_TRYCOUNT 2
-#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12
+#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 50
/*
* @@@ Wait until all pre-existing readers complete. Such readers
make the problem go away?
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [RFC]Two ideas to optimize updating irq routing table
2014-03-26 8:22 ` [Qemu-devel] " Gonglei (Arei)
@ 2014-03-26 11:45 ` Michael S. Tsirkin
-1 siblings, 0 replies; 16+ messages in thread
From: Michael S. Tsirkin @ 2014-03-26 11:45 UTC (permalink / raw)
To: Gonglei (Arei)
Cc: Paolo Bonzini, qemu-devel@nongnu.org, kvm@vger.kernel.org,
Huangweidong (C), gleb@redhat.com, Christian Borntraeger,
avi.kivity@gmail.com, Herongguang (Stephen)
On Wed, Mar 26, 2014 at 08:22:29AM +0000, Gonglei (Arei) wrote:
> > > Based on discussions in:
> > > http://lists.gnu.org/archive/html/qemu-devel/2013-11/threads.html#03322
> > >
> > > About KVM_SET_GSI_ROUTING ioctl, I tested changing RCU to SRCU, but
> > unfortunately
> > > it looks like SRCU's grace period is no better than RCU.
> >
> > Really? This is not what Christian Borntraeger claimed at
> > http://permalink.gmane.org/gmane.comp.emulators.kvm.devel/118083 -- in
> > fact, Andrew Theurer is currently testing a variant of that patch and I
> > was going to post it for 3.16.
> >
> > Have you tried synchronize_srcu_expedited? Unlike the RCU variant, you
> > can use this function.
> >
> Yes, previously I was using synchronize_srcu, which is not good. When I
> changed it to synchronize_srcu_expedited, grace period delay is much better
> than synchronize_srcu. Though in our tests, we can still see some impact
> of KVM_SET_GSI_ROUTING ioctl.
>
> Our testing scenario is like this. In VM we run a script that sets smp_affinity
> for each IRQ every 0.5s (this leads QEMU to do KVM_SET_GSI_ROUTING ioctl).
> Outside the VM we ping that VM.
>
> Without patches, ping time can jump from 0.3ms to 2ms-30ms. With synchronize_srcu
> patch, ping time is worse. With synchronize_srcu_expedited patch, ping time is
> overall good, though sometimes ping time jump to 1ms-3ms.
>
> With following raw patch, ping time is like call_rcu patch, that not influenced
> by setting IRQ affinity, keeps 0.3ms, and there is no vulnerability, frequent
> intermidiate KVM_SET_GSI_ROUTING settings are just skipped, and always the newest
> setting would take effect.
>
> Would you think this patch is acceptable or has problem? Thanks in advance.
So this will just queue the update and never bother waiting for it
to take effect.
I'm not sure it's safe in all cases, but maybe we can optimize out
some cases, by detecting in qemu that changes only affect the VCPU that isn't running,
and telling KVM that it can delay the expensive synchronization.
> diff -urp kvm_kmod/include/linux/kvm_host.h kvm_kmod_new//include/linux/kvm_host.h
> --- kvm_kmod/include/linux/kvm_host.h 2014-03-10 14:08:28.000000000 +0000
> +++ kvm_kmod_new//include/linux/kvm_host.h 2014-03-26 15:07:48.000000000 +0000
> @@ -337,6 +337,12 @@ struct kvm {
>
> struct mutex irq_lock;
> #ifdef CONFIG_HAVE_KVM_IRQCHIP
> + struct task_struct *kthread;
> + wait_queue_head_t wq;
> + struct mutex irq_routing_lock;
> + struct kvm_irq_routing to_update_routing;
> + struct kvm_irq_routing_entry *to_update_entries;
> + atomic_t have_new;
> /*
> * Update side is protected by irq_lock and,
> * if configured, irqfds.lock.
> diff -urp kvm_kmod/x86/assigned-dev.c kvm_kmod_new//x86/assigned-dev.c
> --- kvm_kmod/x86/assigned-dev.c 2014-03-10 14:08:28.000000000 +0000
> +++ kvm_kmod_new//x86/assigned-dev.c 2014-03-26 15:22:33.000000000 +0000
> @@ -1056,12 +1056,23 @@ long kvm_vm_ioctl_assigned_device(struct
> r = -EFAULT;
> urouting = argp;
> if (copy_from_user(entries, urouting->entries,
> - routing.nr * sizeof(*entries)))
> - goto out_free_irq_routing;
> - r = kvm_set_irq_routing(kvm, entries, routing.nr,
> - routing.flags);
> - out_free_irq_routing:
> - vfree(entries);
> + routing.nr * sizeof(*entries))) {
> + vfree(entries);
> + break;
> + }
> +
> + mutex_lock(&kvm->irq_routing_lock);
> + if (kvm->to_update_entries) {
> + vfree(kvm->to_update_entries);
> + kvm->to_update_entries = NULL;
> + }
> + kvm->to_update_routing = routing;
> + kvm->to_update_entries = entries;
> + atomic_set(&kvm->have_new, 1);
> + mutex_unlock(&kvm->irq_routing_lock);
> +
> + wake_up(&kvm->wq);
> + r = 0; /* parameter validity or memory alloc avalibity should be checked before return */
> break;
> }
> #endif /* KVM_CAP_IRQ_ROUTING */
> diff -urp kvm_kmod/x86/kvm_main.c kvm_kmod_new//x86/kvm_main.c
> --- kvm_kmod/x86/kvm_main.c 2014-03-10 14:08:28.000000000 +0000
> +++ kvm_kmod_new//x86/kvm_main.c 2014-03-26 15:27:02.000000000 +0000
> @@ -83,6 +83,7 @@
> #include <linux/slab.h>
> #include <linux/sort.h>
> #include <linux/bsearch.h>
> +#include <linux/kthread.h>
>
> #include <asm/processor.h>
> #include <asm/io.h>
> @@ -501,6 +502,49 @@ static void kvm_init_memslots_id(struct
> slots->id_to_index[i] = slots->memslots[i].id = i;
> }
>
> +static int do_irq_routing_table_update(struct kvm *kvm)
> +{
> + struct kvm_irq_routing_entry *entries;
> + unsigned nr;
> + unsigned flags;
> +
> + mutex_lock(&kvm->irq_routing_lock);
> + BUG_ON(!kvm->to_update_entries);
> +
> + nr = kvm->to_update_routing.nr;
> + flags = kvm->to_update_routing.flags;
> + entries = vmalloc(nr * sizeof(*entries));
> + if (!entries) {
> + mutex_unlock(&kvm->irq_routing_lock);
> + return 0;
> + }
> + /* this memcpy can be eliminated by doing set in mutex_lock and doing synchronize_rcu outside */
> + memcpy(entries, kvm->to_update_entries, nr * sizeof(*entries));
> +
> + atomic_set(&kvm->have_new, 0);
> + mutex_unlock(&kvm->irq_routing_lock);
> +
> + kvm_set_irq_routing(kvm, entries, nr, flags);
> +
> + return 0;
> +}
> +
> +static int do_irq_routing_rcu(void *data)
> +{
> + struct kvm *kvm = (struct kvm *)data;
> +
> + while (1) {
> + wait_event_interruptible(kvm->wq,
> + atomic_read(&kvm->have_new) || kthread_should_stop());
> +
> + if (kthread_should_stop())
> + break;
> +
> + do_irq_routing_table_update(kvm);
> + }
> +
> + return 0;
> +}
> +
> static struct kvm *kvm_create_vm(unsigned long type)
> {
> int r, i;
> @@ -529,6 +573,12 @@ static struct kvm *kvm_create_vm(unsigne
> kvm_init_memslots_id(kvm);
> if (init_srcu_struct(&kvm->srcu))
> goto out_err_nosrcu;
> +
> + atomic_set(&kvm->have_new, 0);
> + init_waitqueue_head(&kvm->wq);
> + mutex_init(&kvm->irq_routing_lock);
> + kvm->kthread = kthread_run(do_irq_routing_rcu, kvm, "irq_routing");
> +
> for (i = 0; i < KVM_NR_BUSES; i++) {
> kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
> GFP_KERNEL);
> @@ -635,6 +685,11 @@ static void kvm_destroy_vm(struct kvm *k
> list_del(&kvm->vm_list);
> raw_spin_unlock(&kvm_lock);
> kvm_free_irq_routing(kvm);
> +
> + kthread_stop(kvm->kthread);
> + if (kvm->to_update_entries)
> + vfree(kvm->to_update_entries);
> +
> for (i = 0; i < KVM_NR_BUSES; i++)
> kvm_io_bus_destroy(kvm->buses[i]);
> kvm_coalesced_mmio_free(kvm);
>
>
> Best regards,
> -Gonglei
^ permalink raw reply [flat|nested] 16+ messages in thread* Re: [Qemu-devel] [RFC]Two ideas to optimize updating irq routing table
@ 2014-03-26 11:45 ` Michael S. Tsirkin
0 siblings, 0 replies; 16+ messages in thread
From: Michael S. Tsirkin @ 2014-03-26 11:45 UTC (permalink / raw)
To: Gonglei (Arei)
Cc: Huangweidong (C), kvm@vger.kernel.org, gleb@redhat.com,
qemu-devel@nongnu.org, Christian Borntraeger,
avi.kivity@gmail.com, Paolo Bonzini, Herongguang (Stephen)
On Wed, Mar 26, 2014 at 08:22:29AM +0000, Gonglei (Arei) wrote:
> > > Based on discussions in:
> > > http://lists.gnu.org/archive/html/qemu-devel/2013-11/threads.html#03322
> > >
> > > About KVM_SET_GSI_ROUTING ioctl, I tested changing RCU to SRCU, but
> > unfortunately
> > > it looks like SRCU's grace period is no better than RCU.
> >
> > Really? This is not what Christian Borntraeger claimed at
> > http://permalink.gmane.org/gmane.comp.emulators.kvm.devel/118083 -- in
> > fact, Andrew Theurer is currently testing a variant of that patch and I
> > was going to post it for 3.16.
> >
> > Have you tried synchronize_srcu_expedited? Unlike the RCU variant, you
> > can use this function.
> >
> Yes, previously I was using synchronize_srcu, which is not good. When I
> changed it to synchronize_srcu_expedited, grace period delay is much better
> than synchronize_srcu. Though in our tests, we can still see some impact
> of KVM_SET_GSI_ROUTING ioctl.
>
> Our testing scenario is like this. In VM we run a script that sets smp_affinity
> for each IRQ every 0.5s (this leads QEMU to do KVM_SET_GSI_ROUTING ioctl).
> Outside the VM we ping that VM.
>
> Without patches, ping time can jump from 0.3ms to 2ms-30ms. With synchronize_srcu
> patch, ping time is worse. With synchronize_srcu_expedited patch, ping time is
> overall good, though sometimes ping time jump to 1ms-3ms.
>
> With following raw patch, ping time is like call_rcu patch, that not influenced
> by setting IRQ affinity, keeps 0.3ms, and there is no vulnerability, frequent
> intermidiate KVM_SET_GSI_ROUTING settings are just skipped, and always the newest
> setting would take effect.
>
> Would you think this patch is acceptable or has problem? Thanks in advance.
So this will just queue the update and never bother waiting for it
to take effect.
I'm not sure it's safe in all cases, but maybe we can optimize out
some cases, by detecting in qemu that changes only affect the VCPU that isn't running,
and telling KVM that it can delay the expensive synchronization.
> diff -urp kvm_kmod/include/linux/kvm_host.h kvm_kmod_new//include/linux/kvm_host.h
> --- kvm_kmod/include/linux/kvm_host.h 2014-03-10 14:08:28.000000000 +0000
> +++ kvm_kmod_new//include/linux/kvm_host.h 2014-03-26 15:07:48.000000000 +0000
> @@ -337,6 +337,12 @@ struct kvm {
>
> struct mutex irq_lock;
> #ifdef CONFIG_HAVE_KVM_IRQCHIP
> + struct task_struct *kthread;
> + wait_queue_head_t wq;
> + struct mutex irq_routing_lock;
> + struct kvm_irq_routing to_update_routing;
> + struct kvm_irq_routing_entry *to_update_entries;
> + atomic_t have_new;
> /*
> * Update side is protected by irq_lock and,
> * if configured, irqfds.lock.
> diff -urp kvm_kmod/x86/assigned-dev.c kvm_kmod_new//x86/assigned-dev.c
> --- kvm_kmod/x86/assigned-dev.c 2014-03-10 14:08:28.000000000 +0000
> +++ kvm_kmod_new//x86/assigned-dev.c 2014-03-26 15:22:33.000000000 +0000
> @@ -1056,12 +1056,23 @@ long kvm_vm_ioctl_assigned_device(struct
> r = -EFAULT;
> urouting = argp;
> if (copy_from_user(entries, urouting->entries,
> - routing.nr * sizeof(*entries)))
> - goto out_free_irq_routing;
> - r = kvm_set_irq_routing(kvm, entries, routing.nr,
> - routing.flags);
> - out_free_irq_routing:
> - vfree(entries);
> + routing.nr * sizeof(*entries))) {
> + vfree(entries);
> + break;
> + }
> +
> + mutex_lock(&kvm->irq_routing_lock);
> + if (kvm->to_update_entries) {
> + vfree(kvm->to_update_entries);
> + kvm->to_update_entries = NULL;
> + }
> + kvm->to_update_routing = routing;
> + kvm->to_update_entries = entries;
> + atomic_set(&kvm->have_new, 1);
> + mutex_unlock(&kvm->irq_routing_lock);
> +
> + wake_up(&kvm->wq);
> + r = 0; /* parameter validity or memory alloc avalibity should be checked before return */
> break;
> }
> #endif /* KVM_CAP_IRQ_ROUTING */
> diff -urp kvm_kmod/x86/kvm_main.c kvm_kmod_new//x86/kvm_main.c
> --- kvm_kmod/x86/kvm_main.c 2014-03-10 14:08:28.000000000 +0000
> +++ kvm_kmod_new//x86/kvm_main.c 2014-03-26 15:27:02.000000000 +0000
> @@ -83,6 +83,7 @@
> #include <linux/slab.h>
> #include <linux/sort.h>
> #include <linux/bsearch.h>
> +#include <linux/kthread.h>
>
> #include <asm/processor.h>
> #include <asm/io.h>
> @@ -501,6 +502,49 @@ static void kvm_init_memslots_id(struct
> slots->id_to_index[i] = slots->memslots[i].id = i;
> }
>
> +static int do_irq_routing_table_update(struct kvm *kvm)
> +{
> + struct kvm_irq_routing_entry *entries;
> + unsigned nr;
> + unsigned flags;
> +
> + mutex_lock(&kvm->irq_routing_lock);
> + BUG_ON(!kvm->to_update_entries);
> +
> + nr = kvm->to_update_routing.nr;
> + flags = kvm->to_update_routing.flags;
> + entries = vmalloc(nr * sizeof(*entries));
> + if (!entries) {
> + mutex_unlock(&kvm->irq_routing_lock);
> + return 0;
> + }
> + /* this memcpy can be eliminated by doing set in mutex_lock and doing synchronize_rcu outside */
> + memcpy(entries, kvm->to_update_entries, nr * sizeof(*entries));
> +
> + atomic_set(&kvm->have_new, 0);
> + mutex_unlock(&kvm->irq_routing_lock);
> +
> + kvm_set_irq_routing(kvm, entries, nr, flags);
> +
> + return 0;
> +}
> +
> +static int do_irq_routing_rcu(void *data)
> +{
> + struct kvm *kvm = (struct kvm *)data;
> +
> + while (1) {
> + wait_event_interruptible(kvm->wq,
> + atomic_read(&kvm->have_new) || kthread_should_stop());
> +
> + if (kthread_should_stop())
> + break;
> +
> + do_irq_routing_table_update(kvm);
> + }
> +
> + return 0;
> +}
> +
> static struct kvm *kvm_create_vm(unsigned long type)
> {
> int r, i;
> @@ -529,6 +573,12 @@ static struct kvm *kvm_create_vm(unsigne
> kvm_init_memslots_id(kvm);
> if (init_srcu_struct(&kvm->srcu))
> goto out_err_nosrcu;
> +
> + atomic_set(&kvm->have_new, 0);
> + init_waitqueue_head(&kvm->wq);
> + mutex_init(&kvm->irq_routing_lock);
> + kvm->kthread = kthread_run(do_irq_routing_rcu, kvm, "irq_routing");
> +
> for (i = 0; i < KVM_NR_BUSES; i++) {
> kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
> GFP_KERNEL);
> @@ -635,6 +685,11 @@ static void kvm_destroy_vm(struct kvm *k
> list_del(&kvm->vm_list);
> raw_spin_unlock(&kvm_lock);
> kvm_free_irq_routing(kvm);
> +
> + kthread_stop(kvm->kthread);
> + if (kvm->to_update_entries)
> + vfree(kvm->to_update_entries);
> +
> for (i = 0; i < KVM_NR_BUSES; i++)
> kvm_io_bus_destroy(kvm->buses[i]);
> kvm_coalesced_mmio_free(kvm);
>
>
> Best regards,
> -Gonglei
^ permalink raw reply [flat|nested] 16+ messages in thread