* [PATCH 0/4] moving irq routing and notifiers to RCU locking
@ 2009-07-12 12:03 Gleb Natapov
2009-07-12 12:03 ` [PATCH 1/4] Move irq routing data structure to rcu locking Gleb Natapov
` (3 more replies)
0 siblings, 4 replies; 41+ messages in thread
From: Gleb Natapov @ 2009-07-12 12:03 UTC (permalink / raw)
To: avi; +Cc: kvm
Make PIT unregister its IRQ ack notifier. Move the irq ack notifier list to
the same place where the mask notifier list is.
Gleb Natapov (4):
Move irq routing data structure to rcu locking
Unregister ack notifier callback on PIT freeing.
Move irq ack notifier list to arch independent code.
Convert irq notifiers lists to RCU locking.
arch/ia64/include/asm/kvm_host.h | 1 -
arch/x86/include/asm/kvm_host.h | 1 -
arch/x86/kvm/i8254.c | 2 +
include/linux/kvm_host.h | 3 +-
virt/kvm/irq_comm.c | 73 ++++++++++++++++++--------------------
virt/kvm/kvm_main.c | 2 +-
6 files changed, 40 insertions(+), 42 deletions(-)
^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-12 12:03 [PATCH 0/4] moving irq routing and notifiers to RCU locking Gleb Natapov
@ 2009-07-12 12:03 ` Gleb Natapov
2009-07-13 12:55 ` Michael S. Tsirkin
2009-07-13 13:01 ` Gregory Haskins
2009-07-12 12:03 ` [PATCH 2/4] Unregister ack notifier callback on PIT freeing Gleb Natapov
` (2 subsequent siblings)
3 siblings, 2 replies; 41+ messages in thread
From: Gleb Natapov @ 2009-07-12 12:03 UTC (permalink / raw)
To: avi; +Cc: kvm
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
include/linux/kvm_host.h | 2 +-
virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
virt/kvm/kvm_main.c | 1 -
3 files changed, 26 insertions(+), 32 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f54a0d3..6756b3e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -161,7 +161,7 @@ struct kvm {
struct mutex irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
- struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
+ struct kvm_kernel_irq_routing_entry *irq_routing;
struct hlist_head mask_notifier_list;
#endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 7af18b8..b2fa3f6 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
- list_for_each_entry(e, &kvm->irq_routing, link)
+ rcu_read_lock();
+ for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
if (e->gsi == irq) {
int r = e->set(e, kvm, sig_level);
if (r < 0)
@@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
ret = r + ((ret < 0) ? 0 : ret);
}
+ }
+ rcu_read_unlock();
return ret;
}
@@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
trace_kvm_ack_irq(irqchip, pin);
- list_for_each_entry(e, &kvm->irq_routing, link)
+ rcu_read_lock();
+ for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
if (e->irqchip.irqchip == irqchip &&
e->irqchip.pin == pin) {
gsi = e->gsi;
break;
}
+ }
+ rcu_read_unlock();
hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
if (kian->gsi == gsi)
@@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
kimn->func(kimn, mask);
}
-static void __kvm_free_irq_routing(struct list_head *irq_routing)
-{
- struct kvm_kernel_irq_routing_entry *e, *n;
-
- list_for_each_entry_safe(e, n, irq_routing, link)
- kfree(e);
-}
-
void kvm_free_irq_routing(struct kvm *kvm)
{
- mutex_lock(&kvm->irq_lock);
- __kvm_free_irq_routing(&kvm->irq_routing);
- mutex_unlock(&kvm->irq_lock);
+ /* Called only during vm destruction. Nobody can use the pointer
+ at this stage */
+ kfree(kvm->irq_routing);
}
static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
@@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
unsigned nr,
unsigned flags)
{
- struct list_head irq_list = LIST_HEAD_INIT(irq_list);
- struct list_head tmp = LIST_HEAD_INIT(tmp);
- struct kvm_kernel_irq_routing_entry *e = NULL;
+ struct kvm_kernel_irq_routing_entry *new, *old;
unsigned i;
int r;
+ /* last element is left zeroed and indicates the end of the array */
+ new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
+
+ if (!new)
+ return -ENOMEM;
+
for (i = 0; i < nr; ++i) {
r = -EINVAL;
if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
goto out;
if (ue->flags)
goto out;
- r = -ENOMEM;
- e = kzalloc(sizeof(*e), GFP_KERNEL);
- if (!e)
- goto out;
- r = setup_routing_entry(e, ue);
+ r = setup_routing_entry(new + i, ue);
if (r)
goto out;
++ue;
- list_add(&e->link, &irq_list);
- e = NULL;
}
mutex_lock(&kvm->irq_lock);
- list_splice(&kvm->irq_routing, &tmp);
- INIT_LIST_HEAD(&kvm->irq_routing);
- list_splice(&irq_list, &kvm->irq_routing);
- INIT_LIST_HEAD(&irq_list);
- list_splice(&tmp, &irq_list);
+ old = kvm->irq_routing;
+ rcu_assign_pointer(kvm->irq_routing, new);
mutex_unlock(&kvm->irq_lock);
+ synchronize_rcu();
+
r = 0;
+ new = old;
out:
- kfree(e);
- __kvm_free_irq_routing(&irq_list);
+ kfree(new);
return r;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cf20dc1..24013b4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
if (IS_ERR(kvm))
goto out;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
- INIT_LIST_HEAD(&kvm->irq_routing);
INIT_HLIST_HEAD(&kvm->mask_notifier_list);
#endif
--
1.6.2.1
^ permalink raw reply related [flat|nested] 41+ messages in thread
* [PATCH 2/4] Unregister ack notifier callback on PIT freeing.
2009-07-12 12:03 [PATCH 0/4] moving irq routing and notifiers to RCU locking Gleb Natapov
2009-07-12 12:03 ` [PATCH 1/4] Move irq routing data structure to rcu locking Gleb Natapov
@ 2009-07-12 12:03 ` Gleb Natapov
2009-07-12 12:03 ` [PATCH 3/4] Move irq ack notifier list to arch independent code Gleb Natapov
2009-07-12 12:03 ` [PATCH 4/4] Convert irq notifiers lists to RCU locking Gleb Natapov
3 siblings, 0 replies; 41+ messages in thread
From: Gleb Natapov @ 2009-07-12 12:03 UTC (permalink / raw)
To: avi; +Cc: kvm
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/kvm/i8254.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 8c3ac30..05e00a8 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -642,6 +642,8 @@ void kvm_free_pit(struct kvm *kvm)
if (kvm->arch.vpit) {
kvm_unregister_irq_mask_notifier(kvm, 0,
&kvm->arch.vpit->mask_notifier);
+ kvm_unregister_irq_ack_notifier(kvm,
+ &kvm->arch.vpit->pit_state.irq_ack_notifier);
mutex_lock(&kvm->arch.vpit->pit_state.lock);
timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
hrtimer_cancel(timer);
--
1.6.2.1
^ permalink raw reply related [flat|nested] 41+ messages in thread
* [PATCH 3/4] Move irq ack notifier list to arch independent code.
2009-07-12 12:03 [PATCH 0/4] moving irq routing and notifiers to RCU locking Gleb Natapov
2009-07-12 12:03 ` [PATCH 1/4] Move irq routing data structure to rcu locking Gleb Natapov
2009-07-12 12:03 ` [PATCH 2/4] Unregister ack notifier callback on PIT freeing Gleb Natapov
@ 2009-07-12 12:03 ` Gleb Natapov
2009-07-12 12:03 ` [PATCH 4/4] Convert irq notifiers lists to RCU locking Gleb Natapov
3 siblings, 0 replies; 41+ messages in thread
From: Gleb Natapov @ 2009-07-12 12:03 UTC (permalink / raw)
To: avi; +Cc: kvm
Mask irq notifier list is already there.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/ia64/include/asm/kvm_host.h | 1 -
arch/x86/include/asm/kvm_host.h | 1 -
include/linux/kvm_host.h | 1 +
virt/kvm/irq_comm.c | 4 ++--
virt/kvm/kvm_main.c | 1 +
5 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index d9b6325..a362e67 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -475,7 +475,6 @@ struct kvm_arch {
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
int iommu_flags;
- struct hlist_head irq_ack_notifier_list;
unsigned long irq_sources_bitmap;
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 08732d7..eb723a7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -401,7 +401,6 @@ struct kvm_arch{
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
- struct hlist_head irq_ack_notifier_list;
int vapics_in_nmi_mode;
unsigned int tss_addr;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6756b3e..f795f25 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -163,6 +163,7 @@ struct kvm {
#ifdef CONFIG_HAVE_KVM_IRQCHIP
struct kvm_kernel_irq_routing_entry *irq_routing;
struct hlist_head mask_notifier_list;
+ struct hlist_head irq_ack_notifier_list;
#endif
#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index b2fa3f6..5dde1ef 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -181,7 +181,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
}
rcu_read_unlock();
- hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
+ hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
if (kian->gsi == gsi)
kian->irq_acked(kian);
}
@@ -190,7 +190,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
- hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+ hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 24013b4..53d9b70 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -946,6 +946,7 @@ static struct kvm *kvm_create_vm(void)
goto out;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+ INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
--
1.6.2.1
^ permalink raw reply related [flat|nested] 41+ messages in thread
* [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-12 12:03 [PATCH 0/4] moving irq routing and notifiers to RCU locking Gleb Natapov
` (2 preceding siblings ...)
2009-07-12 12:03 ` [PATCH 3/4] Move irq ack notifier list to arch independent code Gleb Natapov
@ 2009-07-12 12:03 ` Gleb Natapov
2009-07-13 12:56 ` Michael S. Tsirkin
2009-07-13 13:02 ` Michael S. Tsirkin
3 siblings, 2 replies; 41+ messages in thread
From: Gleb Natapov @ 2009-07-12 12:03 UTC (permalink / raw)
To: avi; +Cc: kvm
Use RCU locking for mask/ack notifiers lists.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
virt/kvm/irq_comm.c | 20 +++++++++++---------
1 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 5dde1ef..ba3a115 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
break;
}
}
- rcu_read_unlock();
- hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
+ hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
if (kian->gsi == gsi)
kian->irq_acked(kian);
+ rcu_read_unlock();
}
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
- hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
+ hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
}
@@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian)
{
mutex_lock(&kvm->irq_lock);
- hlist_del_init(&kian->link);
+ hlist_del_init_rcu(&kian->link);
mutex_unlock(&kvm->irq_lock);
+ synchronize_rcu();
}
int kvm_request_irq_source_id(struct kvm *kvm)
@@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
{
mutex_lock(&kvm->irq_lock);
kimn->irq = irq;
- hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+ hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
mutex_unlock(&kvm->irq_lock);
}
@@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
struct kvm_irq_mask_notifier *kimn)
{
mutex_lock(&kvm->irq_lock);
- hlist_del(&kimn->link);
+ hlist_del_rcu(&kimn->link);
mutex_unlock(&kvm->irq_lock);
+ synchronize_rcu();
}
void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
struct kvm_irq_mask_notifier *kimn;
struct hlist_node *n;
- WARN_ON(!mutex_is_locked(&kvm->irq_lock));
-
- hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
if (kimn->irq == irq)
kimn->func(kimn, mask);
+ rcu_read_unlock();
}
void kvm_free_irq_routing(struct kvm *kvm)
--
1.6.2.1
^ permalink raw reply related [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-12 12:03 ` [PATCH 1/4] Move irq routing data structure to rcu locking Gleb Natapov
@ 2009-07-13 12:55 ` Michael S. Tsirkin
2009-07-13 13:03 ` Gleb Natapov
2009-07-13 13:01 ` Gregory Haskins
1 sibling, 1 reply; 41+ messages in thread
From: Michael S. Tsirkin @ 2009-07-13 12:55 UTC (permalink / raw)
To: Gleb Natapov; +Cc: avi, kvm
On Sun, Jul 12, 2009 at 03:03:50PM +0300, Gleb Natapov wrote:
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
> include/linux/kvm_host.h | 2 +-
> virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> virt/kvm/kvm_main.c | 1 -
> 3 files changed, 26 insertions(+), 32 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f54a0d3..6756b3e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -161,7 +161,7 @@ struct kvm {
>
> struct mutex irq_lock;
> #ifdef CONFIG_HAVE_KVM_IRQCHIP
> - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> + struct kvm_kernel_irq_routing_entry *irq_routing;
> struct hlist_head mask_notifier_list;
> #endif
>
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index 7af18b8..b2fa3f6 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> * IOAPIC. So set the bit in both. The guest will ignore
> * writes to the unused one.
> */
> - list_for_each_entry(e, &kvm->irq_routing, link)
> + rcu_read_lock();
> + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> if (e->gsi == irq) {
> int r = e->set(e, kvm, sig_level);
> if (r < 0)
> @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>
> ret = r + ((ret < 0) ? 0 : ret);
> }
> + }
> + rcu_read_unlock();
> return ret;
> }
>
> @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>
> trace_kvm_ack_irq(irqchip, pin);
>
> - list_for_each_entry(e, &kvm->irq_routing, link)
> + rcu_read_lock();
> + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> if (e->irqchip.irqchip == irqchip &&
> e->irqchip.pin == pin) {
> gsi = e->gsi;
> break;
> }
> + }
> + rcu_read_unlock();
>
> hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> if (kian->gsi == gsi)
> @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> kimn->func(kimn, mask);
> }
>
> -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> -{
> - struct kvm_kernel_irq_routing_entry *e, *n;
> -
> - list_for_each_entry_safe(e, n, irq_routing, link)
> - kfree(e);
> -}
> -
> void kvm_free_irq_routing(struct kvm *kvm)
> {
> - mutex_lock(&kvm->irq_lock);
> - __kvm_free_irq_routing(&kvm->irq_routing);
> - mutex_unlock(&kvm->irq_lock);
> + /* Called only during vm destruction. Nobody can use the pointer
> + at this stage */
> + kfree(kvm->irq_routing);
> }
>
> static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> unsigned nr,
> unsigned flags)
> {
> - struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> - struct list_head tmp = LIST_HEAD_INIT(tmp);
> - struct kvm_kernel_irq_routing_entry *e = NULL;
> + struct kvm_kernel_irq_routing_entry *new, *old;
> unsigned i;
> int r;
>
> + /* last element is left zeroed and indicates the end of the array */
> + new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
There are up to 1K entries, and each one seems to be around 32 bytes.
Are there chances that we won't be able to find such a chunk of
contiguous memory on a busy system?
Since the tmp list is never traversed while it is assigned, we can, instead,
build a new list as we did and simply replace list_splice with these bits from
list_splice_init_rcu:
static inline void list_splice_tmp_rcu(struct list_head *tmp,
struct list_head *head) {
struct list_head *first = tmp->next;
struct list_head *last = tmp->prev;
struct list_head *at = head->next;
last->next = at;
rcu_assign_pointer(head->next, first);
first->prev = head;
at->prev = last;
}
> +
> + if (!new)
> + return -ENOMEM;
> +
> for (i = 0; i < nr; ++i) {
> r = -EINVAL;
> if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> goto out;
> if (ue->flags)
> goto out;
> - r = -ENOMEM;
> - e = kzalloc(sizeof(*e), GFP_KERNEL);
> - if (!e)
> - goto out;
> - r = setup_routing_entry(e, ue);
> + r = setup_routing_entry(new + i, ue);
> if (r)
> goto out;
> ++ue;
> - list_add(&e->link, &irq_list);
> - e = NULL;
> }
>
> mutex_lock(&kvm->irq_lock);
> - list_splice(&kvm->irq_routing, &tmp);
> - INIT_LIST_HEAD(&kvm->irq_routing);
> - list_splice(&irq_list, &kvm->irq_routing);
> - INIT_LIST_HEAD(&irq_list);
> - list_splice(&tmp, &irq_list);
> + old = kvm->irq_routing;
> + rcu_assign_pointer(kvm->irq_routing, new);
> mutex_unlock(&kvm->irq_lock);
>
> + synchronize_rcu();
> +
> r = 0;
> + new = old;
>
> out:
> - kfree(e);
> - __kvm_free_irq_routing(&irq_list);
> + kfree(new);
> return r;
> }
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index cf20dc1..24013b4 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> if (IS_ERR(kvm))
> goto out;
> #ifdef CONFIG_HAVE_KVM_IRQCHIP
> - INIT_LIST_HEAD(&kvm->irq_routing);
> INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> #endif
>
> --
> 1.6.2.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-12 12:03 ` [PATCH 4/4] Convert irq notifiers lists to RCU locking Gleb Natapov
@ 2009-07-13 12:56 ` Michael S. Tsirkin
2009-07-13 13:05 ` Gleb Natapov
2009-07-13 13:48 ` Gregory Haskins
2009-07-13 13:02 ` Michael S. Tsirkin
1 sibling, 2 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2009-07-13 12:56 UTC (permalink / raw)
To: Gleb Natapov; +Cc: avi, kvm
On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> Use RCU locking for mask/ack notifiers lists.
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
> virt/kvm/irq_comm.c | 20 +++++++++++---------
> 1 files changed, 11 insertions(+), 9 deletions(-)
>
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index 5dde1ef..ba3a115 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> break;
> }
> }
> - rcu_read_unlock();
>
> - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> if (kian->gsi == gsi)
> kian->irq_acked(kian);
> + rcu_read_unlock();
> }
>
> void kvm_register_irq_ack_notifier(struct kvm *kvm,
> struct kvm_irq_ack_notifier *kian)
> {
> mutex_lock(&kvm->irq_lock);
> - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> mutex_unlock(&kvm->irq_lock);
I think we need synchronize_rcu here as well. If the user adds a
notifier, he expects to get notified of irqs immediately
after the function returns, not after the next rcu grace period.
> }
>
> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> struct kvm_irq_ack_notifier *kian)
> {
> mutex_lock(&kvm->irq_lock);
> - hlist_del_init(&kian->link);
> + hlist_del_init_rcu(&kian->link);
> mutex_unlock(&kvm->irq_lock);
> + synchronize_rcu();
> }
>
> int kvm_request_irq_source_id(struct kvm *kvm)
> @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
> {
> mutex_lock(&kvm->irq_lock);
> kimn->irq = irq;
> - hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> + hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
> mutex_unlock(&kvm->irq_lock);
> }
>
> @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
> struct kvm_irq_mask_notifier *kimn)
> {
> mutex_lock(&kvm->irq_lock);
> - hlist_del(&kimn->link);
> + hlist_del_rcu(&kimn->link);
> mutex_unlock(&kvm->irq_lock);
> + synchronize_rcu();
> }
>
> void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> struct kvm_irq_mask_notifier *kimn;
> struct hlist_node *n;
>
> - WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> -
> - hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> + rcu_read_lock();
> + hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
> if (kimn->irq == irq)
> kimn->func(kimn, mask);
> + rcu_read_unlock();
> }
>
> void kvm_free_irq_routing(struct kvm *kvm)
> --
> 1.6.2.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-12 12:03 ` [PATCH 1/4] Move irq routing data structure to rcu locking Gleb Natapov
2009-07-13 12:55 ` Michael S. Tsirkin
@ 2009-07-13 13:01 ` Gregory Haskins
2009-07-13 13:15 ` Gleb Natapov
1 sibling, 1 reply; 41+ messages in thread
From: Gregory Haskins @ 2009-07-13 13:01 UTC (permalink / raw)
To: Gleb Natapov; +Cc: avi, kvm
[-- Attachment #1: Type: text/plain, Size: 5010 bytes --]
Gleb Natapov wrote:
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
> include/linux/kvm_host.h | 2 +-
> virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> virt/kvm/kvm_main.c | 1 -
> 3 files changed, 26 insertions(+), 32 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f54a0d3..6756b3e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -161,7 +161,7 @@ struct kvm {
>
> struct mutex irq_lock;
> #ifdef CONFIG_HAVE_KVM_IRQCHIP
> - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> + struct kvm_kernel_irq_routing_entry *irq_routing;
> struct hlist_head mask_notifier_list;
> #endif
>
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index 7af18b8..b2fa3f6 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> * IOAPIC. So set the bit in both. The guest will ignore
> * writes to the unused one.
> */
> - list_for_each_entry(e, &kvm->irq_routing, link)
> + rcu_read_lock();
> + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
>
Hi Gleb,
I haven't had a chance to fully digest and review these patches, but
one thing I did notice is that you seem to be converting from a list to
an open-coded structure. I am just curious why you made this design
decision instead of using the RCU variant of list?
Regards,
-Greg
> if (e->gsi == irq) {
> int r = e->set(e, kvm, sig_level);
> if (r < 0)
> @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>
> ret = r + ((ret < 0) ? 0 : ret);
> }
> + }
> + rcu_read_unlock();
> return ret;
> }
>
> @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>
> trace_kvm_ack_irq(irqchip, pin);
>
> - list_for_each_entry(e, &kvm->irq_routing, link)
> + rcu_read_lock();
> + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> if (e->irqchip.irqchip == irqchip &&
> e->irqchip.pin == pin) {
> gsi = e->gsi;
> break;
> }
> + }
> + rcu_read_unlock();
>
> hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> if (kian->gsi == gsi)
> @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> kimn->func(kimn, mask);
> }
>
> -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> -{
> - struct kvm_kernel_irq_routing_entry *e, *n;
> -
> - list_for_each_entry_safe(e, n, irq_routing, link)
> - kfree(e);
> -}
> -
> void kvm_free_irq_routing(struct kvm *kvm)
> {
> - mutex_lock(&kvm->irq_lock);
> - __kvm_free_irq_routing(&kvm->irq_routing);
> - mutex_unlock(&kvm->irq_lock);
> + /* Called only during vm destruction. Nobody can use the pointer
> + at this stage */
> + kfree(kvm->irq_routing);
> }
>
> static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> unsigned nr,
> unsigned flags)
> {
> - struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> - struct list_head tmp = LIST_HEAD_INIT(tmp);
> - struct kvm_kernel_irq_routing_entry *e = NULL;
> + struct kvm_kernel_irq_routing_entry *new, *old;
> unsigned i;
> int r;
>
> + /* last element is left zeroed and indicates the end of the array */
> + new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> +
> + if (!new)
> + return -ENOMEM;
> +
> for (i = 0; i < nr; ++i) {
> r = -EINVAL;
> if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> goto out;
> if (ue->flags)
> goto out;
> - r = -ENOMEM;
> - e = kzalloc(sizeof(*e), GFP_KERNEL);
> - if (!e)
> - goto out;
> - r = setup_routing_entry(e, ue);
> + r = setup_routing_entry(new + i, ue);
> if (r)
> goto out;
> ++ue;
> - list_add(&e->link, &irq_list);
> - e = NULL;
> }
>
> mutex_lock(&kvm->irq_lock);
> - list_splice(&kvm->irq_routing, &tmp);
> - INIT_LIST_HEAD(&kvm->irq_routing);
> - list_splice(&irq_list, &kvm->irq_routing);
> - INIT_LIST_HEAD(&irq_list);
> - list_splice(&tmp, &irq_list);
> + old = kvm->irq_routing;
> + rcu_assign_pointer(kvm->irq_routing, new);
> mutex_unlock(&kvm->irq_lock);
>
> + synchronize_rcu();
> +
> r = 0;
> + new = old;
>
> out:
> - kfree(e);
> - __kvm_free_irq_routing(&irq_list);
> + kfree(new);
> return r;
> }
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index cf20dc1..24013b4 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> if (IS_ERR(kvm))
> goto out;
> #ifdef CONFIG_HAVE_KVM_IRQCHIP
> - INIT_LIST_HEAD(&kvm->irq_routing);
> INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> #endif
>
>
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 266 bytes --]
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-12 12:03 ` [PATCH 4/4] Convert irq notifiers lists to RCU locking Gleb Natapov
2009-07-13 12:56 ` Michael S. Tsirkin
@ 2009-07-13 13:02 ` Michael S. Tsirkin
2009-07-13 13:11 ` Gleb Natapov
1 sibling, 1 reply; 41+ messages in thread
From: Michael S. Tsirkin @ 2009-07-13 13:02 UTC (permalink / raw)
To: Gleb Natapov; +Cc: avi, kvm
On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> Use RCU locking for mask/ack notifiers lists.
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
> virt/kvm/irq_comm.c | 20 +++++++++++---------
> 1 files changed, 11 insertions(+), 9 deletions(-)
>
> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> index 5dde1ef..ba3a115 100644
> --- a/virt/kvm/irq_comm.c
> +++ b/virt/kvm/irq_comm.c
> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> break;
> }
> }
> - rcu_read_unlock();
>
> - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> if (kian->gsi == gsi)
> kian->irq_acked(kian);
> + rcu_read_unlock();
> }
>
> void kvm_register_irq_ack_notifier(struct kvm *kvm,
> struct kvm_irq_ack_notifier *kian)
> {
> mutex_lock(&kvm->irq_lock);
> - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> mutex_unlock(&kvm->irq_lock);
> }
>
> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> struct kvm_irq_ack_notifier *kian)
> {
> mutex_lock(&kvm->irq_lock);
> - hlist_del_init(&kian->link);
> + hlist_del_init_rcu(&kian->link);
> mutex_unlock(&kvm->irq_lock);
> + synchronize_rcu();
> }
This is done under kvm->lock still, which means the lock might be held
potentially for a very long time. Can synchronize_rcu be moved out of
this lock?
> int kvm_request_irq_source_id(struct kvm *kvm)
> @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
> {
> mutex_lock(&kvm->irq_lock);
> kimn->irq = irq;
> - hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> + hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
> mutex_unlock(&kvm->irq_lock);
> }
>
> @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
> struct kvm_irq_mask_notifier *kimn)
> {
> mutex_lock(&kvm->irq_lock);
> - hlist_del(&kimn->link);
> + hlist_del_rcu(&kimn->link);
> mutex_unlock(&kvm->irq_lock);
> + synchronize_rcu();
> }
>
> void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> struct kvm_irq_mask_notifier *kimn;
> struct hlist_node *n;
>
> - WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> -
> - hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> + rcu_read_lock();
> + hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
> if (kimn->irq == irq)
> kimn->func(kimn, mask);
> + rcu_read_unlock();
> }
>
> void kvm_free_irq_routing(struct kvm *kvm)
> --
> 1.6.2.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 12:55 ` Michael S. Tsirkin
@ 2009-07-13 13:03 ` Gleb Natapov
2009-07-13 13:15 ` Michael S. Tsirkin
0 siblings, 1 reply; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 13:03 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: avi, kvm
On Mon, Jul 13, 2009 at 03:55:07PM +0300, Michael S. Tsirkin wrote:
> On Sun, Jul 12, 2009 at 03:03:50PM +0300, Gleb Natapov wrote:
> >
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> > include/linux/kvm_host.h | 2 +-
> > virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> > virt/kvm/kvm_main.c | 1 -
> > 3 files changed, 26 insertions(+), 32 deletions(-)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index f54a0d3..6756b3e 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -161,7 +161,7 @@ struct kvm {
> >
> > struct mutex irq_lock;
> > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > + struct kvm_kernel_irq_routing_entry *irq_routing;
> > struct hlist_head mask_notifier_list;
> > #endif
> >
> > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > index 7af18b8..b2fa3f6 100644
> > --- a/virt/kvm/irq_comm.c
> > +++ b/virt/kvm/irq_comm.c
> > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > * IOAPIC. So set the bit in both. The guest will ignore
> > * writes to the unused one.
> > */
> > - list_for_each_entry(e, &kvm->irq_routing, link)
> > + rcu_read_lock();
> > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > if (e->gsi == irq) {
> > int r = e->set(e, kvm, sig_level);
> > if (r < 0)
> > @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >
> > ret = r + ((ret < 0) ? 0 : ret);
> > }
> > + }
> > + rcu_read_unlock();
> > return ret;
> > }
> >
> > @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >
> > trace_kvm_ack_irq(irqchip, pin);
> >
> > - list_for_each_entry(e, &kvm->irq_routing, link)
> > + rcu_read_lock();
> > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > if (e->irqchip.irqchip == irqchip &&
> > e->irqchip.pin == pin) {
> > gsi = e->gsi;
> > break;
> > }
> > + }
> > + rcu_read_unlock();
> >
> > hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> > if (kian->gsi == gsi)
> > @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > kimn->func(kimn, mask);
> > }
> >
> > -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> > -{
> > - struct kvm_kernel_irq_routing_entry *e, *n;
> > -
> > - list_for_each_entry_safe(e, n, irq_routing, link)
> > - kfree(e);
> > -}
> > -
> > void kvm_free_irq_routing(struct kvm *kvm)
> > {
> > - mutex_lock(&kvm->irq_lock);
> > - __kvm_free_irq_routing(&kvm->irq_routing);
> > - mutex_unlock(&kvm->irq_lock);
> > + /* Called only during vm destruction. Nobody can use the pointer
> > + at this stage */
> > + kfree(kvm->irq_routing);
> > }
> >
> > static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> > unsigned nr,
> > unsigned flags)
> > {
> > - struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> > - struct list_head tmp = LIST_HEAD_INIT(tmp);
> > - struct kvm_kernel_irq_routing_entry *e = NULL;
> > + struct kvm_kernel_irq_routing_entry *new, *old;
> > unsigned i;
> > int r;
> >
> > + /* last element is left zeroed and indicates the end of the array */
> > + new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
>
> There are up to 1K entries, and each one seems to be around 32 bytes.
> Are there chances that we won't be able to find such a chunk of
> contiguous memory on a busy system?
>
> Since the tmp list is never traversed while it is assigned, we can, instead,
> build a new list as we did and simply replace list_splice with these bits from
> list_splice_init_rcu:
>
> static inline void list_splice_tmp_rcu(struct list_head *tmp,
> struct list_head *head) {
> struct list_head *first = tmp->next;
> struct list_head *last = tmp->prev;
> struct list_head *at = head->next;
> last->next = at;
> rcu_assign_pointer(head->next, first);
> first->prev = head;
> at->prev = last;
> }
>
Let's keep simple things simple. If there is a real concern that 3-4
contiguous pages will not be available we can use vmalloc() here. But the
not-so-long-term plan is to not use the irq routing table for msi injection
(new ioctl kvm_msi_inject) and reduce the table to a much smaller size (maybe
make it a hash).
>
> > +
> > + if (!new)
> > + return -ENOMEM;
> > +
> > for (i = 0; i < nr; ++i) {
> > r = -EINVAL;
> > if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> > goto out;
> > if (ue->flags)
> > goto out;
> > - r = -ENOMEM;
> > - e = kzalloc(sizeof(*e), GFP_KERNEL);
> > - if (!e)
> > - goto out;
> > - r = setup_routing_entry(e, ue);
> > + r = setup_routing_entry(new + i, ue);
> > if (r)
> > goto out;
> > ++ue;
> > - list_add(&e->link, &irq_list);
> > - e = NULL;
> > }
> >
> > mutex_lock(&kvm->irq_lock);
> > - list_splice(&kvm->irq_routing, &tmp);
> > - INIT_LIST_HEAD(&kvm->irq_routing);
> > - list_splice(&irq_list, &kvm->irq_routing);
> > - INIT_LIST_HEAD(&irq_list);
> > - list_splice(&tmp, &irq_list);
> > + old = kvm->irq_routing;
> > + rcu_assign_pointer(kvm->irq_routing, new);
> > mutex_unlock(&kvm->irq_lock);
> >
> > + synchronize_rcu();
> > +
> > r = 0;
> > + new = old;
> >
> > out:
> > - kfree(e);
> > - __kvm_free_irq_routing(&irq_list);
> > + kfree(new);
> > return r;
> > }
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index cf20dc1..24013b4 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> > if (IS_ERR(kvm))
> > goto out;
> > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > - INIT_LIST_HEAD(&kvm->irq_routing);
> > INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> > #endif
> >
> > --
> > 1.6.2.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 12:56 ` Michael S. Tsirkin
@ 2009-07-13 13:05 ` Gleb Natapov
2009-07-13 13:29 ` Michael S. Tsirkin
2009-07-13 13:48 ` Gregory Haskins
1 sibling, 1 reply; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 13:05 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: avi, kvm
On Mon, Jul 13, 2009 at 03:56:40PM +0300, Michael S. Tsirkin wrote:
> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> > Use RCU locking for mask/ack notifiers lists.
> >
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> > virt/kvm/irq_comm.c | 20 +++++++++++---------
> > 1 files changed, 11 insertions(+), 9 deletions(-)
> >
> > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > index 5dde1ef..ba3a115 100644
> > --- a/virt/kvm/irq_comm.c
> > +++ b/virt/kvm/irq_comm.c
> > @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> > break;
> > }
> > }
> > - rcu_read_unlock();
> >
> > - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> > + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> > if (kian->gsi == gsi)
> > kian->irq_acked(kian);
> > + rcu_read_unlock();
> > }
> >
> > void kvm_register_irq_ack_notifier(struct kvm *kvm,
> > struct kvm_irq_ack_notifier *kian)
> > {
> > mutex_lock(&kvm->irq_lock);
> > - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> > + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> > mutex_unlock(&kvm->irq_lock);
>
> I think we need synchronize_rcu here as well. If the user adds a
> notifier, he expects to get notified of irqs immediately
> after the function returns, not after the next rcu grace period.
>
If ack notifier registration races with acknowledgment of the interrupt
it adds a notifier for, the user is already doing something terribly wrong.
> > }
> >
> > @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> > struct kvm_irq_ack_notifier *kian)
> > {
> > mutex_lock(&kvm->irq_lock);
> > - hlist_del_init(&kian->link);
> > + hlist_del_init_rcu(&kian->link);
> > mutex_unlock(&kvm->irq_lock);
> > + synchronize_rcu();
> > }
> >
> > int kvm_request_irq_source_id(struct kvm *kvm)
> > @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
> > {
> > mutex_lock(&kvm->irq_lock);
> > kimn->irq = irq;
> > - hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> > + hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
> > mutex_unlock(&kvm->irq_lock);
> > }
> >
> > @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
> > struct kvm_irq_mask_notifier *kimn)
> > {
> > mutex_lock(&kvm->irq_lock);
> > - hlist_del(&kimn->link);
> > + hlist_del_rcu(&kimn->link);
> > mutex_unlock(&kvm->irq_lock);
> > + synchronize_rcu();
> > }
> >
> > void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > struct kvm_irq_mask_notifier *kimn;
> > struct hlist_node *n;
> >
> > - WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> > -
> > - hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> > + rcu_read_lock();
> > + hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
> > if (kimn->irq == irq)
> > kimn->func(kimn, mask);
> > + rcu_read_unlock();
> > }
> >
> > void kvm_free_irq_routing(struct kvm *kvm)
> > --
> > 1.6.2.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 13:02 ` Michael S. Tsirkin
@ 2009-07-13 13:11 ` Gleb Natapov
2009-07-13 13:26 ` Gregory Haskins
0 siblings, 1 reply; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 13:11 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: avi, kvm
On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> > Use RCU locking for mask/ack notifiers lists.
> >
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> > virt/kvm/irq_comm.c | 20 +++++++++++---------
> > 1 files changed, 11 insertions(+), 9 deletions(-)
> >
> > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > index 5dde1ef..ba3a115 100644
> > --- a/virt/kvm/irq_comm.c
> > +++ b/virt/kvm/irq_comm.c
> > @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> > break;
> > }
> > }
> > - rcu_read_unlock();
> >
> > - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> > + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> > if (kian->gsi == gsi)
> > kian->irq_acked(kian);
> > + rcu_read_unlock();
> > }
> >
> > void kvm_register_irq_ack_notifier(struct kvm *kvm,
> > struct kvm_irq_ack_notifier *kian)
> > {
> > mutex_lock(&kvm->irq_lock);
> > - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> > + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> > mutex_unlock(&kvm->irq_lock);
> > }
> >
> > @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> > struct kvm_irq_ack_notifier *kian)
> > {
> > mutex_lock(&kvm->irq_lock);
> > - hlist_del_init(&kian->link);
> > + hlist_del_init_rcu(&kian->link);
> > mutex_unlock(&kvm->irq_lock);
> > + synchronize_rcu();
> > }
>
> This is done under kvm->lock still, which means the lock might be held
> potentially for a very long time. Can synchronize_rcu be moved out of
> this lock?
>
Only if kvm_free_assigned_device() is moved out of this lock.
Device de-assignment is not a very frequent event though. How long do you
think it may be held? KVM RCU read sections are very brief.
> > int kvm_request_irq_source_id(struct kvm *kvm)
> > @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
> > {
> > mutex_lock(&kvm->irq_lock);
> > kimn->irq = irq;
> > - hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> > + hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
> > mutex_unlock(&kvm->irq_lock);
> > }
> >
> > @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
> > struct kvm_irq_mask_notifier *kimn)
> > {
> > mutex_lock(&kvm->irq_lock);
> > - hlist_del(&kimn->link);
> > + hlist_del_rcu(&kimn->link);
> > mutex_unlock(&kvm->irq_lock);
> > + synchronize_rcu();
> > }
> >
> > void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > struct kvm_irq_mask_notifier *kimn;
> > struct hlist_node *n;
> >
> > - WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> > -
> > - hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> > + rcu_read_lock();
> > + hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
> > if (kimn->irq == irq)
> > kimn->func(kimn, mask);
> > + rcu_read_unlock();
> > }
> >
> > void kvm_free_irq_routing(struct kvm *kvm)
> > --
> > 1.6.2.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 13:03 ` Gleb Natapov
@ 2009-07-13 13:15 ` Michael S. Tsirkin
2009-07-13 13:23 ` Gleb Natapov
0 siblings, 1 reply; 41+ messages in thread
From: Michael S. Tsirkin @ 2009-07-13 13:15 UTC (permalink / raw)
To: Gleb Natapov; +Cc: avi, kvm
On Mon, Jul 13, 2009 at 04:03:10PM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 03:55:07PM +0300, Michael S. Tsirkin wrote:
> > On Sun, Jul 12, 2009 at 03:03:50PM +0300, Gleb Natapov wrote:
> > >
> > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > ---
> > > include/linux/kvm_host.h | 2 +-
> > > virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> > > virt/kvm/kvm_main.c | 1 -
> > > 3 files changed, 26 insertions(+), 32 deletions(-)
> > >
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index f54a0d3..6756b3e 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -161,7 +161,7 @@ struct kvm {
> > >
> > > struct mutex irq_lock;
> > > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > + struct kvm_kernel_irq_routing_entry *irq_routing;
> > > struct hlist_head mask_notifier_list;
> > > #endif
> > >
> > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > index 7af18b8..b2fa3f6 100644
> > > --- a/virt/kvm/irq_comm.c
> > > +++ b/virt/kvm/irq_comm.c
> > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > * IOAPIC. So set the bit in both. The guest will ignore
> > > * writes to the unused one.
> > > */
> > > - list_for_each_entry(e, &kvm->irq_routing, link)
> > > + rcu_read_lock();
> > > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > if (e->gsi == irq) {
> > > int r = e->set(e, kvm, sig_level);
> > > if (r < 0)
> > > @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > >
> > > ret = r + ((ret < 0) ? 0 : ret);
> > > }
> > > + }
> > > + rcu_read_unlock();
> > > return ret;
> > > }
> > >
> > > @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> > >
> > > trace_kvm_ack_irq(irqchip, pin);
> > >
> > > - list_for_each_entry(e, &kvm->irq_routing, link)
> > > + rcu_read_lock();
> > > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > if (e->irqchip.irqchip == irqchip &&
> > > e->irqchip.pin == pin) {
> > > gsi = e->gsi;
> > > break;
> > > }
> > > + }
> > > + rcu_read_unlock();
> > >
> > > hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> > > if (kian->gsi == gsi)
> > > @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > > kimn->func(kimn, mask);
> > > }
> > >
> > > -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> > > -{
> > > - struct kvm_kernel_irq_routing_entry *e, *n;
> > > -
> > > - list_for_each_entry_safe(e, n, irq_routing, link)
> > > - kfree(e);
> > > -}
> > > -
> > > void kvm_free_irq_routing(struct kvm *kvm)
> > > {
> > > - mutex_lock(&kvm->irq_lock);
> > > - __kvm_free_irq_routing(&kvm->irq_routing);
> > > - mutex_unlock(&kvm->irq_lock);
> > > + /* Called only during vm destruction. Nobody can use the pointer
> > > + at this stage */
> > > + kfree(kvm->irq_routing);
> > > }
> > >
> > > static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > > @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> > > unsigned nr,
> > > unsigned flags)
> > > {
> > > - struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> > > - struct list_head tmp = LIST_HEAD_INIT(tmp);
> > > - struct kvm_kernel_irq_routing_entry *e = NULL;
> > > + struct kvm_kernel_irq_routing_entry *new, *old;
> > > unsigned i;
> > > int r;
> > >
> > > + /* last element is left zeroed and indicates the end of the array */
> > > + new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> >
> > There are up to 1K entries, and each one seems to be around 32 bytes.
> > Are there chances that we won't be able to find such a chunk of
> > contiguous memory on a busy system?
> >
> > Since the tmp list is never traversed while it is assigned, we can, instead,
> > build a new list as we did and simply replace list_splice with these bits from
> > list_splice_init_rcu:
> >
> > static inline void list_splice_tmp_rcu(struct list_head *tmp,
> > struct list_head *head) {
> > struct list_head *first = tmp->next;
> > struct list_head *last = tmp->prev;
> > struct list_head *at = head->next;
> > last->next = at;
> > rcu_assign_pointer(head->next, first);
> > first->prev = head;
> > at->prev = last;
> > }
> >
> Lets keep simple things simple. If there is real concern that 3-4
> contiguous page will not be available we can use vmalloc() here.
Hmm, 32 * (1K + 1) is usually 8-9 pages, and vmalloc is a finite
resource. Maybe it's a good idea to use an array instead of a list. All
I'm saying is that RCU does not force you to do this.
> But the
> not so long term plan is to not use irq routing table for msi injection
> (new ioctl kvm_msi_inject) and reduce table to much smaller size (may be
> make it hash).
Why bother with an array as an intermediate step then?
> >
> > > +
> > > + if (!new)
> > > + return -ENOMEM;
> > > +
> > > for (i = 0; i < nr; ++i) {
> > > r = -EINVAL;
> > > if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> > > goto out;
> > > if (ue->flags)
> > > goto out;
> > > - r = -ENOMEM;
> > > - e = kzalloc(sizeof(*e), GFP_KERNEL);
> > > - if (!e)
> > > - goto out;
> > > - r = setup_routing_entry(e, ue);
> > > + r = setup_routing_entry(new + i, ue);
> > > if (r)
> > > goto out;
> > > ++ue;
> > > - list_add(&e->link, &irq_list);
> > > - e = NULL;
> > > }
> > >
> > > mutex_lock(&kvm->irq_lock);
> > > - list_splice(&kvm->irq_routing, &tmp);
> > > - INIT_LIST_HEAD(&kvm->irq_routing);
> > > - list_splice(&irq_list, &kvm->irq_routing);
> > > - INIT_LIST_HEAD(&irq_list);
> > > - list_splice(&tmp, &irq_list);
> > > + old = kvm->irq_routing;
> > > + rcu_assign_pointer(kvm->irq_routing, new);
> > > mutex_unlock(&kvm->irq_lock);
> > >
> > > + synchronize_rcu();
> > > +
> > > r = 0;
> > > + new = old;
> > >
> > > out:
> > > - kfree(e);
> > > - __kvm_free_irq_routing(&irq_list);
> > > + kfree(new);
> > > return r;
> > > }
> > >
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index cf20dc1..24013b4 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> > > if (IS_ERR(kvm))
> > > goto out;
> > > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > - INIT_LIST_HEAD(&kvm->irq_routing);
> > > INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> > > #endif
> > >
> > > --
> > > 1.6.2.1
> > >
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at http://vger.kernel.org/majordomo-info.html
>
> --
> Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 13:01 ` Gregory Haskins
@ 2009-07-13 13:15 ` Gleb Natapov
2009-07-13 13:16 ` Gregory Haskins
2009-07-13 15:55 ` Marcelo Tosatti
0 siblings, 2 replies; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 13:15 UTC (permalink / raw)
To: Gregory Haskins; +Cc: avi, kvm
On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> Gleb Natapov wrote:
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> > include/linux/kvm_host.h | 2 +-
> > virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> > virt/kvm/kvm_main.c | 1 -
> > 3 files changed, 26 insertions(+), 32 deletions(-)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index f54a0d3..6756b3e 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -161,7 +161,7 @@ struct kvm {
> >
> > struct mutex irq_lock;
> > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > + struct kvm_kernel_irq_routing_entry *irq_routing;
> > struct hlist_head mask_notifier_list;
> > #endif
> >
> > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > index 7af18b8..b2fa3f6 100644
> > --- a/virt/kvm/irq_comm.c
> > +++ b/virt/kvm/irq_comm.c
> > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > * IOAPIC. So set the bit in both. The guest will ignore
> > * writes to the unused one.
> > */
> > - list_for_each_entry(e, &kvm->irq_routing, link)
> > + rcu_read_lock();
> > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> >
>
> Hi Gleb,
> I haven't had a chance to fully digest and review these patches, but
> one thing I did notice is that you seem to be converting from a list to
> an open-coded structure. I am just curious why you made this design
> decision instead of using the RCU variant of list?
>
It is not a scary "open-coded structure", it's just an array :) As I responded
to Michael, the idea is to move msis out of irq_routing, make the array
much smaller and either use gsi as an index into the array or use a hash table
instead of looping over all entries. For now I can justify the array as a more
cache-friendly data structure, since we scan it linearly.
> Regards,
> -Greg
>
> > if (e->gsi == irq) {
> > int r = e->set(e, kvm, sig_level);
> > if (r < 0)
> > @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >
> > ret = r + ((ret < 0) ? 0 : ret);
> > }
> > + }
> > + rcu_read_unlock();
> > return ret;
> > }
> >
> > @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >
> > trace_kvm_ack_irq(irqchip, pin);
> >
> > - list_for_each_entry(e, &kvm->irq_routing, link)
> > + rcu_read_lock();
> > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > if (e->irqchip.irqchip == irqchip &&
> > e->irqchip.pin == pin) {
> > gsi = e->gsi;
> > break;
> > }
> > + }
> > + rcu_read_unlock();
> >
> > hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> > if (kian->gsi == gsi)
> > @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > kimn->func(kimn, mask);
> > }
> >
> > -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> > -{
> > - struct kvm_kernel_irq_routing_entry *e, *n;
> > -
> > - list_for_each_entry_safe(e, n, irq_routing, link)
> > - kfree(e);
> > -}
> > -
> > void kvm_free_irq_routing(struct kvm *kvm)
> > {
> > - mutex_lock(&kvm->irq_lock);
> > - __kvm_free_irq_routing(&kvm->irq_routing);
> > - mutex_unlock(&kvm->irq_lock);
> > + /* Called only during vm destruction. Nobody can use the pointer
> > + at this stage */
> > + kfree(kvm->irq_routing);
> > }
> >
> > static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> > unsigned nr,
> > unsigned flags)
> > {
> > - struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> > - struct list_head tmp = LIST_HEAD_INIT(tmp);
> > - struct kvm_kernel_irq_routing_entry *e = NULL;
> > + struct kvm_kernel_irq_routing_entry *new, *old;
> > unsigned i;
> > int r;
> >
> > + /* last element is left zeroed and indicates the end of the array */
> > + new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> > +
> > + if (!new)
> > + return -ENOMEM;
> > +
> > for (i = 0; i < nr; ++i) {
> > r = -EINVAL;
> > if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> > goto out;
> > if (ue->flags)
> > goto out;
> > - r = -ENOMEM;
> > - e = kzalloc(sizeof(*e), GFP_KERNEL);
> > - if (!e)
> > - goto out;
> > - r = setup_routing_entry(e, ue);
> > + r = setup_routing_entry(new + i, ue);
> > if (r)
> > goto out;
> > ++ue;
> > - list_add(&e->link, &irq_list);
> > - e = NULL;
> > }
> >
> > mutex_lock(&kvm->irq_lock);
> > - list_splice(&kvm->irq_routing, &tmp);
> > - INIT_LIST_HEAD(&kvm->irq_routing);
> > - list_splice(&irq_list, &kvm->irq_routing);
> > - INIT_LIST_HEAD(&irq_list);
> > - list_splice(&tmp, &irq_list);
> > + old = kvm->irq_routing;
> > + rcu_assign_pointer(kvm->irq_routing, new);
> > mutex_unlock(&kvm->irq_lock);
> >
> > + synchronize_rcu();
> > +
> > r = 0;
> > + new = old;
> >
> > out:
> > - kfree(e);
> > - __kvm_free_irq_routing(&irq_list);
> > + kfree(new);
> > return r;
> > }
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index cf20dc1..24013b4 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> > if (IS_ERR(kvm))
> > goto out;
> > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > - INIT_LIST_HEAD(&kvm->irq_routing);
> > INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> > #endif
> >
> >
>
>
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 13:15 ` Gleb Natapov
@ 2009-07-13 13:16 ` Gregory Haskins
2009-07-13 13:25 ` Gleb Natapov
2009-07-13 15:55 ` Marcelo Tosatti
1 sibling, 1 reply; 41+ messages in thread
From: Gregory Haskins @ 2009-07-13 13:16 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Gregory Haskins, avi, kvm
[-- Attachment #1: Type: text/plain, Size: 6204 bytes --]
Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
>
>> Gleb Natapov wrote:
>>
>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>> ---
>>> include/linux/kvm_host.h | 2 +-
>>> virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
>>> virt/kvm/kvm_main.c | 1 -
>>> 3 files changed, 26 insertions(+), 32 deletions(-)
>>>
>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>> index f54a0d3..6756b3e 100644
>>> --- a/include/linux/kvm_host.h
>>> +++ b/include/linux/kvm_host.h
>>> @@ -161,7 +161,7 @@ struct kvm {
>>>
>>> struct mutex irq_lock;
>>> #ifdef CONFIG_HAVE_KVM_IRQCHIP
>>> - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
>>> + struct kvm_kernel_irq_routing_entry *irq_routing;
>>> struct hlist_head mask_notifier_list;
>>> #endif
>>>
>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>>> index 7af18b8..b2fa3f6 100644
>>> --- a/virt/kvm/irq_comm.c
>>> +++ b/virt/kvm/irq_comm.c
>>> @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>>> * IOAPIC. So set the bit in both. The guest will ignore
>>> * writes to the unused one.
>>> */
>>> - list_for_each_entry(e, &kvm->irq_routing, link)
>>> + rcu_read_lock();
>>> + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
>>>
>>>
>> Hi Gleb,
>> I haven't had a chance to fully digest and review these patches, but
>> one thing I did notice is that you seem to be converting from a list to
>> an open-coded structure. I am just curious why you made this design
>> decision instead of using the RCU variant of list?
>>
>>
> It is not scary "open-coded structure" it's just an array :) As I responded
> to Michael the idea is to move msis out of irq_routing, make the array
> much smaller and either use gsi as an index in the array or use hash table
> instead looping over all entries. For now I can justify array as more
> cache friendly data structure as we scan it linearly.
>
Ok, but that might be a good thing to mention in the patch header ;)
Kind Regards,
-Greg
>
>> Regards,
>> -Greg
>>
>>
>>> if (e->gsi == irq) {
>>> int r = e->set(e, kvm, sig_level);
>>> if (r < 0)
>>> @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>>>
>>> ret = r + ((ret < 0) ? 0 : ret);
>>> }
>>> + }
>>> + rcu_read_unlock();
>>> return ret;
>>> }
>>>
>>> @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>>>
>>> trace_kvm_ack_irq(irqchip, pin);
>>>
>>> - list_for_each_entry(e, &kvm->irq_routing, link)
>>> + rcu_read_lock();
>>> + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
>>> if (e->irqchip.irqchip == irqchip &&
>>> e->irqchip.pin == pin) {
>>> gsi = e->gsi;
>>> break;
>>> }
>>> + }
>>> + rcu_read_unlock();
>>>
>>> hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
>>> if (kian->gsi == gsi)
>>> @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>>> kimn->func(kimn, mask);
>>> }
>>>
>>> -static void __kvm_free_irq_routing(struct list_head *irq_routing)
>>> -{
>>> - struct kvm_kernel_irq_routing_entry *e, *n;
>>> -
>>> - list_for_each_entry_safe(e, n, irq_routing, link)
>>> - kfree(e);
>>> -}
>>> -
>>> void kvm_free_irq_routing(struct kvm *kvm)
>>> {
>>> - mutex_lock(&kvm->irq_lock);
>>> - __kvm_free_irq_routing(&kvm->irq_routing);
>>> - mutex_unlock(&kvm->irq_lock);
>>> + /* Called only during vm destruction. Nobody can use the pointer
>>> + at this stage */
>>> + kfree(kvm->irq_routing);
>>> }
>>>
>>> static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
>>> @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
>>> unsigned nr,
>>> unsigned flags)
>>> {
>>> - struct list_head irq_list = LIST_HEAD_INIT(irq_list);
>>> - struct list_head tmp = LIST_HEAD_INIT(tmp);
>>> - struct kvm_kernel_irq_routing_entry *e = NULL;
>>> + struct kvm_kernel_irq_routing_entry *new, *old;
>>> unsigned i;
>>> int r;
>>>
>>> + /* last element is left zeroed and indicates the end of the array */
>>> + new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
>>> +
>>> + if (!new)
>>> + return -ENOMEM;
>>> +
>>> for (i = 0; i < nr; ++i) {
>>> r = -EINVAL;
>>> if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
>>> goto out;
>>> if (ue->flags)
>>> goto out;
>>> - r = -ENOMEM;
>>> - e = kzalloc(sizeof(*e), GFP_KERNEL);
>>> - if (!e)
>>> - goto out;
>>> - r = setup_routing_entry(e, ue);
>>> + r = setup_routing_entry(new + i, ue);
>>> if (r)
>>> goto out;
>>> ++ue;
>>> - list_add(&e->link, &irq_list);
>>> - e = NULL;
>>> }
>>>
>>> mutex_lock(&kvm->irq_lock);
>>> - list_splice(&kvm->irq_routing, &tmp);
>>> - INIT_LIST_HEAD(&kvm->irq_routing);
>>> - list_splice(&irq_list, &kvm->irq_routing);
>>> - INIT_LIST_HEAD(&irq_list);
>>> - list_splice(&tmp, &irq_list);
>>> + old = kvm->irq_routing;
>>> + rcu_assign_pointer(kvm->irq_routing, new);
>>> mutex_unlock(&kvm->irq_lock);
>>>
>>> + synchronize_rcu();
>>> +
>>> r = 0;
>>> + new = old;
>>>
>>> out:
>>> - kfree(e);
>>> - __kvm_free_irq_routing(&irq_list);
>>> + kfree(new);
>>> return r;
>>> }
>>>
>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>> index cf20dc1..24013b4 100644
>>> --- a/virt/kvm/kvm_main.c
>>> +++ b/virt/kvm/kvm_main.c
>>> @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
>>> if (IS_ERR(kvm))
>>> goto out;
>>> #ifdef CONFIG_HAVE_KVM_IRQCHIP
>>> - INIT_LIST_HEAD(&kvm->irq_routing);
>>> INIT_HLIST_HEAD(&kvm->mask_notifier_list);
>>> #endif
>>>
>>>
>>>
>>
>
>
>
> --
> Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 266 bytes --]
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 13:15 ` Michael S. Tsirkin
@ 2009-07-13 13:23 ` Gleb Natapov
2009-07-13 13:36 ` Michael S. Tsirkin
0 siblings, 1 reply; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 13:23 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: avi, kvm
On Mon, Jul 13, 2009 at 04:15:30PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jul 13, 2009 at 04:03:10PM +0300, Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 03:55:07PM +0300, Michael S. Tsirkin wrote:
> > > On Sun, Jul 12, 2009 at 03:03:50PM +0300, Gleb Natapov wrote:
> > > >
> > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > ---
> > > > include/linux/kvm_host.h | 2 +-
> > > > virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> > > > virt/kvm/kvm_main.c | 1 -
> > > > 3 files changed, 26 insertions(+), 32 deletions(-)
> > > >
> > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > index f54a0d3..6756b3e 100644
> > > > --- a/include/linux/kvm_host.h
> > > > +++ b/include/linux/kvm_host.h
> > > > @@ -161,7 +161,7 @@ struct kvm {
> > > >
> > > > struct mutex irq_lock;
> > > > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > + struct kvm_kernel_irq_routing_entry *irq_routing;
> > > > struct hlist_head mask_notifier_list;
> > > > #endif
> > > >
> > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > index 7af18b8..b2fa3f6 100644
> > > > --- a/virt/kvm/irq_comm.c
> > > > +++ b/virt/kvm/irq_comm.c
> > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > > * IOAPIC. So set the bit in both. The guest will ignore
> > > > * writes to the unused one.
> > > > */
> > > > - list_for_each_entry(e, &kvm->irq_routing, link)
> > > > + rcu_read_lock();
> > > > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > if (e->gsi == irq) {
> > > > int r = e->set(e, kvm, sig_level);
> > > > if (r < 0)
> > > > @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > >
> > > > ret = r + ((ret < 0) ? 0 : ret);
> > > > }
> > > > + }
> > > > + rcu_read_unlock();
> > > > return ret;
> > > > }
> > > >
> > > > @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> > > >
> > > > trace_kvm_ack_irq(irqchip, pin);
> > > >
> > > > - list_for_each_entry(e, &kvm->irq_routing, link)
> > > > + rcu_read_lock();
> > > > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > if (e->irqchip.irqchip == irqchip &&
> > > > e->irqchip.pin == pin) {
> > > > gsi = e->gsi;
> > > > break;
> > > > }
> > > > + }
> > > > + rcu_read_unlock();
> > > >
> > > > hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> > > > if (kian->gsi == gsi)
> > > > @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > > > kimn->func(kimn, mask);
> > > > }
> > > >
> > > > -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> > > > -{
> > > > - struct kvm_kernel_irq_routing_entry *e, *n;
> > > > -
> > > > - list_for_each_entry_safe(e, n, irq_routing, link)
> > > > - kfree(e);
> > > > -}
> > > > -
> > > > void kvm_free_irq_routing(struct kvm *kvm)
> > > > {
> > > > - mutex_lock(&kvm->irq_lock);
> > > > - __kvm_free_irq_routing(&kvm->irq_routing);
> > > > - mutex_unlock(&kvm->irq_lock);
> > > > + /* Called only during vm destruction. Nobody can use the pointer
> > > > + at this stage */
> > > > + kfree(kvm->irq_routing);
> > > > }
> > > >
> > > > static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> > > > @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> > > > unsigned nr,
> > > > unsigned flags)
> > > > {
> > > > - struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> > > > - struct list_head tmp = LIST_HEAD_INIT(tmp);
> > > > - struct kvm_kernel_irq_routing_entry *e = NULL;
> > > > + struct kvm_kernel_irq_routing_entry *new, *old;
> > > > unsigned i;
> > > > int r;
> > > >
> > > > + /* last element is left zeroed and indicates the end of the array */
> > > > + new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> > >
> > > There are up to 1K entries, and each one seems to be around 32 bytes.
> > > Are there chances that we won't be able to find such a chunk of
> > > contiguous memory on a busy system?
> > >
> > > Since the tmp list is never traversed while it is assigned, we can, instead,
> > > build a new list as we did and simply replace list_splice with these bits from
> > > list_splice_init_rcu:
> > >
> > > static inline void list_splice_tmp_rcu(struct list_head *tmp,
> > > struct list_head *head) {
> > > struct list_head *first = tmp->next;
> > > struct list_head *last = tmp->prev;
> > > struct list_head *at = head->next;
> > > last->next = at;
> > > rcu_assign_pointer(head->next, first);
> > > first->prev = head;
> > > at->prev = last;
> > > }
> > >
> > Lets keep simple things simple. If there is real concern that 3-4
> > contiguous page will not be available we can use vmalloc() here.
>
> Hmm, 32 * (1K + 1) is usually 8-9 pages, and vmalloc is a finite
We allocate only existing entries, not the whole array, and this usually
means fewer than 20 entries. If we get to the point where, with the
current data structure, we use 1K entries, a much more serious problem
will be that for each injected interrupt we will have to scan 1K entries.
> resource. Maybe it's a good idea to use an array instead of a list. All
> I'm saying, RCU does not force you to do this.
>
It doesn't, but a list shouldn't be used here in the first place.
> > But the
> > not so long term plan is to not use irq routing table for msi injection
> > (new ioctl kvm_msi_inject) and reduce table to much smaller size (may be
> > make it hash).
>
> Why bother with an array as an intermediate step then?
Incremental changes. I can't rewrite the whole kernel with one patch.
Linus will reject it.
>
> > >
> > > > +
> > > > + if (!new)
> > > > + return -ENOMEM;
> > > > +
> > > > for (i = 0; i < nr; ++i) {
> > > > r = -EINVAL;
> > > > if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> > > > goto out;
> > > > if (ue->flags)
> > > > goto out;
> > > > - r = -ENOMEM;
> > > > - e = kzalloc(sizeof(*e), GFP_KERNEL);
> > > > - if (!e)
> > > > - goto out;
> > > > - r = setup_routing_entry(e, ue);
> > > > + r = setup_routing_entry(new + i, ue);
> > > > if (r)
> > > > goto out;
> > > > ++ue;
> > > > - list_add(&e->link, &irq_list);
> > > > - e = NULL;
> > > > }
> > > >
> > > > mutex_lock(&kvm->irq_lock);
> > > > - list_splice(&kvm->irq_routing, &tmp);
> > > > - INIT_LIST_HEAD(&kvm->irq_routing);
> > > > - list_splice(&irq_list, &kvm->irq_routing);
> > > > - INIT_LIST_HEAD(&irq_list);
> > > > - list_splice(&tmp, &irq_list);
> > > > + old = kvm->irq_routing;
> > > > + rcu_assign_pointer(kvm->irq_routing, new);
> > > > mutex_unlock(&kvm->irq_lock);
> > > >
> > > > + synchronize_rcu();
> > > > +
> > > > r = 0;
> > > > + new = old;
> > > >
> > > > out:
> > > > - kfree(e);
> > > > - __kvm_free_irq_routing(&irq_list);
> > > > + kfree(new);
> > > > return r;
> > > > }
> > > >
> > > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > > index cf20dc1..24013b4 100644
> > > > --- a/virt/kvm/kvm_main.c
> > > > +++ b/virt/kvm/kvm_main.c
> > > > @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> > > > if (IS_ERR(kvm))
> > > > goto out;
> > > > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > - INIT_LIST_HEAD(&kvm->irq_routing);
> > > > INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> > > > #endif
> > > >
> > > > --
> > > > 1.6.2.1
> > > >
> > > > --
> > > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > > the body of a message to majordomo@vger.kernel.org
> > > > More majordomo info at http://vger.kernel.org/majordomo-info.html
> >
> > --
> > Gleb.
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 13:16 ` Gregory Haskins
@ 2009-07-13 13:25 ` Gleb Natapov
2009-07-13 13:29 ` Gregory Haskins
0 siblings, 1 reply; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 13:25 UTC (permalink / raw)
To: Gregory Haskins; +Cc: Gregory Haskins, avi, kvm
On Mon, Jul 13, 2009 at 09:16:47AM -0400, Gregory Haskins wrote:
> Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> >
> >> Gleb Natapov wrote:
> >>
> >>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> >>> ---
> >>> include/linux/kvm_host.h | 2 +-
> >>> virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> >>> virt/kvm/kvm_main.c | 1 -
> >>> 3 files changed, 26 insertions(+), 32 deletions(-)
> >>>
> >>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> >>> index f54a0d3..6756b3e 100644
> >>> --- a/include/linux/kvm_host.h
> >>> +++ b/include/linux/kvm_host.h
> >>> @@ -161,7 +161,7 @@ struct kvm {
> >>>
> >>> struct mutex irq_lock;
> >>> #ifdef CONFIG_HAVE_KVM_IRQCHIP
> >>> - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> >>> + struct kvm_kernel_irq_routing_entry *irq_routing;
> >>> struct hlist_head mask_notifier_list;
> >>> #endif
> >>>
> >>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> >>> index 7af18b8..b2fa3f6 100644
> >>> --- a/virt/kvm/irq_comm.c
> >>> +++ b/virt/kvm/irq_comm.c
> >>> @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >>> * IOAPIC. So set the bit in both. The guest will ignore
> >>> * writes to the unused one.
> >>> */
> >>> - list_for_each_entry(e, &kvm->irq_routing, link)
> >>> + rcu_read_lock();
> >>> + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> >>>
> >>>
> >> Hi Gleb,
> >> I haven't had a chance to fully digest and review these patches, but
> >> one thing I did notice is that you seem to be converting from a list to
> >> an open-coded structure. I am just curious why you made this design
> >> decision instead of using the RCU variant of list?
> >>
> >>
> > It is not scary "open-coded structure" it's just an array :) As I responded
> > to Michael the idea is to move msis out of irq_routing, make the array
> > much smaller and either use gsi as an index in the array or use hash table
> > instead looping over all entries. For now I can justify array as more
> > cache friendly data structure as we scan it linearly.
> >
>
> Ok, but that might be a good thing to mention in the patch header ;)
>
What exactly? Besides, this is just an RFC. By the time it is
applied (if at all) I may have done the change already :)
> Kind Regards,
> -Greg
>
> >
> >> Regards,
> >> -Greg
> >>
> >>
> >>> if (e->gsi == irq) {
> >>> int r = e->set(e, kvm, sig_level);
> >>> if (r < 0)
> >>> @@ -156,6 +157,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> >>>
> >>> ret = r + ((ret < 0) ? 0 : ret);
> >>> }
> >>> + }
> >>> + rcu_read_unlock();
> >>> return ret;
> >>> }
> >>>
> >>> @@ -168,12 +171,15 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >>>
> >>> trace_kvm_ack_irq(irqchip, pin);
> >>>
> >>> - list_for_each_entry(e, &kvm->irq_routing, link)
> >>> + rcu_read_lock();
> >>> + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> >>> if (e->irqchip.irqchip == irqchip &&
> >>> e->irqchip.pin == pin) {
> >>> gsi = e->gsi;
> >>> break;
> >>> }
> >>> + }
> >>> + rcu_read_unlock();
> >>>
> >>> hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
> >>> if (kian->gsi == gsi)
> >>> @@ -264,19 +270,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> >>> kimn->func(kimn, mask);
> >>> }
> >>>
> >>> -static void __kvm_free_irq_routing(struct list_head *irq_routing)
> >>> -{
> >>> - struct kvm_kernel_irq_routing_entry *e, *n;
> >>> -
> >>> - list_for_each_entry_safe(e, n, irq_routing, link)
> >>> - kfree(e);
> >>> -}
> >>> -
> >>> void kvm_free_irq_routing(struct kvm *kvm)
> >>> {
> >>> - mutex_lock(&kvm->irq_lock);
> >>> - __kvm_free_irq_routing(&kvm->irq_routing);
> >>> - mutex_unlock(&kvm->irq_lock);
> >>> + /* Called only during vm destruction. Nobody can use the pointer
> >>> + at this stage */
> >>> + kfree(kvm->irq_routing);
> >>> }
> >>>
> >>> static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
> >>> @@ -326,43 +324,40 @@ int kvm_set_irq_routing(struct kvm *kvm,
> >>> unsigned nr,
> >>> unsigned flags)
> >>> {
> >>> - struct list_head irq_list = LIST_HEAD_INIT(irq_list);
> >>> - struct list_head tmp = LIST_HEAD_INIT(tmp);
> >>> - struct kvm_kernel_irq_routing_entry *e = NULL;
> >>> + struct kvm_kernel_irq_routing_entry *new, *old;
> >>> unsigned i;
> >>> int r;
> >>>
> >>> + /* last element is left zeroed and indicates the end of the array */
> >>> + new = kzalloc(sizeof(*new) * (nr + 1), GFP_KERNEL);
> >>> +
> >>> + if (!new)
> >>> + return -ENOMEM;
> >>> +
> >>> for (i = 0; i < nr; ++i) {
> >>> r = -EINVAL;
> >>> if (ue->gsi >= KVM_MAX_IRQ_ROUTES)
> >>> goto out;
> >>> if (ue->flags)
> >>> goto out;
> >>> - r = -ENOMEM;
> >>> - e = kzalloc(sizeof(*e), GFP_KERNEL);
> >>> - if (!e)
> >>> - goto out;
> >>> - r = setup_routing_entry(e, ue);
> >>> + r = setup_routing_entry(new + i, ue);
> >>> if (r)
> >>> goto out;
> >>> ++ue;
> >>> - list_add(&e->link, &irq_list);
> >>> - e = NULL;
> >>> }
> >>>
> >>> mutex_lock(&kvm->irq_lock);
> >>> - list_splice(&kvm->irq_routing, &tmp);
> >>> - INIT_LIST_HEAD(&kvm->irq_routing);
> >>> - list_splice(&irq_list, &kvm->irq_routing);
> >>> - INIT_LIST_HEAD(&irq_list);
> >>> - list_splice(&tmp, &irq_list);
> >>> + old = kvm->irq_routing;
> >>> + rcu_assign_pointer(kvm->irq_routing, new);
> >>> mutex_unlock(&kvm->irq_lock);
> >>>
> >>> + synchronize_rcu();
> >>> +
> >>> r = 0;
> >>> + new = old;
> >>>
> >>> out:
> >>> - kfree(e);
> >>> - __kvm_free_irq_routing(&irq_list);
> >>> + kfree(new);
> >>> return r;
> >>> }
> >>>
> >>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> >>> index cf20dc1..24013b4 100644
> >>> --- a/virt/kvm/kvm_main.c
> >>> +++ b/virt/kvm/kvm_main.c
> >>> @@ -945,7 +945,6 @@ static struct kvm *kvm_create_vm(void)
> >>> if (IS_ERR(kvm))
> >>> goto out;
> >>> #ifdef CONFIG_HAVE_KVM_IRQCHIP
> >>> - INIT_LIST_HEAD(&kvm->irq_routing);
> >>> INIT_HLIST_HEAD(&kvm->mask_notifier_list);
> >>> #endif
> >>>
> >>>
> >>>
> >>
> >
> >
> >
> > --
> > Gleb.
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> >
>
>
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 13:11 ` Gleb Natapov
@ 2009-07-13 13:26 ` Gregory Haskins
2009-07-13 13:32 ` Gleb Natapov
0 siblings, 1 reply; 41+ messages in thread
From: Gregory Haskins @ 2009-07-13 13:26 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Michael S. Tsirkin, avi, kvm@vger.kernel.org, paulmck
[-- Attachment #1: Type: text/plain, Size: 4643 bytes --]
Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
>
>> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
>>
>>> Use RCU locking for mask/ack notifiers lists.
>>>
>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>> ---
>>> virt/kvm/irq_comm.c | 20 +++++++++++---------
>>> 1 files changed, 11 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>>> index 5dde1ef..ba3a115 100644
>>> --- a/virt/kvm/irq_comm.c
>>> +++ b/virt/kvm/irq_comm.c
>>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>>> break;
>>> }
>>> }
>>> - rcu_read_unlock();
>>>
>>> - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
>>> + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>>> if (kian->gsi == gsi)
>>> kian->irq_acked(kian);
>>> + rcu_read_unlock();
>>> }
>>>
>>> void kvm_register_irq_ack_notifier(struct kvm *kvm,
>>> struct kvm_irq_ack_notifier *kian)
>>> {
>>> mutex_lock(&kvm->irq_lock);
>>> - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
>>> + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
>>> mutex_unlock(&kvm->irq_lock);
>>> }
>>>
>>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>>> struct kvm_irq_ack_notifier *kian)
>>> {
>>> mutex_lock(&kvm->irq_lock);
>>> - hlist_del_init(&kian->link);
>>> + hlist_del_init_rcu(&kian->link);
>>> mutex_unlock(&kvm->irq_lock);
>>> + synchronize_rcu();
>>> }
>>>
>> This is done under kvm->lock still, which means the lock might be held
>> potentially for a very long time. Can synchronize_rcu be moved out of
>> this lock?
>>
>>
> Only if kvm_free_assigned_device() will be moved out of this lock.
> Device de-assignment is not very frequent event though. How long do you
> think it may be held? KVM RCU read sections are very brief.
>
Note that the delay imposed by the barrier is not only related to the
length of the critical section. The barrier blocks until the next grace
period, and depending on the type of RCU you are using and your config
options, this could be multiple milliseconds.
I am not saying that this is definitely a problem for your design. I
am just pointing out that the length of the KVM-RCU read section is only
one of several factors that influence the ultimate duration of your
kvm->lock CS. IIUC, in an ideally designed subsystem, the read-side CS
will be dwarfed by the length of the grace, so the grace (and barriers
against it) are really the critical factor of this type of design.
I am CC'ing Paul in case I am saying something dumb/untrue. ;)
Kind Regards,
-Greg
>
>>> int kvm_request_irq_source_id(struct kvm *kvm)
>>> @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
>>> {
>>> mutex_lock(&kvm->irq_lock);
>>> kimn->irq = irq;
>>> - hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
>>> + hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
>>> mutex_unlock(&kvm->irq_lock);
>>> }
>>>
>>> @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
>>> struct kvm_irq_mask_notifier *kimn)
>>> {
>>> mutex_lock(&kvm->irq_lock);
>>> - hlist_del(&kimn->link);
>>> + hlist_del_rcu(&kimn->link);
>>> mutex_unlock(&kvm->irq_lock);
>>> + synchronize_rcu();
>>> }
>>>
>>> void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>>> @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>>> struct kvm_irq_mask_notifier *kimn;
>>> struct hlist_node *n;
>>>
>>> - WARN_ON(!mutex_is_locked(&kvm->irq_lock));
>>> -
>>> - hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
>>> + rcu_read_lock();
>>> + hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
>>> if (kimn->irq == irq)
>>> kimn->func(kimn, mask);
>>> + rcu_read_unlock();
>>> }
>>>
>>> void kvm_free_irq_routing(struct kvm *kvm)
>>> --
>>> 1.6.2.1
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>>
>
> --
> Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 266 bytes --]
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 13:05 ` Gleb Natapov
@ 2009-07-13 13:29 ` Michael S. Tsirkin
0 siblings, 0 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2009-07-13 13:29 UTC (permalink / raw)
To: Gleb Natapov; +Cc: avi, kvm
On Mon, Jul 13, 2009 at 04:05:58PM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 03:56:40PM +0300, Michael S. Tsirkin wrote:
> > On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> > > Use RCU locking for mask/ack notifiers lists.
> > >
> > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > ---
> > > virt/kvm/irq_comm.c | 20 +++++++++++---------
> > > 1 files changed, 11 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > index 5dde1ef..ba3a115 100644
> > > --- a/virt/kvm/irq_comm.c
> > > +++ b/virt/kvm/irq_comm.c
> > > @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> > > break;
> > > }
> > > }
> > > - rcu_read_unlock();
> > >
> > > - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> > > + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> > > if (kian->gsi == gsi)
> > > kian->irq_acked(kian);
> > > + rcu_read_unlock();
> > > }
> > >
> > > void kvm_register_irq_ack_notifier(struct kvm *kvm,
> > > struct kvm_irq_ack_notifier *kian)
> > > {
> > > mutex_lock(&kvm->irq_lock);
> > > - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> > > + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> > > mutex_unlock(&kvm->irq_lock);
> >
> > I think we need synchronize_rcu here as well. If the user adds a
> > notifier, he expects to get notified of irqs immediately
> > after the function returns, not after the next rcu grace period.
> >
If ack notifier registration races with acknowledgment of the very
interrupt it adds a notifier for, the user is already doing something terribly wrong.
Hmm, good point.
> > > }
> > >
> > > @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> > > struct kvm_irq_ack_notifier *kian)
> > > {
> > > mutex_lock(&kvm->irq_lock);
> > > - hlist_del_init(&kian->link);
> > > + hlist_del_init_rcu(&kian->link);
> > > mutex_unlock(&kvm->irq_lock);
> > > + synchronize_rcu();
> > > }
> > >
> > > int kvm_request_irq_source_id(struct kvm *kvm)
> > > @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
> > > {
> > > mutex_lock(&kvm->irq_lock);
> > > kimn->irq = irq;
> > > - hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> > > + hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
> > > mutex_unlock(&kvm->irq_lock);
> > > }
> > >
> > > @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
> > > struct kvm_irq_mask_notifier *kimn)
> > > {
> > > mutex_lock(&kvm->irq_lock);
> > > - hlist_del(&kimn->link);
> > > + hlist_del_rcu(&kimn->link);
> > > mutex_unlock(&kvm->irq_lock);
> > > + synchronize_rcu();
> > > }
> > >
> > > void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > > @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> > > struct kvm_irq_mask_notifier *kimn;
> > > struct hlist_node *n;
> > >
> > > - WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> > > -
> > > - hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> > > + rcu_read_lock();
> > > + hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
> > > if (kimn->irq == irq)
> > > kimn->func(kimn, mask);
> > > + rcu_read_unlock();
> > > }
> > >
> > > void kvm_free_irq_routing(struct kvm *kvm)
> > > --
> > > 1.6.2.1
> > >
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at http://vger.kernel.org/majordomo-info.html
>
> --
> Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 13:25 ` Gleb Natapov
@ 2009-07-13 13:29 ` Gregory Haskins
0 siblings, 0 replies; 41+ messages in thread
From: Gregory Haskins @ 2009-07-13 13:29 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Gregory Haskins, avi, kvm
[-- Attachment #1: Type: text/plain, Size: 3029 bytes --]
Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 09:16:47AM -0400, Gregory Haskins wrote:
>
>> Gleb Natapov wrote:
>>
>>> On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
>>>
>>>
>>>> Gleb Natapov wrote:
>>>>
>>>>
>>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>>>> ---
>>>>> include/linux/kvm_host.h | 2 +-
>>>>> virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
>>>>> virt/kvm/kvm_main.c | 1 -
>>>>> 3 files changed, 26 insertions(+), 32 deletions(-)
>>>>>
>>>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>>>> index f54a0d3..6756b3e 100644
>>>>> --- a/include/linux/kvm_host.h
>>>>> +++ b/include/linux/kvm_host.h
>>>>> @@ -161,7 +161,7 @@ struct kvm {
>>>>>
>>>>> struct mutex irq_lock;
>>>>> #ifdef CONFIG_HAVE_KVM_IRQCHIP
>>>>> - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
>>>>> + struct kvm_kernel_irq_routing_entry *irq_routing;
>>>>> struct hlist_head mask_notifier_list;
>>>>> #endif
>>>>>
>>>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>>>>> index 7af18b8..b2fa3f6 100644
>>>>> --- a/virt/kvm/irq_comm.c
>>>>> +++ b/virt/kvm/irq_comm.c
>>>>> @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
>>>>> * IOAPIC. So set the bit in both. The guest will ignore
>>>>> * writes to the unused one.
>>>>> */
>>>>> - list_for_each_entry(e, &kvm->irq_routing, link)
>>>>> + rcu_read_lock();
>>>>> + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
>>>>>
>>>>>
>>>>>
>>>> Hi Gleb,
>>>> I haven't had a chance to fully digest and review these patches, but
>>>> one thing I did notice is that you seem to be converting from a list to
>>>> an open-coded structure. I am just curious why you made this design
>>>> decision instead of using the RCU variant of list?
>>>>
>>>>
>>>>
>>> It is not scary "open-coded structure" it's just an array :) As I responded
>>> to Michael the idea is to move msis out of irq_routing, make the array
>>> much smaller and either use gsi as an index in the array or use hash table
>>> instead looping over all entries. For now I can justify array as more
>>> cache friendly data structure as we scan it linearly.
>>>
>>>
>> Ok, but that might be a good thing to mention in the patch header ;)
>>
>>
> What exactly? Besides this is just an RFC. By the time it will be
> applied (if at all) I may do the change already :)
>
Heh, that's fine. FWIW, I would suggest this:
"the idea is to move msis out of irq_routing, make the array
much smaller and either use gsi as an index in the array or use hash table
instead looping over all entries. For now I can justify array as more
cache friendly data structure as we scan it linearly"
Otherwise, reviewers might be curious why you are not using list_X_rcu() ;)
Kind Regards,
-Greg
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 266 bytes --]
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 13:26 ` Gregory Haskins
@ 2009-07-13 13:32 ` Gleb Natapov
2009-07-13 13:40 ` Gregory Haskins
2009-07-13 13:40 ` Michael S. Tsirkin
0 siblings, 2 replies; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 13:32 UTC (permalink / raw)
To: Gregory Haskins; +Cc: Michael S. Tsirkin, avi, kvm@vger.kernel.org, paulmck
On Mon, Jul 13, 2009 at 09:26:21AM -0400, Gregory Haskins wrote:
> Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
> >
> >> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> >>
> >>> Use RCU locking for mask/ack notifiers lists.
> >>>
> >>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> >>> ---
> >>> virt/kvm/irq_comm.c | 20 +++++++++++---------
> >>> 1 files changed, 11 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> >>> index 5dde1ef..ba3a115 100644
> >>> --- a/virt/kvm/irq_comm.c
> >>> +++ b/virt/kvm/irq_comm.c
> >>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >>> break;
> >>> }
> >>> }
> >>> - rcu_read_unlock();
> >>>
> >>> - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> >>> + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> >>> if (kian->gsi == gsi)
> >>> kian->irq_acked(kian);
> >>> + rcu_read_unlock();
> >>> }
> >>>
> >>> void kvm_register_irq_ack_notifier(struct kvm *kvm,
> >>> struct kvm_irq_ack_notifier *kian)
> >>> {
> >>> mutex_lock(&kvm->irq_lock);
> >>> - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> >>> + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> >>> mutex_unlock(&kvm->irq_lock);
> >>> }
> >>>
> >>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> >>> struct kvm_irq_ack_notifier *kian)
> >>> {
> >>> mutex_lock(&kvm->irq_lock);
> >>> - hlist_del_init(&kian->link);
> >>> + hlist_del_init_rcu(&kian->link);
> >>> mutex_unlock(&kvm->irq_lock);
> >>> + synchronize_rcu();
> >>> }
> >>>
> >> This is done under kvm->lock still, which means the lock might be held
> >> potentially for a very long time. Can synchronize_rcu be moved out of
> >> this lock?
> >>
> >>
> > Only if kvm_free_assigned_device() will be moved out of this lock.
> > Device de-assignment is not very frequent event though. How long do you
> > think it may be held? KVM RCU read sections are very brief.
> >
>
> Note that the delay imposed by the barrier is not only related to the
> length of the critical section. The barrier blocks until the next grace
> period, and depending on the type of RCU you are using and your config
> options, this could be multiple milliseconds.
>
> I am not saying that this is definitely a problem for your design. I
> am just pointing out that the length of the KVM-RCU read section is only
Yeah, I understand that other RCU read sections may introduce delays too.
The question is how big the delay may be. I don't think a delay of
multiple milliseconds in device de-assignment is a big issue though.
> one of several factors that influence the ultimate duration of your
> kvm->lock CS. IIUC, in an ideally designed subsystem, the read-side CS
> will be dwarfed by the length of the grace, so the grace (and barriers
> against it) are really the critical factor of this type of design.
>
> I am CC'ing Paul in case I am saying something dumb/untrue. ;)
>
> Kind Regards,
> -Greg
>
> >
> >>> int kvm_request_irq_source_id(struct kvm *kvm)
> >>> @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
> >>> {
> >>> mutex_lock(&kvm->irq_lock);
> >>> kimn->irq = irq;
> >>> - hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
> >>> + hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
> >>> mutex_unlock(&kvm->irq_lock);
> >>> }
> >>>
> >>> @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
> >>> struct kvm_irq_mask_notifier *kimn)
> >>> {
> >>> mutex_lock(&kvm->irq_lock);
> >>> - hlist_del(&kimn->link);
> >>> + hlist_del_rcu(&kimn->link);
> >>> mutex_unlock(&kvm->irq_lock);
> >>> + synchronize_rcu();
> >>> }
> >>>
> >>> void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> >>> @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
> >>> struct kvm_irq_mask_notifier *kimn;
> >>> struct hlist_node *n;
> >>>
> >>> - WARN_ON(!mutex_is_locked(&kvm->irq_lock));
> >>> -
> >>> - hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
> >>> + rcu_read_lock();
> >>> + hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
> >>> if (kimn->irq == irq)
> >>> kimn->func(kimn, mask);
> >>> + rcu_read_unlock();
> >>> }
> >>>
> >>> void kvm_free_irq_routing(struct kvm *kvm)
> >>> --
> >>> 1.6.2.1
> >>>
> >>> --
> >>> To unsubscribe from this list: send the line "unsubscribe kvm" in
> >>> the body of a message to majordomo@vger.kernel.org
> >>> More majordomo info at http://vger.kernel.org/majordomo-info.html
> >>>
> >
> > --
> > Gleb.
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> >
>
>
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 13:23 ` Gleb Natapov
@ 2009-07-13 13:36 ` Michael S. Tsirkin
0 siblings, 0 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2009-07-13 13:36 UTC (permalink / raw)
To: Gleb Natapov; +Cc: avi, kvm
On Mon, Jul 13, 2009 at 04:23:49PM +0300, Gleb Natapov wrote:
> > resource. Maybe it's a good idea to use an array instead of a list. All
> > I'm saying, RCU does not force you to do this.
> >
> It doesn't, but list shouldn't be used here in the first place.
OK, but I think the change and the reason for it should be documented in
the patch subject and description.
--
MST
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 13:32 ` Gleb Natapov
@ 2009-07-13 13:40 ` Gregory Haskins
2009-07-13 13:52 ` Gleb Natapov
2009-07-13 13:40 ` Michael S. Tsirkin
1 sibling, 1 reply; 41+ messages in thread
From: Gregory Haskins @ 2009-07-13 13:40 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Michael S. Tsirkin, avi, kvm@vger.kernel.org, paulmck
[-- Attachment #1: Type: text/plain, Size: 3643 bytes --]
Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 09:26:21AM -0400, Gregory Haskins wrote:
>
>> Gleb Natapov wrote:
>>
>>> On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
>>>
>>>
>>>> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
>>>>
>>>>
>>>>> Use RCU locking for mask/ack notifiers lists.
>>>>>
>>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>>>> ---
>>>>> virt/kvm/irq_comm.c | 20 +++++++++++---------
>>>>> 1 files changed, 11 insertions(+), 9 deletions(-)
>>>>>
>>>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>>>>> index 5dde1ef..ba3a115 100644
>>>>> --- a/virt/kvm/irq_comm.c
>>>>> +++ b/virt/kvm/irq_comm.c
>>>>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>>>>> break;
>>>>> }
>>>>> }
>>>>> - rcu_read_unlock();
>>>>>
>>>>> - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
>>>>> + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>>>>> if (kian->gsi == gsi)
>>>>> kian->irq_acked(kian);
>>>>> + rcu_read_unlock();
>>>>> }
>>>>>
>>>>> void kvm_register_irq_ack_notifier(struct kvm *kvm,
>>>>> struct kvm_irq_ack_notifier *kian)
>>>>> {
>>>>> mutex_lock(&kvm->irq_lock);
>>>>> - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
>>>>> + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
>>>>> mutex_unlock(&kvm->irq_lock);
>>>>> }
>>>>>
>>>>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>>>>> struct kvm_irq_ack_notifier *kian)
>>>>> {
>>>>> mutex_lock(&kvm->irq_lock);
>>>>> - hlist_del_init(&kian->link);
>>>>> + hlist_del_init_rcu(&kian->link);
>>>>> mutex_unlock(&kvm->irq_lock);
>>>>> + synchronize_rcu();
>>>>> }
>>>>>
>>>>>
>>>> This is done under kvm->lock still, which means the lock might be held
>>>> potentially for a very long time. Can synchronize_rcu be moved out of
>>>> this lock?
>>>>
>>>>
>>>>
>>> Only if kvm_free_assigned_device() will be moved out of this lock.
>>> Device de-assignment is not very frequent event though. How long do you
>>> think it may be held? KVM RCU read sections are very brief.
>>>
>>>
>> Note that the delay imposed by the barrier is not only related to the
>> length of the critical section. The barrier blocks until the next grace
>> period, and depending on the type of RCU you are using and your config
>> options, this could be multiple milliseconds.
>>
>> I am not saying that this is definitely a problem for your design. I
>> am just pointing out that the length of the KVM-RCU read section is only
>>
> Yeah I understand that other RCU read section may introduce delays too.
> The question is how big the delay may be.
I think you are misunderstanding me. The read-side CS is not a
significant factor here so I am not worried about concurrent read-side
CS causing a longer delay. What I am saying is that the grace period of
your RCU subsystem is the dominant factor in the equation here, and this
may be several milliseconds.
> I don't think multiple
> milliseconds delay in device de-assignment is a big issue though.
>
I would tend to agree with you. It's not fast path.
I only brought this up because I saw your design being justified
incorrectly: you said "KVM RCU read sections are very brief", but that
is not really relevant to Michael's point. I just want to make sure
that the true impact is understood.
Kind Regards,
-Greg
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 266 bytes --]
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 13:32 ` Gleb Natapov
2009-07-13 13:40 ` Gregory Haskins
@ 2009-07-13 13:40 ` Michael S. Tsirkin
2009-07-13 13:44 ` Gregory Haskins
2009-07-13 19:31 ` Paul E. McKenney
1 sibling, 2 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2009-07-13 13:40 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Gregory Haskins, avi, kvm@vger.kernel.org, paulmck
On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
> Yeah I understand that other RCU read section may introduce delays too.
> The question is how big the delay may be.
I recall seeing the number "at least 3 jiffies" somewhere, but that
might have changed since.
> I don't think multiple
> milliseconds delay in device de-assignment is a big issue though.
Right. My point was that since the sync is done under kvm lock, the
guest can easily get blocked trying to get kvm lock meanwhile.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 13:40 ` Michael S. Tsirkin
@ 2009-07-13 13:44 ` Gregory Haskins
2009-07-13 19:31 ` Paul E. McKenney
1 sibling, 0 replies; 41+ messages in thread
From: Gregory Haskins @ 2009-07-13 13:44 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: Gleb Natapov, avi, kvm@vger.kernel.org, paulmck
[-- Attachment #1: Type: text/plain, Size: 859 bytes --]
Michael S. Tsirkin wrote:
> On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
>
>> Yeah I understand that other RCU read section may introduce delays too.
>> The question is how big the delay may be.
>>
>
> I recall seeing the number "at least 3 jiffies" somewhere, but that
> might have changed since.
>
I think it is dependent on how you configure your kernel...but that
sounds ballpark for one of the worst-case types IIRC.
>
>> I don't think multiple
>> milliseconds delay in device de-assignment is a big issue though.
>>
>
> Right. My point was that since the sync is done under kvm lock, the
> guest can easily get blocked trying to get kvm lock meanwhile.
>
Yeah, that is really the biggest issue here, I agree. The delay in the
actual deassignment path is NBD.
Kind Regards,
-Greg
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 266 bytes --]
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 12:56 ` Michael S. Tsirkin
2009-07-13 13:05 ` Gleb Natapov
@ 2009-07-13 13:48 ` Gregory Haskins
1 sibling, 0 replies; 41+ messages in thread
From: Gregory Haskins @ 2009-07-13 13:48 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: Gleb Natapov, avi, kvm
[-- Attachment #1: Type: text/plain, Size: 3499 bytes --]
Michael S. Tsirkin wrote:
> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
>
>> Use RCU locking for mask/ack notifiers lists.
>>
>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>> ---
>> virt/kvm/irq_comm.c | 20 +++++++++++---------
>> 1 files changed, 11 insertions(+), 9 deletions(-)
>>
>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>> index 5dde1ef..ba3a115 100644
>> --- a/virt/kvm/irq_comm.c
>> +++ b/virt/kvm/irq_comm.c
>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>> break;
>> }
>> }
>> - rcu_read_unlock();
>>
>> - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
>> + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>> if (kian->gsi == gsi)
>> kian->irq_acked(kian);
>> + rcu_read_unlock();
>> }
>>
>> void kvm_register_irq_ack_notifier(struct kvm *kvm,
>> struct kvm_irq_ack_notifier *kian)
>> {
>> mutex_lock(&kvm->irq_lock);
>> - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
>> + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
>> mutex_unlock(&kvm->irq_lock);
>>
>
> I think we need synchronize_rcu here as well. If the user adds a
> notifier, he expects to get notified of irqs immediately
> after the function returns, not after the next rcu grace period.
>
Agreed.
>
>> }
>>
>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>> struct kvm_irq_ack_notifier *kian)
>> {
>> mutex_lock(&kvm->irq_lock);
>> - hlist_del_init(&kian->link);
>> + hlist_del_init_rcu(&kian->link);
>> mutex_unlock(&kvm->irq_lock);
>> + synchronize_rcu();
>> }
>>
>> int kvm_request_irq_source_id(struct kvm *kvm)
>> @@ -246,7 +247,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
>> {
>> mutex_lock(&kvm->irq_lock);
>> kimn->irq = irq;
>> - hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
>> + hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
>> mutex_unlock(&kvm->irq_lock);
>> }
>>
>> @@ -254,8 +255,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
>> struct kvm_irq_mask_notifier *kimn)
>> {
>> mutex_lock(&kvm->irq_lock);
>> - hlist_del(&kimn->link);
>> + hlist_del_rcu(&kimn->link);
>> mutex_unlock(&kvm->irq_lock);
>> + synchronize_rcu();
>> }
>>
>> void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>> @@ -263,11 +265,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>> struct kvm_irq_mask_notifier *kimn;
>> struct hlist_node *n;
>>
>> - WARN_ON(!mutex_is_locked(&kvm->irq_lock));
>> -
>> - hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
>> + rcu_read_lock();
>> + hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
>> if (kimn->irq == irq)
>> kimn->func(kimn, mask);
>> + rcu_read_unlock();
>> }
>>
>> void kvm_free_irq_routing(struct kvm *kvm)
>> --
>> 1.6.2.1
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 266 bytes --]
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 13:40 ` Gregory Haskins
@ 2009-07-13 13:52 ` Gleb Natapov
2009-07-13 14:02 ` Gregory Haskins
0 siblings, 1 reply; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 13:52 UTC (permalink / raw)
To: Gregory Haskins; +Cc: Michael S. Tsirkin, avi, kvm@vger.kernel.org, paulmck
On Mon, Jul 13, 2009 at 09:40:01AM -0400, Gregory Haskins wrote:
> Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 09:26:21AM -0400, Gregory Haskins wrote:
> >
> >> Gleb Natapov wrote:
> >>
> >>> On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
> >>>
> >>>
> >>>> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> >>>>
> >>>>
> >>>>> Use RCU locking for mask/ack notifiers lists.
> >>>>>
> >>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> >>>>> ---
> >>>>> virt/kvm/irq_comm.c | 20 +++++++++++---------
> >>>>> 1 files changed, 11 insertions(+), 9 deletions(-)
> >>>>>
> >>>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> >>>>> index 5dde1ef..ba3a115 100644
> >>>>> --- a/virt/kvm/irq_comm.c
> >>>>> +++ b/virt/kvm/irq_comm.c
> >>>>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >>>>> break;
> >>>>> }
> >>>>> }
> >>>>> - rcu_read_unlock();
> >>>>>
> >>>>> - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> >>>>> + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> >>>>> if (kian->gsi == gsi)
> >>>>> kian->irq_acked(kian);
> >>>>> + rcu_read_unlock();
> >>>>> }
> >>>>>
> >>>>> void kvm_register_irq_ack_notifier(struct kvm *kvm,
> >>>>> struct kvm_irq_ack_notifier *kian)
> >>>>> {
> >>>>> mutex_lock(&kvm->irq_lock);
> >>>>> - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> >>>>> + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> >>>>> mutex_unlock(&kvm->irq_lock);
> >>>>> }
> >>>>>
> >>>>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> >>>>> struct kvm_irq_ack_notifier *kian)
> >>>>> {
> >>>>> mutex_lock(&kvm->irq_lock);
> >>>>> - hlist_del_init(&kian->link);
> >>>>> + hlist_del_init_rcu(&kian->link);
> >>>>> mutex_unlock(&kvm->irq_lock);
> >>>>> + synchronize_rcu();
> >>>>> }
> >>>>>
> >>>>>
> >>>> This is done under kvm->lock still, which means the lock might be held
> >>>> potentially for a very long time. Can synchronize_rcu be moved out of
> >>>> this lock?
> >>>>
> >>>>
> >>>>
> >>> Only if kvm_free_assigned_device() will be moved out of this lock.
> >>> Device de-assignment is not very frequent event though. How long do you
> >>> think it may be held? KVM RCU read sections are very brief.
> >>>
> >>>
> >> Note that the delay imposed by the barrier is not only related to the
> >> length of the critical section. The barrier blocks until the next grace
> >> period, and depending on the type of RCU you are using and your config
> >> options, this could be multiple milliseconds.
> >>
> >> I am not saying that this is definitely a problem for your design. I
> >> am just pointing out that the length of the KVM-RCU read section is only
> >>
> > Yeah I understand that other RCU read section may introduce delays too.
> > The question is how big the delay may be.
>
> I think you are misunderstanding me. The read-side CS is not a
> significant factor here so I am not worried about concurrent read-side
> CS causing a longer delay. What I am saying is that the grace period of
> your RCU subsystem is the dominant factor in the equation here, and this
> may be several milliseconds.
>
How is the "grace period" determined? Doesn't it just mean "no CPU is
in an RCU read section anymore"?
> > I don't think multiple
> > milliseconds delay in device de-assignment is a big issue though.
> >
>
> I would tend to agree with you. It's not fast path.
>
> I only brought this up because I saw your design being justified
> incorrectly: you said "KVM RCU read sections are very brief", but that
> is not really relevant to Michael's point. I just want to make sure
> that the true impact is understood.
>
> Kind Regards,
> -Greg
>
>
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 13:52 ` Gleb Natapov
@ 2009-07-13 14:02 ` Gregory Haskins
2009-07-13 14:08 ` Gleb Natapov
0 siblings, 1 reply; 41+ messages in thread
From: Gregory Haskins @ 2009-07-13 14:02 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Michael S. Tsirkin, avi, kvm@vger.kernel.org, paulmck
[-- Attachment #1: Type: text/plain, Size: 5620 bytes --]
Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 09:40:01AM -0400, Gregory Haskins wrote:
>
>> Gleb Natapov wrote:
>>
>>> On Mon, Jul 13, 2009 at 09:26:21AM -0400, Gregory Haskins wrote:
>>>
>>>
>>>> Gleb Natapov wrote:
>>>>
>>>>
>>>>> On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
>>>>>
>>>>>
>>>>>
>>>>>> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>>> Use RCU locking for mask/ack notifiers lists.
>>>>>>>
>>>>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>>>>>> ---
>>>>>>> virt/kvm/irq_comm.c | 20 +++++++++++---------
>>>>>>> 1 files changed, 11 insertions(+), 9 deletions(-)
>>>>>>>
>>>>>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
>>>>>>> index 5dde1ef..ba3a115 100644
>>>>>>> --- a/virt/kvm/irq_comm.c
>>>>>>> +++ b/virt/kvm/irq_comm.c
>>>>>>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
>>>>>>> break;
>>>>>>> }
>>>>>>> }
>>>>>>> - rcu_read_unlock();
>>>>>>>
>>>>>>> - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
>>>>>>> + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
>>>>>>> if (kian->gsi == gsi)
>>>>>>> kian->irq_acked(kian);
>>>>>>> + rcu_read_unlock();
>>>>>>> }
>>>>>>>
>>>>>>> void kvm_register_irq_ack_notifier(struct kvm *kvm,
>>>>>>> struct kvm_irq_ack_notifier *kian)
>>>>>>> {
>>>>>>> mutex_lock(&kvm->irq_lock);
>>>>>>> - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
>>>>>>> + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
>>>>>>> mutex_unlock(&kvm->irq_lock);
>>>>>>> }
>>>>>>>
>>>>>>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>>>>>>> struct kvm_irq_ack_notifier *kian)
>>>>>>> {
>>>>>>> mutex_lock(&kvm->irq_lock);
>>>>>>> - hlist_del_init(&kian->link);
>>>>>>> + hlist_del_init_rcu(&kian->link);
>>>>>>> mutex_unlock(&kvm->irq_lock);
>>>>>>> + synchronize_rcu();
>>>>>>> }
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>> This is done under kvm->lock still, which means the lock might be held
>>>>>> potentially for a very long time. Can synchronize_rcu be moved out of
>>>>>> this lock?
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>> Only if kvm_free_assigned_device() will be moved out of this lock.
>>>>> Device de-assignment is not very frequent event though. How long do you
>>>>> think it may be held? KVM RCU read sections are very brief.
>>>>>
>>>>>
>>>>>
>>>> Note that the delay imposed by the barrier is not only related to the
>>>> length of the critical section. The barrier blocks until the next grace
>>>> period, and depending on the type of RCU you are using and your config
>>>> options, this could be multiple milliseconds.
>>>>
>>>> I am not saying that this is definitely a problem for your design. I
>>>> am just pointing out that the length of the KVM-RCU read section is only
>>>>
>>>>
>>> Yeah I understand that other RCU read section may introduce delays too.
>>> The question is how big the delay may be.
>>>
>> I think you are misunderstanding me. The read-side CS is not a
>> significant factor here so I am not worried about concurrent read-side
>> CS causing a longer delay. What I am saying is that the grace period of
>> your RCU subsystem is the dominant factor in the equation here, and this
>> may be several milliseconds.
>>
>>
> How is the "grace period" is determined? Isn't it just means "no cpus is
> in RCU read section anymore"?
>
Nope ;)
RCU is pretty complex, so I won't even try to explain it here as there
are numerous articles floating around out there that do a much better job.
But here is a summary: RCU buys you two things: 1) concurrent readers
*and* writers, and 2) a much lower overhead reader path because it
generally doesn't use atomic. Its point (2) that is relevant here.
If taking an atomic were ok, you could approximate the RCU model using
reference counting. Reference counting buys you "precise" resource
acquisition/release at the expense of the overhead of the atomic
operation (and any associated cache-line bouncing). RCU uses an
"imprecise" model where we don't really know the *exact* moment the
resource is released. Instead, there are specific boundaries in time
when we can guarantee that it had to have been released prior to the
expiry of the event. This event is what is called the "grace period".
So that is what synchronize_rcu() is doing. Its a barrier to the next
imprecise moment in time when we can be assured (if you used the rest of
the RCU API properly) that there can not be any outstanding references
to your object left in flight. Each grace period can be milliseconds,
depending on what version of the kernel you have and how it is configured.
HTH
Kind Regards,
-Greg
>
>>> I don't think multiple
>>> milliseconds delay in device de-assignment is a big issue though.
>>>
>>>
>> I would tend to agree with you. It's not fast path.
>>
>> I only brought this up because I saw your design being justified
>> incorrectly: you said "KVM RCU read sections are very brief", but that
>> is not really relevant to Michael's point. I just want to make sure
>> that the true impact is understood.
>>
>> Kind Regards,
>> -Greg
>>
>>
>>
>
>
>
> --
> Gleb.
>
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 266 bytes --]
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 14:02 ` Gregory Haskins
@ 2009-07-13 14:08 ` Gleb Natapov
0 siblings, 0 replies; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 14:08 UTC (permalink / raw)
To: Gregory Haskins; +Cc: Michael S. Tsirkin, avi, kvm@vger.kernel.org, paulmck
On Mon, Jul 13, 2009 at 10:02:13AM -0400, Gregory Haskins wrote:
> Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 09:40:01AM -0400, Gregory Haskins wrote:
> >
> >> Gleb Natapov wrote:
> >>
> >>> On Mon, Jul 13, 2009 at 09:26:21AM -0400, Gregory Haskins wrote:
> >>>
> >>>
> >>>> Gleb Natapov wrote:
> >>>>
> >>>>
> >>>>> On Mon, Jul 13, 2009 at 04:02:56PM +0300, Michael S. Tsirkin wrote:
> >>>>>
> >>>>>
> >>>>>
> >>>>>> On Sun, Jul 12, 2009 at 03:03:53PM +0300, Gleb Natapov wrote:
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>>> Use RCU locking for mask/ack notifiers lists.
> >>>>>>>
> >>>>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> >>>>>>> ---
> >>>>>>> virt/kvm/irq_comm.c | 20 +++++++++++---------
> >>>>>>> 1 files changed, 11 insertions(+), 9 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> >>>>>>> index 5dde1ef..ba3a115 100644
> >>>>>>> --- a/virt/kvm/irq_comm.c
> >>>>>>> +++ b/virt/kvm/irq_comm.c
> >>>>>>> @@ -179,18 +179,18 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
> >>>>>>> break;
> >>>>>>> }
> >>>>>>> }
> >>>>>>> - rcu_read_unlock();
> >>>>>>>
> >>>>>>> - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link)
> >>>>>>> + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, link)
> >>>>>>> if (kian->gsi == gsi)
> >>>>>>> kian->irq_acked(kian);
> >>>>>>> + rcu_read_unlock();
> >>>>>>> }
> >>>>>>>
> >>>>>>> void kvm_register_irq_ack_notifier(struct kvm *kvm,
> >>>>>>> struct kvm_irq_ack_notifier *kian)
> >>>>>>> {
> >>>>>>> mutex_lock(&kvm->irq_lock);
> >>>>>>> - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list);
> >>>>>>> + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
> >>>>>>> mutex_unlock(&kvm->irq_lock);
> >>>>>>> }
> >>>>>>>
> >>>>>>> @@ -198,8 +198,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
> >>>>>>> struct kvm_irq_ack_notifier *kian)
> >>>>>>> {
> >>>>>>> mutex_lock(&kvm->irq_lock);
> >>>>>>> - hlist_del_init(&kian->link);
> >>>>>>> + hlist_del_init_rcu(&kian->link);
> >>>>>>> mutex_unlock(&kvm->irq_lock);
> >>>>>>> + synchronize_rcu();
> >>>>>>> }
> >>>>>>>
> >>>>>>>
> >>>>>>>
> >>>>>> This is done under kvm->lock still, which means the lock might be held
> >>>>>> potentially for a very long time. Can synchronize_rcu be moved out of
> >>>>>> this lock?
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>> Only if kvm_free_assigned_device() will be moved out of this lock.
> >>>>> Device de-assignment is not very frequent event though. How long do you
> >>>>> think it may be held? KVM RCU read sections are very brief.
> >>>>>
> >>>>>
> >>>>>
> >>>> Note that the delay imposed by the barrier is not only related to the
> >>>> length of the critical section. The barrier blocks until the next grace
> >>>> period, and depending on the type of RCU you are using and your config
> >>>> options, this could be multiple milliseconds.
> >>>>
> >>>> I am not saying that this is definitely a problem for your design. I
> >>>> am just pointing out that the length of the KVM-RCU read section is only
> >>>>
> >>>>
> >>> Yeah I understand that other RCU read section may introduce delays too.
> >>> The question is how big the delay may be.
> >>>
> >> I think you are misunderstanding me. The read-side CS is not a
> >> significant factor here so I am not worried about concurrent read-side
> >> CS causing a longer delay. What I am saying is that the grace period of
> >> your RCU subsystem is the dominant factor in the equation here, and this
> >> may be several milliseconds.
> >>
> >>
> > How is the "grace period" is determined? Isn't it just means "no cpus is
> > in RCU read section anymore"?
> >
>
> Nope ;)
>
Now I recall something about each CPU passing through the scheduler. Thanks.
> RCU is pretty complex, so I won't even try to explain it here as there
> are numerous articles floating around out there that do a much better job.
>
> But here is a summary: RCU buys you two things: 1) concurrent readers
> *and* writers, and 2) a much lower overhead reader path because it
> generally doesn't use atomic. Its point (2) that is relevant here.
>
> If taking an atomic were ok, you could approximate the RCU model using
> reference counting. Reference counting buys you "precise" resource
> acquistion/release at the expense of the overhead of the atomic
> operation (and any associated cache-line bouncing). RCU uses a
> "imprecise" model where we don't really know the *exact* moment the
> resource is released. Instead, there are specific boundaries in time
> when we can guarantee that it had to have been released prior to the
> expiry of the event. This event is what is called the "grace period".
>
> So that is what synchronize_rcu() is doing. Its a barrier to the next
> imprecise moment in time when we can be assured (if you used the rest of
> the RCU API properly) that there can not be any outstanding references
> to your object left in flight. Each grace period can be milliseconds,
> depending on what version of the kernel you have and how it is configured.
>
> HTH
>
> Kind Regards,
> -Greg
>
> >
> >>> I don't think multiple
> >>> milliseconds delay in device de-assignment is a big issue though.
> >>>
> >>>
> >> I would tend to agree with you. It's not fast path.
> >>
> >> I only brought this up because I saw your design being justified
> >> incorrectly: you said "KVM RCU read sections are very brief", but that
> >> is not really relevant to Michael's point. I just want to make sure
> >> that the true impact is understood.
> >>
> >> Kind Regards,
> >> -Greg
> >>
> >>
> >>
> >
> >
> >
> > --
> > Gleb.
> >
>
>
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 13:15 ` Gleb Natapov
2009-07-13 13:16 ` Gregory Haskins
@ 2009-07-13 15:55 ` Marcelo Tosatti
2009-07-13 16:24 ` Gleb Natapov
1 sibling, 1 reply; 41+ messages in thread
From: Marcelo Tosatti @ 2009-07-13 15:55 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Gregory Haskins, avi, kvm
On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > Gleb Natapov wrote:
> > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > ---
> > > include/linux/kvm_host.h | 2 +-
> > > virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> > > virt/kvm/kvm_main.c | 1 -
> > > 3 files changed, 26 insertions(+), 32 deletions(-)
> > >
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index f54a0d3..6756b3e 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -161,7 +161,7 @@ struct kvm {
> > >
> > > struct mutex irq_lock;
> > > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > + struct kvm_kernel_irq_routing_entry *irq_routing;
> > > struct hlist_head mask_notifier_list;
> > > #endif
> > >
> > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > index 7af18b8..b2fa3f6 100644
> > > --- a/virt/kvm/irq_comm.c
> > > +++ b/virt/kvm/irq_comm.c
> > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > * IOAPIC. So set the bit in both. The guest will ignore
> > > * writes to the unused one.
> > > */
> > > - list_for_each_entry(e, &kvm->irq_routing, link)
> > > + rcu_read_lock();
> > > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > >
> >
> > Hi Gleb,
> > I haven't had a chance to fully digest and review these patches, but
> > one thing I did notice is that you seem to be converting from a list to
> > an open-coded structure. I am just curious why you made this design
> > decision instead of using the RCU variant of list?
> >
It is not a scary "open-coded structure", it's just an array :) As I responded
> to Michael the idea is to move msis out of irq_routing, make the array
> much smaller and either use gsi as an index in the array or use hash table
> instead looping over all entries. For now I can justify array as more
> cache friendly data structure as we scan it linearly.
I think it's more important to convert to a faster search mechanism (the
list walk shows up high in profiling), then convert to RCU?
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 15:55 ` Marcelo Tosatti
@ 2009-07-13 16:24 ` Gleb Natapov
2009-07-13 16:27 ` Marcelo Tosatti
0 siblings, 1 reply; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 16:24 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gregory Haskins, avi, kvm
On Mon, Jul 13, 2009 at 12:55:31PM -0300, Marcelo Tosatti wrote:
> On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > > Gleb Natapov wrote:
> > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > ---
> > > > include/linux/kvm_host.h | 2 +-
> > > > virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> > > > virt/kvm/kvm_main.c | 1 -
> > > > 3 files changed, 26 insertions(+), 32 deletions(-)
> > > >
> > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > index f54a0d3..6756b3e 100644
> > > > --- a/include/linux/kvm_host.h
> > > > +++ b/include/linux/kvm_host.h
> > > > @@ -161,7 +161,7 @@ struct kvm {
> > > >
> > > > struct mutex irq_lock;
> > > > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > + struct kvm_kernel_irq_routing_entry *irq_routing;
> > > > struct hlist_head mask_notifier_list;
> > > > #endif
> > > >
> > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > index 7af18b8..b2fa3f6 100644
> > > > --- a/virt/kvm/irq_comm.c
> > > > +++ b/virt/kvm/irq_comm.c
> > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > > * IOAPIC. So set the bit in both. The guest will ignore
> > > > * writes to the unused one.
> > > > */
> > > > - list_for_each_entry(e, &kvm->irq_routing, link)
> > > > + rcu_read_lock();
> > > > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > >
> > >
> > > Hi Gleb,
> > > I haven't had a chance to fully digest and review these patches, but
> > > one thing I did notice is that you seem to be converting from a list to
> > > an open-coded structure. I am just curious why you made this design
> > > decision instead of using the RCU variant of list?
> > >
> > It is not scary "open-coded structure" it's just an array :) As I responded
> > to Michael the idea is to move msis out of irq_routing, make the array
> > much smaller and either use gsi as an index in the array or use hash table
> > instead looping over all entries. For now I can justify array as more
> > cache friendly data structure as we scan it linearly.
>
> I think its more important to convert to faster search mechanism (the
> list walk shows up high in profiling), then convert to RCU?
Why in this order? I am working on a faster search mechanism now (on top
of the series).
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 16:24 ` Gleb Natapov
@ 2009-07-13 16:27 ` Marcelo Tosatti
2009-07-13 16:33 ` Gleb Natapov
0 siblings, 1 reply; 41+ messages in thread
From: Marcelo Tosatti @ 2009-07-13 16:27 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Gregory Haskins, avi, kvm
On Mon, Jul 13, 2009 at 07:24:53PM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 12:55:31PM -0300, Marcelo Tosatti wrote:
> > On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> > > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > > > Gleb Natapov wrote:
> > > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > > ---
> > > > > include/linux/kvm_host.h | 2 +-
> > > > > virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> > > > > virt/kvm/kvm_main.c | 1 -
> > > > > 3 files changed, 26 insertions(+), 32 deletions(-)
> > > > >
> > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > > index f54a0d3..6756b3e 100644
> > > > > --- a/include/linux/kvm_host.h
> > > > > +++ b/include/linux/kvm_host.h
> > > > > @@ -161,7 +161,7 @@ struct kvm {
> > > > >
> > > > > struct mutex irq_lock;
> > > > > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > > - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > > + struct kvm_kernel_irq_routing_entry *irq_routing;
> > > > > struct hlist_head mask_notifier_list;
> > > > > #endif
> > > > >
> > > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > > index 7af18b8..b2fa3f6 100644
> > > > > --- a/virt/kvm/irq_comm.c
> > > > > +++ b/virt/kvm/irq_comm.c
> > > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > > > * IOAPIC. So set the bit in both. The guest will ignore
> > > > > * writes to the unused one.
> > > > > */
> > > > > - list_for_each_entry(e, &kvm->irq_routing, link)
> > > > > + rcu_read_lock();
> > > > > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > >
> > > >
> > > > Hi Gleb,
> > > > I haven't had a chance to fully digest and review these patches, but
> > > > one thing I did notice is that you seem to be converting from a list to
> > > > an open-coded structure. I am just curious why you made this design
> > > > decision instead of using the RCU variant of list?
> > > >
> > > It is not scary "open-coded structure" it's just an array :) As I responded
> > > to Michael the idea is to move msis out of irq_routing, make the array
> > > much smaller and either use gsi as an index in the array or use hash table
> > > instead looping over all entries. For now I can justify array as more
> > > cache friendly data structure as we scan it linearly.
> >
> > I think its more important to convert to faster search mechanism (the
> > list walk shows up high in profiling), then convert to RCU?
> Why in this order? I am working on faster search mechanism now (on top
> of the series).
Because as Michael mentioned we can use slots_lock (should be renamed
to global_lock) instead of RCU on the write-side.
Note it moves a lot of burden to the writer side, but it's much simpler
than RCU and you stop the spread of locks. Needs to be discussed...
>
> --
> Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 16:27 ` Marcelo Tosatti
@ 2009-07-13 16:33 ` Gleb Natapov
2009-07-13 16:42 ` Marcelo Tosatti
0 siblings, 1 reply; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 16:33 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gregory Haskins, avi, kvm
On Mon, Jul 13, 2009 at 01:27:38PM -0300, Marcelo Tosatti wrote:
> On Mon, Jul 13, 2009 at 07:24:53PM +0300, Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 12:55:31PM -0300, Marcelo Tosatti wrote:
> > > On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> > > > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > > > > Gleb Natapov wrote:
> > > > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > > > ---
> > > > > > include/linux/kvm_host.h | 2 +-
> > > > > > virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> > > > > > virt/kvm/kvm_main.c | 1 -
> > > > > > 3 files changed, 26 insertions(+), 32 deletions(-)
> > > > > >
> > > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > > > index f54a0d3..6756b3e 100644
> > > > > > --- a/include/linux/kvm_host.h
> > > > > > +++ b/include/linux/kvm_host.h
> > > > > > @@ -161,7 +161,7 @@ struct kvm {
> > > > > >
> > > > > > struct mutex irq_lock;
> > > > > > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > > > - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > > > + struct kvm_kernel_irq_routing_entry *irq_routing;
> > > > > > struct hlist_head mask_notifier_list;
> > > > > > #endif
> > > > > >
> > > > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > > > index 7af18b8..b2fa3f6 100644
> > > > > > --- a/virt/kvm/irq_comm.c
> > > > > > +++ b/virt/kvm/irq_comm.c
> > > > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > > > > * IOAPIC. So set the bit in both. The guest will ignore
> > > > > > * writes to the unused one.
> > > > > > */
> > > > > > - list_for_each_entry(e, &kvm->irq_routing, link)
> > > > > > + rcu_read_lock();
> > > > > > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > > >
> > > > >
> > > > > Hi Gleb,
> > > > > I haven't had a chance to fully digest and review these patches, but
> > > > > one thing I did notice is that you seem to be converting from a list to
> > > > > an open-coded structure. I am just curious why you made this design
> > > > > decision instead of using the RCU variant of list?
> > > > >
> > > > It is not scary "open-coded structure" it's just an array :) As I responded
> > > > to Michael the idea is to move msis out of irq_routing, make the array
> > > > much smaller and either use gsi as an index in the array or use hash table
> > > > instead looping over all entries. For now I can justify array as more
> > > > cache friendly data structure as we scan it linearly.
> > >
> > > I think its more important to convert to faster search mechanism (the
> > > list walk shows up high in profiling), then convert to RCU?
> > Why in this order? I am working on faster search mechanism now (on top
> > of the series).
>
> Because as Michael mentioned we can use slots_lock (should be renamed
> to global_lock) instead of RCU on the write-side.
>
I don't get it. The point of RCU is to get rid of the reader's lock. If
I have to take slots_lock on each EOI, I've achieved nothing.
> Note it moves a lot of burden to the writer side, but its much simpler
> than RCU and you stop the spread of locks. Needs to be discussed...
>
I much prefer to have many well-defined locks with well-understood
scope, than a small number of global locks that are split ad hoc when a
deadlock is discovered (lock->irq_lock).
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 16:33 ` Gleb Natapov
@ 2009-07-13 16:42 ` Marcelo Tosatti
2009-07-13 16:44 ` Gleb Natapov
0 siblings, 1 reply; 41+ messages in thread
From: Marcelo Tosatti @ 2009-07-13 16:42 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Gregory Haskins, avi, kvm
On Mon, Jul 13, 2009 at 07:33:30PM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 01:27:38PM -0300, Marcelo Tosatti wrote:
> > On Mon, Jul 13, 2009 at 07:24:53PM +0300, Gleb Natapov wrote:
> > > On Mon, Jul 13, 2009 at 12:55:31PM -0300, Marcelo Tosatti wrote:
> > > > On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> > > > > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > > > > > Gleb Natapov wrote:
> > > > > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > > > > ---
> > > > > > > include/linux/kvm_host.h | 2 +-
> > > > > > > virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> > > > > > > virt/kvm/kvm_main.c | 1 -
> > > > > > > 3 files changed, 26 insertions(+), 32 deletions(-)
> > > > > > >
> > > > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > > > > index f54a0d3..6756b3e 100644
> > > > > > > --- a/include/linux/kvm_host.h
> > > > > > > +++ b/include/linux/kvm_host.h
> > > > > > > @@ -161,7 +161,7 @@ struct kvm {
> > > > > > >
> > > > > > > struct mutex irq_lock;
> > > > > > > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > > > > - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > > > > + struct kvm_kernel_irq_routing_entry *irq_routing;
> > > > > > > struct hlist_head mask_notifier_list;
> > > > > > > #endif
> > > > > > >
> > > > > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > > > > index 7af18b8..b2fa3f6 100644
> > > > > > > --- a/virt/kvm/irq_comm.c
> > > > > > > +++ b/virt/kvm/irq_comm.c
> > > > > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > > > > > * IOAPIC. So set the bit in both. The guest will ignore
> > > > > > > * writes to the unused one.
> > > > > > > */
> > > > > > > - list_for_each_entry(e, &kvm->irq_routing, link)
> > > > > > > + rcu_read_lock();
> > > > > > > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > > > >
> > > > > >
> > > > > > Hi Gleb,
> > > > > > I haven't had a chance to fully digest and review these patches, but
> > > > > > one thing I did notice is that you seem to be converting from a list to
> > > > > > an open-coded structure. I am just curious why you made this design
> > > > > > decision instead of using the RCU variant of list?
> > > > > >
> > > > > It is not scary "open-coded structure" it's just an array :) As I responded
> > > > > to Michael the idea is to move msis out of irq_routing, make the array
> > > > > much smaller and either use gsi as an index in the array or use hash table
> > > > > instead looping over all entries. For now I can justify array as more
> > > > > cache friendly data structure as we scan it linearly.
> > > >
> > > > I think its more important to convert to faster search mechanism (the
> > > > list walk shows up high in profiling), then convert to RCU?
> > > Why in this order? I am working on faster search mechanism now (on top
> > > of the series).
> >
> > Because as Michael mentioned we can use slots_lock (should be renamed
> > to global_lock) instead of RCU on the write-side.
> >
> I don't get it. The point for RCU is to get rid of reader's lock. If
> I'll have to take slot_lock on each EOI I achieved nothing.
You already take slots_lock for read on every exit.
> > than RCU and you stop the spread of locks. Needs to be discussed...
> >
> I much prefer to have many well defined locks with well understood
> scope, then a small number of globals locks that are split ad-hoc when
> deadlock is discovered (lock->irq_lock).
OK.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 16:42 ` Marcelo Tosatti
@ 2009-07-13 16:44 ` Gleb Natapov
2009-07-13 16:45 ` Marcelo Tosatti
0 siblings, 1 reply; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 16:44 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gregory Haskins, avi, kvm
On Mon, Jul 13, 2009 at 01:42:13PM -0300, Marcelo Tosatti wrote:
> On Mon, Jul 13, 2009 at 07:33:30PM +0300, Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 01:27:38PM -0300, Marcelo Tosatti wrote:
> > > On Mon, Jul 13, 2009 at 07:24:53PM +0300, Gleb Natapov wrote:
> > > > On Mon, Jul 13, 2009 at 12:55:31PM -0300, Marcelo Tosatti wrote:
> > > > > On Mon, Jul 13, 2009 at 04:15:34PM +0300, Gleb Natapov wrote:
> > > > > > On Mon, Jul 13, 2009 at 09:01:33AM -0400, Gregory Haskins wrote:
> > > > > > > Gleb Natapov wrote:
> > > > > > > > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > > > > > > > ---
> > > > > > > > include/linux/kvm_host.h | 2 +-
> > > > > > > > virt/kvm/irq_comm.c | 55 +++++++++++++++++++++-------------------------
> > > > > > > > virt/kvm/kvm_main.c | 1 -
> > > > > > > > 3 files changed, 26 insertions(+), 32 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > > > > > index f54a0d3..6756b3e 100644
> > > > > > > > --- a/include/linux/kvm_host.h
> > > > > > > > +++ b/include/linux/kvm_host.h
> > > > > > > > @@ -161,7 +161,7 @@ struct kvm {
> > > > > > > >
> > > > > > > > struct mutex irq_lock;
> > > > > > > > #ifdef CONFIG_HAVE_KVM_IRQCHIP
> > > > > > > > - struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */
> > > > > > > > + struct kvm_kernel_irq_routing_entry *irq_routing;
> > > > > > > > struct hlist_head mask_notifier_list;
> > > > > > > > #endif
> > > > > > > >
> > > > > > > > diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
> > > > > > > > index 7af18b8..b2fa3f6 100644
> > > > > > > > --- a/virt/kvm/irq_comm.c
> > > > > > > > +++ b/virt/kvm/irq_comm.c
> > > > > > > > @@ -148,7 +148,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
> > > > > > > > * IOAPIC. So set the bit in both. The guest will ignore
> > > > > > > > * writes to the unused one.
> > > > > > > > */
> > > > > > > > - list_for_each_entry(e, &kvm->irq_routing, link)
> > > > > > > > + rcu_read_lock();
> > > > > > > > + for (e = rcu_dereference(kvm->irq_routing); e && e->set; e++) {
> > > > > > > >
> > > > > > >
> > > > > > > Hi Gleb,
> > > > > > > I haven't had a chance to fully digest and review these patches, but
> > > > > > > one thing I did notice is that you seem to be converting from a list to
> > > > > > > an open-coded structure. I am just curious why you made this design
> > > > > > > decision instead of using the RCU variant of list?
> > > > > > >
> > > > > > It is not scary "open-coded structure" it's just an array :) As I responded
> > > > > > to Michael the idea is to move msis out of irq_routing, make the array
> > > > > > much smaller and either use gsi as an index in the array or use hash table
> > > > > > instead looping over all entries. For now I can justify array as more
> > > > > > cache friendly data structure as we scan it linearly.
> > > > >
> > > > > I think its more important to convert to faster search mechanism (the
> > > > > list walk shows up high in profiling), then convert to RCU?
> > > > Why in this order? I am working on faster search mechanism now (on top
> > > > of the series).
> > >
> > > Because as Michael mentioned we can use slots_lock (should be renamed
> > > to global_lock) instead of RCU on the write-side.
> > >
> > I don't get it. The point for RCU is to get rid of reader's lock. If
> > I'll have to take slot_lock on each EOI I achieved nothing.
>
> You already take slots_lock for read on every exit.
>
We should fix that, not add even more users. Shouldn't we?
> > > than RCU and you stop the spread of locks. Needs to be discussed...
> > >
> > I much prefer to have many well defined locks with well understood
> > scope, then a small number of globals locks that are split ad-hoc when
> > deadlock is discovered (lock->irq_lock).
>
> OK.
>
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 16:44 ` Gleb Natapov
@ 2009-07-13 16:45 ` Marcelo Tosatti
2009-07-13 16:54 ` Gleb Natapov
0 siblings, 1 reply; 41+ messages in thread
From: Marcelo Tosatti @ 2009-07-13 16:45 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Gregory Haskins, avi, kvm
On Mon, Jul 13, 2009 at 07:44:38PM +0300, Gleb Natapov wrote:
> > > I don't get it. The point for RCU is to get rid of reader's lock. If
> > > I'll have to take slot_lock on each EOI I achieved nothing.
> >
> > You already take slots_lock for read on every exit.
> >
> We should fix that, not add even more users. Shouldn't we?
Yes. Please send both patchsets as one when you resend them; it's easier
to review.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 1/4] Move irq routing data structure to rcu locking
2009-07-13 16:45 ` Marcelo Tosatti
@ 2009-07-13 16:54 ` Gleb Natapov
0 siblings, 0 replies; 41+ messages in thread
From: Gleb Natapov @ 2009-07-13 16:54 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gregory Haskins, avi, kvm
On Mon, Jul 13, 2009 at 01:45:07PM -0300, Marcelo Tosatti wrote:
> On Mon, Jul 13, 2009 at 07:44:38PM +0300, Gleb Natapov wrote:
> > > > I don't get it. The point for RCU is to get rid of reader's lock. If
> > > > I'll have to take slot_lock on each EOI I achieved nothing.
> > >
> > > You already take slots_lock for read on every exit.
> > >
> > We should fix that, not add even more users. Shouldn't we?
>
> Yes. Please send both patchsets as one when you resend then, its easier
> to review.
OK.
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 13:40 ` Michael S. Tsirkin
2009-07-13 13:44 ` Gregory Haskins
@ 2009-07-13 19:31 ` Paul E. McKenney
2009-07-14 5:46 ` Gleb Natapov
1 sibling, 1 reply; 41+ messages in thread
From: Paul E. McKenney @ 2009-07-13 19:31 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Gleb Natapov, Gregory Haskins, avi, kvm@vger.kernel.org
On Mon, Jul 13, 2009 at 04:40:59PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
> > Yeah I understand that other RCU read section may introduce delays too.
> > The question is how big the delay may be.
>
> I recall seeing the number "at least 3 jiffies" somewhere, but that
> might have changed since.
A grace period lasts a handful of jiffies, depending on kernel
configuration and how long readers remain in a given RCU read-side
critical section.
If a handful of jiffies is too long, there are patches that speed up
the grace period, down into the sub-hundred-microsecond range.
> > I don't think multiple
> > milliseconds delay in device de-assignment is a big issue though.
>
> Right. My point was that since the sync is done under kvm lock, the
> guest can easily get blocked trying to get kvm lock meanwhile.
I will ask the usual question -- can call_rcu() be used to move
the grace period out from under the lock? (Often this can be done,
but not always.)
Thanx, Paul
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-13 19:31 ` Paul E. McKenney
@ 2009-07-14 5:46 ` Gleb Natapov
2009-07-14 12:03 ` Paul E. McKenney
0 siblings, 1 reply; 41+ messages in thread
From: Gleb Natapov @ 2009-07-14 5:46 UTC (permalink / raw)
To: Paul E. McKenney
Cc: Michael S. Tsirkin, Gregory Haskins, avi, kvm@vger.kernel.org
On Mon, Jul 13, 2009 at 12:31:39PM -0700, Paul E. McKenney wrote:
> On Mon, Jul 13, 2009 at 04:40:59PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
> > > Yeah I understand that other RCU read section may introduce delays too.
> > > The question is how big the delay may be.
> >
> > I recall seeing the number "at least 3 jiffies" somewhere, but that
> > might have changed since.
>
> A grace period lasts a handful of jiffies, depending on kernel
> configuration and how long readers remain in a given RCU read-side
> critical section.
>
> If a handful of jiffies is too long, there are patches that speed up
> the grace period, down into the sub-hundred-microsecond range.
>
> > > I don't think multiple
> > > milliseconds delay in device de-assignment is a big issue though.
> >
> > Right. My point was that since the sync is done under kvm lock, the
> > guest can easily get blocked trying to get kvm lock meanwhile.
>
> I will ask the usual question -- can call_rcu() be used to move
> the grace period out from under the lock? (Often this can be done,
> but not always.)
>
It can't. Caller frees the data.
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-14 5:46 ` Gleb Natapov
@ 2009-07-14 12:03 ` Paul E. McKenney
2009-07-14 12:06 ` Gleb Natapov
0 siblings, 1 reply; 41+ messages in thread
From: Paul E. McKenney @ 2009-07-14 12:03 UTC (permalink / raw)
To: Gleb Natapov
Cc: Michael S. Tsirkin, Gregory Haskins, avi, kvm@vger.kernel.org
On Tue, Jul 14, 2009 at 08:46:12AM +0300, Gleb Natapov wrote:
> On Mon, Jul 13, 2009 at 12:31:39PM -0700, Paul E. McKenney wrote:
> > On Mon, Jul 13, 2009 at 04:40:59PM +0300, Michael S. Tsirkin wrote:
> > > On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
> > > > Yeah I understand that other RCU read section may introduce delays too.
> > > > The question is how big the delay may be.
> > >
> > > I recall seeing the number "at least 3 jiffies" somewhere, but that
> > > might have changed since.
> >
> > A grace period lasts a handful of jiffies, depending on kernel
> > configuration and how long readers remain in a given RCU read-side
> > critical section.
> >
> > If a handful of jiffies is too long, there are patches that speed up
> > the grace period, down into the sub-hundred-microsecond range.
> >
> > > > I don't think multiple
> > > > milliseconds delay in device de-assignment is a big issue though.
> > >
> > > Right. My point was that since the sync is done under kvm lock, the
> > > guest can easily get blocked trying to get kvm lock meanwhile.
> >
> > I will ask the usual question -- can call_rcu() be used to move
> > the grace period out from under the lock? (Often this can be done,
> > but not always.)
> >
> It can't. Caller frees the data.
I will then ask the usual next question... Can the freeing of the
data be moved from the caller?
Thanx, Paul
^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [PATCH 4/4] Convert irq notifiers lists to RCU locking.
2009-07-14 12:03 ` Paul E. McKenney
@ 2009-07-14 12:06 ` Gleb Natapov
0 siblings, 0 replies; 41+ messages in thread
From: Gleb Natapov @ 2009-07-14 12:06 UTC (permalink / raw)
To: Paul E. McKenney
Cc: Michael S. Tsirkin, Gregory Haskins, avi, kvm@vger.kernel.org
On Tue, Jul 14, 2009 at 05:03:09AM -0700, Paul E. McKenney wrote:
> On Tue, Jul 14, 2009 at 08:46:12AM +0300, Gleb Natapov wrote:
> > On Mon, Jul 13, 2009 at 12:31:39PM -0700, Paul E. McKenney wrote:
> > > On Mon, Jul 13, 2009 at 04:40:59PM +0300, Michael S. Tsirkin wrote:
> > > > On Mon, Jul 13, 2009 at 04:32:34PM +0300, Gleb Natapov wrote:
> > > > > Yeah I understand that other RCU read section may introduce delays too.
> > > > > The question is how big the delay may be.
> > > >
> > > > I recall seeing the number "at least 3 jiffies" somewhere, but that
> > > > might have changed since.
> > >
> > > A grace period lasts a handful of jiffies, depending on kernel
> > > configuration and how long readers remain in a given RCU read-side
> > > critical section.
> > >
> > > If a handful of jiffies is too long, there are patches that speed up
> > > the grace period, down into the sub-hundred-microsecond range.
> > >
> > > > > I don't think multiple
> > > > > milliseconds delay in device de-assignment is a big issue though.
> > > >
> > > > Right. My point was that since the sync is done under kvm lock, the
> > > > guest can easily get blocked trying to get kvm lock meanwhile.
> > >
> > > I will ask the usual question -- can call_rcu() be used to move
> > > the grace period out from under the lock? (Often this can be done,
> > > but not always.)
> > >
> > It can't. Caller frees the data.
>
> I will then ask the usual next question... Can the freeing of the
> data be moved from the caller?
>
Not really. But don't worry. In the case we discuss a small delay is not
a problem.
--
Gleb.
^ permalink raw reply [flat|nested] 41+ messages in thread
end of thread, other threads:[~2009-07-14 12:06 UTC | newest]
Thread overview: 41+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-12 12:03 [PATCH 0/4] moving irq routing and notifiers to RCU locking Gleb Natapov
2009-07-12 12:03 ` [PATCH 1/4] Move irq routing data structure to rcu locking Gleb Natapov
2009-07-13 12:55 ` Michael S. Tsirkin
2009-07-13 13:03 ` Gleb Natapov
2009-07-13 13:15 ` Michael S. Tsirkin
2009-07-13 13:23 ` Gleb Natapov
2009-07-13 13:36 ` Michael S. Tsirkin
2009-07-13 13:01 ` Gregory Haskins
2009-07-13 13:15 ` Gleb Natapov
2009-07-13 13:16 ` Gregory Haskins
2009-07-13 13:25 ` Gleb Natapov
2009-07-13 13:29 ` Gregory Haskins
2009-07-13 15:55 ` Marcelo Tosatti
2009-07-13 16:24 ` Gleb Natapov
2009-07-13 16:27 ` Marcelo Tosatti
2009-07-13 16:33 ` Gleb Natapov
2009-07-13 16:42 ` Marcelo Tosatti
2009-07-13 16:44 ` Gleb Natapov
2009-07-13 16:45 ` Marcelo Tosatti
2009-07-13 16:54 ` Gleb Natapov
2009-07-12 12:03 ` [PATCH 2/4] Unregister ack notifier callback on PIT freeing Gleb Natapov
2009-07-12 12:03 ` [PATCH 3/4] Move irq ack notifier list to arch independent code Gleb Natapov
2009-07-12 12:03 ` [PATCH 4/4] Convert irq notifiers lists to RCU locking Gleb Natapov
2009-07-13 12:56 ` Michael S. Tsirkin
2009-07-13 13:05 ` Gleb Natapov
2009-07-13 13:29 ` Michael S. Tsirkin
2009-07-13 13:48 ` Gregory Haskins
2009-07-13 13:02 ` Michael S. Tsirkin
2009-07-13 13:11 ` Gleb Natapov
2009-07-13 13:26 ` Gregory Haskins
2009-07-13 13:32 ` Gleb Natapov
2009-07-13 13:40 ` Gregory Haskins
2009-07-13 13:52 ` Gleb Natapov
2009-07-13 14:02 ` Gregory Haskins
2009-07-13 14:08 ` Gleb Natapov
2009-07-13 13:40 ` Michael S. Tsirkin
2009-07-13 13:44 ` Gregory Haskins
2009-07-13 19:31 ` Paul E. McKenney
2009-07-14 5:46 ` Gleb Natapov
2009-07-14 12:03 ` Paul E. McKenney
2009-07-14 12:06 ` Gleb Natapov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox