public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* PCI-passthrough: interrupt work structure per device
@ 2008-05-29 17:17 Ben-Ami Yassour
  2008-05-29 18:09 ` Mark McLoughlin
  0 siblings, 1 reply; 3+ messages in thread
From: Ben-Ami Yassour @ 2008-05-29 17:17 UTC (permalink / raw)
  To: Amit Shah; +Cc: Kay, Allen M, Muli Ben-Yehuda, kvm

Amit,

Attached is a patch for the pci-passthrough tree.
This patch changes the workqueue structure of the interrupt handling to be per
device.
This also requires that all locks are locked with irqsave, since the interrupt
handler is also using the pt device list.
I tested it with an e1000 NIC and it seems to work ok.

Please let me know if you have comments.

Regards,
Ben

>From b847cef27c6c6dfff15b8fc9682e4c6563e997f3 Mon Sep 17 00:00:00 2001
From: Ben-Ami Yassour <benami@il.ibm.com>
Date: Thu, 29 May 2008 19:39:14 +0300
Subject: [PATCH] KVM: PCIPT: interrupt work structure per device

Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
---
  arch/x86/kvm/x86.c         |   90 ++++++++++++++++++++++++++++----------------
  include/asm-x86/kvm_host.h |   23 +++++++----
  include/asm-x86/kvm_para.h |    1 +
  3 files changed, 72 insertions(+), 42 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d1cc582..cf3a47c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -146,6 +146,9 @@ static void kvm_pci_pt_work_fn(struct work_struct *work)
  	struct kvm_pci_pt_dev_list *match;
  	struct kvm_pci_pt_work *int_work;
  	int source;
+	unsigned long flags;
+	int guest_irq;
+	int host_irq;

  	int_work = container_of(work, struct kvm_pci_pt_work, work);

@@ -156,24 +159,27 @@ static void kvm_pci_pt_work_fn(struct work_struct *work)
  	 * finer-grained lock, update this
  	 */
  	mutex_lock(&int_work->kvm->lock);
-	read_lock(&kvm_pci_pt_lock);
+	read_lock_irqsave(&kvm_pci_pt_lock, flags);
  	match = kvm_find_pci_pt_dev(&int_work->kvm->arch.pci_pt_dev_head, NULL,
  				    int_work->irq, source);
  	if (!match) {
  		printk(KERN_ERR "%s: no matching device assigned to guest "
  		       "found for irq %d, source = %d!\n",
  		       __func__, int_work->irq, int_work->source);
+		read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
  		goto out;
  	}
+	guest_irq = match->pt_dev.guest.irq;
+	host_irq = match->pt_dev.host.irq;
+	read_unlock_irqrestore(&kvm_pci_pt_lock, flags);

  	if (source == KVM_PT_SOURCE_IRQ)
-		kvm_set_irq(int_work->kvm, match->pt_dev.guest.irq, 1);
+		kvm_set_irq(int_work->kvm, guest_irq, 1);
  	else {
  		kvm_set_irq(int_work->kvm, int_work->irq, 0);
-		enable_irq(match->pt_dev.host.irq);
+		enable_irq(host_irq);
  	}
  out:
-	read_unlock(&kvm_pci_pt_lock);
  	mutex_unlock(&int_work->kvm->lock);
  	kvm_put_kvm(int_work->kvm);
  }
@@ -184,16 +190,27 @@ out:
  static irqreturn_t kvm_pci_pt_dev_intr(int irq, void *dev_id)
  {
  	struct kvm *kvm = (struct kvm *) dev_id;
+	struct kvm_pci_pt_dev_list *pci_pt_dev;

  	if (!test_bit(irq, pt_irq_handled))
  		return IRQ_NONE;

-	kvm->arch.pci_pt_int_work.irq = irq;
-	kvm->arch.pci_pt_int_work.kvm = kvm;
-	kvm->arch.pci_pt_int_work.source = 0;
+	read_lock(&kvm_pci_pt_lock);
+	pci_pt_dev = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL,
+					 irq, KVM_PT_SOURCE_IRQ);
+	if (!pci_pt_dev) {
+		read_unlock(&kvm_pci_pt_lock);
+		return IRQ_NONE;
+	}
+
+	pci_pt_dev->pt_dev.int_work.irq = irq;
+	pci_pt_dev->pt_dev.int_work.kvm = kvm;
+	pci_pt_dev->pt_dev.int_work.source = 0;

  	kvm_get_kvm(kvm);
-	schedule_work(&kvm->arch.pci_pt_int_work.work);
+	schedule_work(&pci_pt_dev->pt_dev.int_work.work);
+	read_unlock(&kvm_pci_pt_lock);
+

  	disable_irq_nosync(irq);
  	return IRQ_HANDLED;
@@ -203,25 +220,29 @@ static irqreturn_t kvm_pci_pt_dev_intr(int irq, void *dev_id)
  void kvm_pci_pt_ack_irq(struct kvm *kvm, int vector)
  {
  	int irq;
+	struct kvm_pci_pt_dev_list *pci_pt_dev;
+	unsigned long flags;

  	irq = kvm_get_eoi_gsi(kvm->arch.vioapic, vector);
  	if (irq == -1)
  		return;

-	read_lock(&kvm_pci_pt_lock);
-	if (!kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL, irq,
-				 KVM_PT_SOURCE_IRQ_ACK)) {
-		read_unlock(&kvm_pci_pt_lock);
+	read_lock_irqsave(&kvm_pci_pt_lock, flags);
+	pci_pt_dev = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL, irq,
+				       KVM_PT_SOURCE_IRQ_ACK);
+
+	if (!pci_pt_dev) {
+		read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
  		return;
  	}
-	read_unlock(&kvm_pci_pt_lock);

-	kvm->arch.pci_pt_int_work.irq = irq;
-	kvm->arch.pci_pt_int_work.kvm = kvm;
-	kvm->arch.pci_pt_int_work.source = 1;
+	pci_pt_dev->pt_dev.int_work.irq = irq;
+	pci_pt_dev->pt_dev.int_work.kvm = kvm;
+	pci_pt_dev->pt_dev.int_work.source = 1;

  	kvm_get_kvm(kvm);
-	schedule_work(&kvm->arch.pci_pt_int_work.work);
+	schedule_work(&pci_pt_dev->pt_dev.int_work.work);
+	read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
  }

  static int kvm_vm_ioctl_pci_pt_dev(struct kvm *kvm,
@@ -229,8 +250,9 @@ static int kvm_vm_ioctl_pci_pt_dev(struct kvm *kvm,
  {
  	int r = 0;
  	struct kvm_pci_pt_dev_list *match;
+	unsigned long flags;

-	write_lock(&kvm_pci_pt_lock);
+	write_lock_irqsave(&kvm_pci_pt_lock, flags);

  	/* Check if this is a request to update the irq of the device
  	 * in the guest (kernels can dynamically reprogram irq numbers).
@@ -249,10 +271,10 @@ static int kvm_vm_ioctl_pci_pt_dev(struct kvm *kvm,
  			match->pt_dev.guest.irq = pci_pt_dev->guest.irq;
  		else
  			r = -EINVAL;
-		write_unlock(&kvm_pci_pt_lock);
+		write_unlock_irqrestore(&kvm_pci_pt_lock, flags);
  		goto out;
  	}
-	write_unlock(&kvm_pci_pt_lock);
+	write_unlock_irqrestore(&kvm_pci_pt_lock, flags);

  	match = kzalloc(sizeof(struct kvm_pci_pt_dev_list), GFP_KERNEL);
  	if (match == NULL) {
@@ -283,16 +305,16 @@ static int kvm_vm_ioctl_pci_pt_dev(struct kvm *kvm,
  			goto out_free;
  		}
  	}
-	write_lock(&kvm_pci_pt_lock);
+	write_lock_irqsave(&kvm_pci_pt_lock, flags);
+
+	INIT_WORK(&match->pt_dev.int_work.work, kvm_pci_pt_work_fn);
+
  	list_add(&match->list, &kvm->arch.pci_pt_dev_head);

  	if (irqchip_in_kernel(kvm))
  		set_bit(pci_pt_dev->host.irq, pt_irq_handled);
-	write_unlock(&kvm_pci_pt_lock);
+	write_unlock_irqrestore(&kvm_pci_pt_lock, flags);

-	printk(KERN_INFO "kvm: Handling hypercalls for device %02x:%02x.%1x\n",
-	       pci_pt_dev->host.busnr, PCI_SLOT(pci_pt_dev->host.devfn),
-	       PCI_FUNC(pci_pt_dev->host.devfn));
  out:
  	return r;
  out_free:
@@ -304,15 +326,18 @@ static void kvm_free_pci_passthrough(struct kvm *kvm)
  {
  	struct list_head *ptr, *ptr2;
  	struct kvm_pci_pt_dev_list *pci_pt_dev;
+	unsigned long flags;

-	if (cancel_work_sync(&kvm->arch.pci_pt_int_work.work)) {
-		/* We had pending work. That means we will have to take
-		 * care of kvm_put_kvm.
-		 */
-		kvm_put_kvm(kvm);
+	list_for_each_safe(ptr, ptr2, &kvm->arch.pci_pt_dev_head) {
+		pci_pt_dev = list_entry(ptr, struct kvm_pci_pt_dev_list, list);
+		if (cancel_work_sync(&pci_pt_dev->pt_dev.int_work.work))
+			/* We had pending work. That means we will have to take
+			 * care of kvm_put_kvm.
+			 */
+			kvm_put_kvm(kvm);
  	}

-	write_lock(&kvm_pci_pt_lock);
+	write_lock_irqsave(&kvm_pci_pt_lock, flags);
  	list_for_each_safe(ptr, ptr2, &kvm->arch.pci_pt_dev_head) {
  		pci_pt_dev = list_entry(ptr, struct kvm_pci_pt_dev_list, list);

@@ -321,7 +346,7 @@ static void kvm_free_pci_passthrough(struct kvm *kvm)

  		list_del(&pci_pt_dev->list);
  	}
-	write_unlock(&kvm_pci_pt_lock);
+	write_unlock_irqrestore(&kvm_pci_pt_lock, flags);
  }

  unsigned long segment_base(u16 selector)
@@ -4107,7 +4132,6 @@ struct  kvm *kvm_arch_create_vm(void)

  	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
  	INIT_LIST_HEAD(&kvm->arch.pci_pt_dev_head);
-	INIT_WORK(&kvm->arch.pci_pt_int_work.work, kvm_pci_pt_work_fn);

  	return kvm;
  }
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 5d3ebbf..6b374dc 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -302,14 +302,6 @@ struct kvm_mem_alias {
  #define KVM_PT_SOURCE_IRQ_ACK	2
  #define KVM_PT_SOURCE_ASSIGN	3

-/* This list is to store the guest bus:device:function-irq and host
- * bus:device:function-irq mapping for assigned devices.
- */
-struct kvm_pci_pt_dev_list {
-	struct list_head list;
-	struct kvm_pci_passthrough_dev pt_dev;
-};
-
  /* For assigned devices, we schedule work in the system workqueue to
   * inject interrupts into the guest when an interrupt occurs on the
   * physical device and also when the guest acks the interrupt.
@@ -321,6 +313,20 @@ struct kvm_pci_pt_work {
  	bool source;
  };

+struct kvm_pci_passthrough_dev_kernel {
+	struct kvm_pci_pt_info guest;
+	struct kvm_pci_pt_info host;
+	struct kvm_pci_pt_work int_work;
+};
+
+/* This list is to store the guest bus:device:function-irq and host
+ * bus:device:function-irq mapping for assigned devices.
+ */
+struct kvm_pci_pt_dev_list {
+	struct list_head list;
+	struct kvm_pci_passthrough_dev_kernel pt_dev;
+};
+
  struct kvm_arch{
  	int naliases;
  	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -334,7 +340,6 @@ struct kvm_arch{
  	 */
  	struct list_head active_mmu_pages;
  	struct list_head pci_pt_dev_head;
-	struct kvm_pci_pt_work pci_pt_int_work;
  	struct kvm_pic *vpic;
  	struct kvm_ioapic *vioapic;
  	struct kvm_pit *vpit;
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 5f93b78..5813ed0 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -171,4 +171,5 @@ struct kvm_pci_passthrough_dev {
  	struct kvm_pci_pt_info guest;
  	struct kvm_pci_pt_info host;
  };
+
  #endif
-- 
1.5.5.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: PCI-passthrough: interrupt work structure per device
  2008-05-29 17:17 PCI-passthrough: interrupt work structure per device Ben-Ami Yassour
@ 2008-05-29 18:09 ` Mark McLoughlin
  2008-05-30  2:15   ` Amit Shah
  0 siblings, 1 reply; 3+ messages in thread
From: Mark McLoughlin @ 2008-05-29 18:09 UTC (permalink / raw)
  To: Ben-Ami Yassour; +Cc: Amit Shah, Kay, Allen M, Muli Ben-Yehuda, kvm

On Thu, 2008-05-29 at 20:17 +0300, Ben-Ami Yassour wrote:

> Attached is a patch for the pci-passthrough tree.
> This patch changes the workqueue structure of the interrupt handling to be per
> device.

Heh, I just happened to notice this myself yesterday.

...
> >From b847cef27c6c6dfff15b8fc9682e4c6563e997f3 Mon Sep 17 00:00:00 2001
> From: Ben-Ami Yassour <benami@il.ibm.com>
> Date: Thu, 29 May 2008 19:39:14 +0300
> Subject: [PATCH] KVM: PCIPT: interrupt work structure per device
> 
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> ---
>   arch/x86/kvm/x86.c         |   90 ++++++++++++++++++++++++++++----------------
>   include/asm-x86/kvm_host.h |   23 +++++++----
>   include/asm-x86/kvm_para.h |    1 +
>   3 files changed, 72 insertions(+), 42 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index d1cc582..cf3a47c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
...
> @@ -184,16 +190,27 @@ out:
>   static irqreturn_t kvm_pci_pt_dev_intr(int irq, void *dev_id)
>   {
>   	struct kvm *kvm = (struct kvm *) dev_id;
> +	struct kvm_pci_pt_dev_list *pci_pt_dev;
> 
>   	if (!test_bit(irq, pt_irq_handled))
>   		return IRQ_NONE;
> 
> -	kvm->arch.pci_pt_int_work.irq = irq;
> -	kvm->arch.pci_pt_int_work.kvm = kvm;
> -	kvm->arch.pci_pt_int_work.source = 0;
> +	read_lock(&kvm_pci_pt_lock);
> +	pci_pt_dev = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL,
> +					 irq, KVM_PT_SOURCE_IRQ);
> +	if (!pci_pt_dev) {
> +		read_unlock(&kvm_pci_pt_lock);
> +		return IRQ_NONE;
> +	}
> +
> +	pci_pt_dev->pt_dev.int_work.irq = irq;
> +	pci_pt_dev->pt_dev.int_work.kvm = kvm;
> +	pci_pt_dev->pt_dev.int_work.source = 0;
> 
>   	kvm_get_kvm(kvm);

Won't we leak this reference if we get another interrupt before the
workqueue is scheduled?

> -	printk(KERN_INFO "kvm: Handling hypercalls for device %02x:%02x.%1x\n",
> -	       pci_pt_dev->host.busnr, PCI_SLOT(pci_pt_dev->host.devfn),
> -	       PCI_FUNC(pci_pt_dev->host.devfn));

Spurious change.

...

> diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
> index 5f93b78..5813ed0 100644
> --- a/include/asm-x86/kvm_para.h
> +++ b/include/asm-x86/kvm_para.h
> @@ -171,4 +171,5 @@ struct kvm_pci_passthrough_dev {
>   	struct kvm_pci_pt_info guest;
>   	struct kvm_pci_pt_info host;
>   };
> +
>   #endif

Ditto.

Cheers,
Mark.


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: PCI-passthrough: interrupt work structure per device
  2008-05-29 18:09 ` Mark McLoughlin
@ 2008-05-30  2:15   ` Amit Shah
  0 siblings, 0 replies; 3+ messages in thread
From: Amit Shah @ 2008-05-30  2:15 UTC (permalink / raw)
  To: Mark McLoughlin; +Cc: Ben-Ami Yassour, Kay, Allen M, Muli Ben-Yehuda, kvm

On Thursday 29 May 2008 23:39:37 Mark McLoughlin wrote:
> > @@ -184,16 +190,27 @@ out:
> >   static irqreturn_t kvm_pci_pt_dev_intr(int irq, void *dev_id)
> >   {
> >   	struct kvm *kvm = (struct kvm *) dev_id;
> > +	struct kvm_pci_pt_dev_list *pci_pt_dev;
> >
> >   	if (!test_bit(irq, pt_irq_handled))
> >   		return IRQ_NONE;
> >
> > -	kvm->arch.pci_pt_int_work.irq = irq;
> > -	kvm->arch.pci_pt_int_work.kvm = kvm;
> > -	kvm->arch.pci_pt_int_work.source = 0;
> > +	read_lock(&kvm_pci_pt_lock);
> > +	pci_pt_dev = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL,
> > +					 irq, KVM_PT_SOURCE_IRQ);
> > +	if (!pci_pt_dev) {
> > +		read_unlock(&kvm_pci_pt_lock);
> > +		return IRQ_NONE;
> > +	}
> > +
> > +	pci_pt_dev->pt_dev.int_work.irq = irq;
> > +	pci_pt_dev->pt_dev.int_work.kvm = kvm;
> > +	pci_pt_dev->pt_dev.int_work.source = 0;
> >
> >   	kvm_get_kvm(kvm);
>
> Won't we leak this reference if we get another interrupt before the
> workqueue is scheduled?

Currently, we disable the interrupt line till the ack is done. We don't 
support shared interrupts yet, so this will work till we have the shared 
interrupt support ready.

Amit.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2008-05-30  2:13 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-05-29 17:17 PCI-passthrough: interrupt work structure per device Ben-Ami Yassour
2008-05-29 18:09 ` Mark McLoughlin
2008-05-30  2:15   ` Amit Shah

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox