kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4 v9] MSI-X MMIO support for KVM
@ 2011-02-18  8:53 Sheng Yang
  2011-02-18  8:53 ` [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
                   ` (4 more replies)
  0 siblings, 5 replies; 20+ messages in thread
From: Sheng Yang @ 2011-02-18  8:53 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Michael S. Tsirkin, Sheng Yang

Sorry for the long delay — I just came back from vacation...

Change from v8:
1. Update struct kvm_run to contain MSI-X routing update exit specific
information.
2. Fix a mmio_nr counting bug.

Note that this patchset is still based on 2.6.37, due to a blocking bug
affecting assigned devices in the current upstream.

Sheng Yang (4):
  KVM: Move struct kvm_io_device to kvm_host.h
  KVM: Add kvm_io_ext_data to IO handler
  KVM: Emulate MSI-X table in kernel
  KVM: Add documents for MSI-X MMIO API

 Documentation/kvm/api.txt |   58 +++++++++
 arch/x86/kvm/Makefile     |    2 +-
 arch/x86/kvm/i8254.c      |    6 +-
 arch/x86/kvm/i8259.c      |    3 +-
 arch/x86/kvm/lapic.c      |    3 +-
 arch/x86/kvm/x86.c        |   40 +++++--
 include/linux/kvm.h       |   28 +++++
 include/linux/kvm_host.h  |   65 ++++++++++-
 virt/kvm/assigned-dev.c   |   44 +++++++
 virt/kvm/coalesced_mmio.c |    3 +-
 virt/kvm/eventfd.c        |    2 +-
 virt/kvm/ioapic.c         |    2 +-
 virt/kvm/iodev.h          |   31 +----
 virt/kvm/kvm_main.c       |   40 ++++++-
 virt/kvm/msix_mmio.c      |  293 +++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/msix_mmio.h      |   25 ++++
 16 files changed, 594 insertions(+), 51 deletions(-)
 create mode 100644 virt/kvm/msix_mmio.c
 create mode 100644 virt/kvm/msix_mmio.h


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h
  2011-02-18  8:53 [PATCH 0/4 v9] MSI-X MMIO support for KVM Sheng Yang
@ 2011-02-18  8:53 ` Sheng Yang
  2011-02-18  8:53 ` [PATCH 2/4] KVM: Add kvm_io_ext_data to IO handler Sheng Yang
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 20+ messages in thread
From: Sheng Yang @ 2011-02-18  8:53 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Michael S. Tsirkin, Sheng Yang

Then it can be used by other structs in kvm_host.h.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 include/linux/kvm_host.h |   23 +++++++++++++++++++++++
 virt/kvm/iodev.h         |   25 +------------------------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b5021db..7d313e0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -98,6 +98,29 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
+struct kvm_io_device;
+
+/**
+ * kvm_io_device_ops are called under kvm slots_lock.
+ * read and write handlers return 0 if the transaction has been handled,
+ * or non-zero to have it passed to the next device.
+ **/
+struct kvm_io_device_ops {
+	int (*read)(struct kvm_io_device *this,
+		    gpa_t addr,
+		    int len,
+		    void *val);
+	int (*write)(struct kvm_io_device *this,
+		     gpa_t addr,
+		     int len,
+		     const void *val);
+	void (*destructor)(struct kvm_io_device *this);
+};
+
+struct kvm_io_device {
+	const struct kvm_io_device_ops *ops;
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 #ifdef CONFIG_PREEMPT_NOTIFIERS
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
index 12fd3ca..d1f5651 100644
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@@ -17,32 +17,9 @@
 #define __KVM_IODEV_H__
 
 #include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
 #include <asm/errno.h>
 
-struct kvm_io_device;
-
-/**
- * kvm_io_device_ops are called under kvm slots_lock.
- * read and write handlers return 0 if the transaction has been handled,
- * or non-zero to have it passed to the next device.
- **/
-struct kvm_io_device_ops {
-	int (*read)(struct kvm_io_device *this,
-		    gpa_t addr,
-		    int len,
-		    void *val);
-	int (*write)(struct kvm_io_device *this,
-		     gpa_t addr,
-		     int len,
-		     const void *val);
-	void (*destructor)(struct kvm_io_device *this);
-};
-
-
-struct kvm_io_device {
-	const struct kvm_io_device_ops *ops;
-};
-
 static inline void kvm_iodevice_init(struct kvm_io_device *dev,
 				     const struct kvm_io_device_ops *ops)
 {
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/4] KVM: Add kvm_io_ext_data to IO handler
  2011-02-18  8:53 [PATCH 0/4 v9] MSI-X MMIO support for KVM Sheng Yang
  2011-02-18  8:53 ` [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
@ 2011-02-18  8:53 ` Sheng Yang
  2011-02-18  8:53 ` [PATCH 3/4] KVM: Emulate MSI-X table in kernel Sheng Yang
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 20+ messages in thread
From: Sheng Yang @ 2011-02-18  8:53 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Michael S. Tsirkin, Sheng Yang

Add a new parameter to the IO write handler, so that we can transfer
information from the IO handler back to the caller.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 arch/x86/kvm/i8254.c      |    6 ++++--
 arch/x86/kvm/i8259.c      |    3 ++-
 arch/x86/kvm/lapic.c      |    3 ++-
 arch/x86/kvm/x86.c        |   13 ++++++++-----
 include/linux/kvm_host.h  |   12 ++++++++++--
 virt/kvm/coalesced_mmio.c |    3 ++-
 virt/kvm/eventfd.c        |    2 +-
 virt/kvm/ioapic.c         |    2 +-
 virt/kvm/iodev.h          |    6 ++++--
 virt/kvm/kvm_main.c       |    4 ++--
 10 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index efad723..bd8f0c5 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -439,7 +439,8 @@ static inline int pit_in_range(gpa_t addr)
 }
 
 static int pit_ioport_write(struct kvm_io_device *this,
-			    gpa_t addr, int len, const void *data)
+			    gpa_t addr, int len, const void *data,
+			    struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_pit *pit = dev_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
@@ -585,7 +586,8 @@ static int pit_ioport_read(struct kvm_io_device *this,
 }
 
 static int speaker_ioport_write(struct kvm_io_device *this,
-				gpa_t addr, int len, const void *data)
+				gpa_t addr, int len, const void *data,
+				struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_pit *pit = speaker_to_pit(this);
 	struct kvm_kpit_state *pit_state = &pit->pit_state;
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 3cece05..96b1070 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -480,7 +480,8 @@ static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
 }
 
 static int picdev_write(struct kvm_io_device *this,
-			 gpa_t addr, int len, const void *val)
+			 gpa_t addr, int len, const void *val,
+			 struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_pic *s = to_pic(this);
 	unsigned char data = *(unsigned char *)val;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 93cf9d0..f413e9c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -836,7 +836,8 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 }
 
 static int apic_mmio_write(struct kvm_io_device *this,
-			    gpa_t address, int len, const void *data)
+			    gpa_t address, int len, const void *data,
+			    struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_lapic *apic = to_lapic(this);
 	unsigned int offset = address - apic->base_address;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fa708c9..21b84e2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3571,13 +3571,14 @@ static void kvm_init_msr_list(void)
 }
 
 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
-			   const void *v)
+			   const void *v, struct kvm_io_ext_data *ext_data)
 {
 	if (vcpu->arch.apic &&
-	    !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
+	    !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v, ext_data))
 		return 0;
 
-	return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+	return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS,
+				addr, len, v, ext_data);
 }
 
 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
@@ -3807,6 +3808,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 					   struct kvm_vcpu *vcpu)
 {
 	gpa_t                 gpa;
+	struct kvm_io_ext_data ext_data;
 
 	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
 
@@ -3825,7 +3827,7 @@ mmio:
 	/*
 	 * Is this MMIO handled locally?
 	 */
-	if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
+	if (!vcpu_mmio_write(vcpu, gpa, bytes, val, &ext_data))
 		return X86EMUL_CONTINUE;
 
 	vcpu->mmio_needed = 1;
@@ -3940,6 +3942,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 {
 	/* TODO: String I/O for in kernel device */
 	int r;
+	struct kvm_io_ext_data ext_data;
 
 	if (vcpu->arch.pio.in)
 		r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
@@ -3947,7 +3950,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
 	else
 		r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
 				     vcpu->arch.pio.port, vcpu->arch.pio.size,
-				     pd);
+				     pd, &ext_data);
 	return r;
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7d313e0..6bb211d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -68,8 +68,15 @@ enum kvm_bus {
 	KVM_NR_BUSES
 };
 
+struct kvm_io_ext_data {
+	int type;
+	union {
+		char padding[256];
+	};
+};
+
 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
-		     int len, const void *val);
+		     int len, const void *val, struct kvm_io_ext_data *data);
 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
 		    void *val);
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -113,7 +120,8 @@ struct kvm_io_device_ops {
 	int (*write)(struct kvm_io_device *this,
 		     gpa_t addr,
 		     int len,
-		     const void *val);
+		     const void *val,
+		     struct kvm_io_ext_data *data);
 	void (*destructor)(struct kvm_io_device *this);
 };
 
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index fc84875..37b254c 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -59,7 +59,8 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
 }
 
 static int coalesced_mmio_write(struct kvm_io_device *this,
-				gpa_t addr, int len, const void *val)
+				gpa_t addr, int len, const void *val,
+				struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
 	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 2ca4535..8edd757 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -483,7 +483,7 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
 /* MMIO/PIO writes trigger an event if the addr/val match */
 static int
 ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
-		const void *val)
+		const void *val, struct kvm_io_ext_data *ext_data)
 {
 	struct _ioeventfd *p = to_ioeventfd(this);
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 0b9df83..6a027ef 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -321,7 +321,7 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 }
 
 static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
-			     const void *val)
+			     const void *val, struct kvm_io_ext_data *ext_data)
 {
 	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 data;
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
index d1f5651..340ab79 100644
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@@ -33,9 +33,11 @@ static inline int kvm_iodevice_read(struct kvm_io_device *dev,
 }
 
 static inline int kvm_iodevice_write(struct kvm_io_device *dev,
-				     gpa_t addr, int l, const void *v)
+				     gpa_t addr, int l, const void *v,
+				     struct kvm_io_ext_data *data)
 {
-	return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
+	return dev->ops->write ?
+		dev->ops->write(dev, addr, l, v, data) : -EOPNOTSUPP;
 }
 
 static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b1b6cbb..a61f90e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2221,14 +2221,14 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
-		     int len, const void *val)
+		     int len, const void *val, struct kvm_io_ext_data *ext_data)
 {
 	int i;
 	struct kvm_io_bus *bus;
 
 	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
 	for (i = 0; i < bus->dev_count; i++)
-		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
+		if (!kvm_iodevice_write(bus->devs[i], addr, len, val, ext_data))
 			return 0;
 	return -EOPNOTSUPP;
 }
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/4] KVM: Emulate MSI-X table in kernel
  2011-02-18  8:53 [PATCH 0/4 v9] MSI-X MMIO support for KVM Sheng Yang
  2011-02-18  8:53 ` [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
  2011-02-18  8:53 ` [PATCH 2/4] KVM: Add kvm_io_ext_data to IO handler Sheng Yang
@ 2011-02-18  8:53 ` Sheng Yang
  2011-02-18  8:53 ` [PATCH 4/4] KVM: Add documents for MSI-X MMIO API Sheng Yang
  2011-02-22 18:08 ` [PATCH 0/4 v9] MSI-X MMIO support for KVM Marcelo Tosatti
  4 siblings, 0 replies; 20+ messages in thread
From: Sheng Yang @ 2011-02-18  8:53 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Michael S. Tsirkin, Sheng Yang

With this, we can now support the mask bit operation for assigned devices.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 arch/x86/kvm/Makefile    |    2 +-
 arch/x86/kvm/x86.c       |   29 ++++-
 include/linux/kvm.h      |   28 +++++
 include/linux/kvm_host.h |   32 +++++
 virt/kvm/assigned-dev.c  |   44 +++++++
 virt/kvm/kvm_main.c      |   38 ++++++-
 virt/kvm/msix_mmio.c     |  293 ++++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/msix_mmio.h     |   25 ++++
 8 files changed, 479 insertions(+), 12 deletions(-)
 create mode 100644 virt/kvm/msix_mmio.c
 create mode 100644 virt/kvm/msix_mmio.h

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index f15501f..3a0d851 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,7 +7,7 @@ CFLAGS_vmx.o := -I.
 
 kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 				coalesced_mmio.o irq_comm.o eventfd.o \
-				assigned-dev.o)
+				assigned-dev.o msix_mmio.o)
 kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 21b84e2..43a5e70 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1966,6 +1966,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
 	case KVM_CAP_XSAVE:
 	case KVM_CAP_ASYNC_PF:
+	case KVM_CAP_MSIX_MMIO:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -3809,6 +3810,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 {
 	gpa_t                 gpa;
 	struct kvm_io_ext_data ext_data;
+	int r;
 
 	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
 
@@ -3824,18 +3826,31 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 
 mmio:
 	trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+	r = vcpu_mmio_write(vcpu, gpa, bytes, val, &ext_data);
 	/*
 	 * Is this MMIO handled locally?
 	 */
-	if (!vcpu_mmio_write(vcpu, gpa, bytes, val, &ext_data))
+	if (!r)
 		return X86EMUL_CONTINUE;
 
-	vcpu->mmio_needed = 1;
-	vcpu->run->exit_reason = KVM_EXIT_MMIO;
-	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-	vcpu->run->mmio.len = vcpu->mmio_size = bytes;
-	vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
-	memcpy(vcpu->run->mmio.data, val, bytes);
+	if (r == -ENOTSYNC) {
+		vcpu->run->exit_reason = KVM_EXIT_MSIX_ROUTING_UPDATE;
+		vcpu->run->msix_routing.dev_id =
+			ext_data.msix_routing.dev_id;
+		vcpu->run->msix_routing.type =
+			ext_data.msix_routing.type;
+		vcpu->run->msix_routing.entry_idx =
+			ext_data.msix_routing.entry_idx;
+		vcpu->run->msix_routing.flags =
+			ext_data.msix_routing.flags;
+	} else  {
+		vcpu->mmio_needed = 1;
+		vcpu->run->exit_reason = KVM_EXIT_MMIO;
+		vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
+		vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+		vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
+		memcpy(vcpu->run->mmio.data, val, bytes);
+	}
 
 	return X86EMUL_CONTINUE;
 }
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ea2dc1a..4393e4e 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -161,6 +161,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_NMI              16
 #define KVM_EXIT_INTERNAL_ERROR   17
 #define KVM_EXIT_OSI              18
+#define KVM_EXIT_MSIX_ROUTING_UPDATE 19
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -264,6 +265,13 @@ struct kvm_run {
 		struct {
 			__u64 gprs[32];
 		} osi;
+		/* KVM_EXIT_MSIX_ROUTING_UPDATE*/
+		struct {
+			__u32 dev_id;
+			__u16 type;
+			__u16 entry_idx;
+			__u64 flags;
+		} msix_routing;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -541,6 +549,7 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_PPC_GET_PVINFO 57
 #define KVM_CAP_PPC_IRQ_LEVEL 58
 #define KVM_CAP_ASYNC_PF 59
+#define KVM_CAP_MSIX_MMIO 60
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -672,6 +681,9 @@ struct kvm_clock_data {
 #define KVM_XEN_HVM_CONFIG        _IOW(KVMIO,  0x7a, struct kvm_xen_hvm_config)
 #define KVM_SET_CLOCK             _IOW(KVMIO,  0x7b, struct kvm_clock_data)
 #define KVM_GET_CLOCK             _IOR(KVMIO,  0x7c, struct kvm_clock_data)
+/* Available with KVM_CAP_MSIX_MMIO */
+#define KVM_REGISTER_MSIX_MMIO    _IOW(KVMIO,  0x7d, struct kvm_msix_mmio_user)
+#define KVM_UNREGISTER_MSIX_MMIO  _IOW(KVMIO,  0x7e, struct kvm_msix_mmio_user)
 /* Available with KVM_CAP_PIT_STATE2 */
 #define KVM_GET_PIT2              _IOR(KVMIO,  0x9f, struct kvm_pit_state2)
 #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
@@ -795,4 +807,20 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV	    (1 << 0)
+
+#define KVM_MSIX_MMIO_TYPE_BASE_TABLE	    (1 << 8)
+
+#define KVM_MSIX_MMIO_TYPE_DEV_MASK	    0x00ff
+#define KVM_MSIX_MMIO_TYPE_BASE_MASK	    0xff00
+struct kvm_msix_mmio_user {
+	__u32 dev_id;
+	__u16 type;
+	__u16 max_entries_nr;
+	__u64 base_addr;
+	__u64 base_va;
+	__u64 flags;
+	__u64 reserved[4];
+};
+
 #endif /* __LINUX_KVM_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6bb211d..d6d8654 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -68,9 +68,16 @@ enum kvm_bus {
 	KVM_NR_BUSES
 };
 
+#define KVM_IO_EXT_DATA_TYPE_MSIX_ROUTING   1
 struct kvm_io_ext_data {
 	int type;
 	union {
+		struct {
+			u32 dev_id;
+			u16 type;
+			u16 entry_idx;
+			u64 flags;
+		} msix_routing;
 		char padding[256];
 	};
 };
@@ -241,6 +248,27 @@ struct kvm_memslots {
 					KVM_PRIVATE_MEM_SLOTS];
 };
 
+#define KVM_MSIX_MMIO_MAX    32
+
+struct kvm_msix_mmio {
+	u32 dev_id;
+	u16 type;
+	u16 max_entries_nr;
+	u64 flags;
+	gpa_t table_base_addr;
+	hva_t table_base_va;
+	gpa_t pba_base_addr;
+	hva_t pba_base_va;
+};
+
+struct kvm_msix_mmio_dev {
+	struct kvm *kvm;
+	struct kvm_io_device table_dev;
+	int mmio_nr;
+	struct kvm_msix_mmio mmio[KVM_MSIX_MMIO_MAX];
+	struct mutex lock;
+};
+
 struct kvm {
 	spinlock_t mmu_lock;
 	raw_spinlock_t requests_lock;
@@ -289,6 +317,7 @@ struct kvm {
 	long mmu_notifier_count;
 #endif
 	long tlbs_dirty;
+	struct kvm_msix_mmio_dev msix_mmio_dev;
 };
 
 /* The guest did something we don't support. */
@@ -561,6 +590,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
+int kvm_assigned_device_update_msix_mask_bit(struct kvm *kvm,
+			int assigned_dev_id, int entry, bool mask);
+
 /* For vcpu->arch.iommu_flags */
 #define KVM_IOMMU_CACHE_COHERENCY	0x1
 
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index ae72ae6..d1598a6 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include "irq.h"
+#include "msix_mmio.h"
 
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
 						      int assigned_dev_id)
@@ -191,12 +192,25 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
 }
 
+static void assigned_device_free_msix_mmio(struct kvm *kvm,
+				struct kvm_assigned_dev_kernel *adev)
+{
+	struct kvm_msix_mmio mmio;
+
+	mmio.dev_id = adev->assigned_dev_id;
+	mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
+		    KVM_MSIX_MMIO_TYPE_BASE_TABLE;
+	kvm_free_msix_mmio(kvm, &mmio);
+}
+
 static void kvm_free_assigned_device(struct kvm *kvm,
 				     struct kvm_assigned_dev_kernel
 				     *assigned_dev)
 {
 	kvm_free_assigned_irq(kvm, assigned_dev);
 
+	assigned_device_free_msix_mmio(kvm, assigned_dev);
+
 	__pci_reset_function(assigned_dev->dev);
 	pci_restore_state(assigned_dev->dev);
 
@@ -785,3 +799,33 @@ out:
 	return r;
 }
 
+/* The caller should hold kvm->lock */
+int kvm_assigned_device_update_msix_mask_bit(struct kvm *kvm,
+				int assigned_dev_id, int entry, bool mask)
+{
+	int r = -EFAULT;
+	struct kvm_assigned_dev_kernel *adev;
+	int i;
+
+	if (!irqchip_in_kernel(kvm))
+		return r;
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				      assigned_dev_id);
+	if (!adev)
+		goto out;
+
+	/* For non-MSIX enabled devices, entries_nr == 0 */
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->host_msix_entries[i].entry == entry) {
+			if (mask)
+				disable_irq_nosync(
+					adev->host_msix_entries[i].vector);
+			else
+				enable_irq(adev->host_msix_entries[i].vector);
+			r = 0;
+			break;
+		}
+out:
+	return r;
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a61f90e..f211e49 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -56,6 +56,7 @@
 
 #include "coalesced_mmio.h"
 #include "async_pf.h"
+#include "msix_mmio.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -509,6 +510,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
+	kvm_unregister_msix_mmio_dev(kvm);
 	spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
@@ -1877,6 +1879,24 @@ static long kvm_vm_ioctl(struct file *filp,
 		mutex_unlock(&kvm->lock);
 		break;
 #endif
+	case KVM_REGISTER_MSIX_MMIO: {
+		struct kvm_msix_mmio_user mmio_user;
+
+		r = -EFAULT;
+		if (copy_from_user(&mmio_user, argp, sizeof mmio_user))
+			goto out;
+		r = kvm_vm_ioctl_register_msix_mmio(kvm, &mmio_user);
+		break;
+	}
+	case KVM_UNREGISTER_MSIX_MMIO: {
+		struct kvm_msix_mmio_user mmio_user;
+
+		r = -EFAULT;
+		if (copy_from_user(&mmio_user, argp, sizeof mmio_user))
+			goto out;
+		r = kvm_vm_ioctl_unregister_msix_mmio(kvm, &mmio_user);
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		if (r == -ENOTTY)
@@ -1988,6 +2008,12 @@ static int kvm_dev_ioctl_create_vm(void)
 		return r;
 	}
 #endif
+	r = kvm_register_msix_mmio_dev(kvm);
+	if (r < 0) {
+		kvm_put_kvm(kvm);
+		return r;
+	}
+
 	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
 	if (r < 0)
 		kvm_put_kvm(kvm);
@@ -2223,14 +2249,18 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val, struct kvm_io_ext_data *ext_data)
 {
-	int i;
+	int i, r = -EOPNOTSUPP;
 	struct kvm_io_bus *bus;
 
 	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-	for (i = 0; i < bus->dev_count; i++)
-		if (!kvm_iodevice_write(bus->devs[i], addr, len, val, ext_data))
+	for (i = 0; i < bus->dev_count; i++) {
+		r = kvm_iodevice_write(bus->devs[i], addr, len, val, ext_data);
+		if (r == -ENOTSYNC)
+			break;
+		else if (!r)
 			return 0;
-	return -EOPNOTSUPP;
+	}
+	return r;
 }
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
diff --git a/virt/kvm/msix_mmio.c b/virt/kvm/msix_mmio.c
new file mode 100644
index 0000000..9ec4252
--- /dev/null
+++ b/virt/kvm/msix_mmio.c
@@ -0,0 +1,293 @@
+/*
+ * MSI-X MMIO emulation
+ *
+ * Copyright (c) 2010 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Author:
+ *   Sheng Yang <sheng.yang@intel.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include "msix_mmio.h"
+#include "iodev.h"
+
+static int update_msix_mask_bit(struct kvm *kvm, struct kvm_msix_mmio *mmio,
+				int entry, u32 flag)
+{
+	if (mmio->type & KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV)
+		return kvm_assigned_device_update_msix_mask_bit(kvm,
+				mmio->dev_id, entry, flag);
+	return -EFAULT;
+}
+
+/* Caller must hold dev->lock */
+static int get_mmio_table_index(struct kvm_msix_mmio_dev *dev,
+				gpa_t addr, int len)
+{
+	gpa_t start, end;
+	int i, r = -EINVAL;
+
+	for (i = 0; i < dev->mmio_nr; i++) {
+		start = dev->mmio[i].table_base_addr;
+		end = dev->mmio[i].table_base_addr + PCI_MSIX_ENTRY_SIZE *
+			dev->mmio[i].max_entries_nr;
+		if (addr >= start && addr + len <= end) {
+			r = i;
+			break;
+		}
+	}
+
+	return r;
+}
+
+static int msix_table_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+				void *val)
+{
+	struct kvm_msix_mmio_dev *mmio_dev =
+		container_of(this, struct kvm_msix_mmio_dev, table_dev);
+	struct kvm_msix_mmio *mmio;
+	int idx, ret = 0, entry, offset, r;
+
+	mutex_lock(&mmio_dev->lock);
+	idx = get_mmio_table_index(mmio_dev, addr, len);
+	if (idx < 0) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+	if ((addr & 0x3) || (len != 4 && len != 8))
+		goto out;
+
+	offset = addr & 0xf;
+	if (offset == PCI_MSIX_ENTRY_VECTOR_CTRL && len == 8)
+		goto out;
+
+	mmio = &mmio_dev->mmio[idx];
+	entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE;
+	r = copy_from_user(val, (void __user *)(mmio->table_base_va +
+			entry * PCI_MSIX_ENTRY_SIZE + offset), len);
+	if (r)
+		goto out;
+out:
+	mutex_unlock(&mmio_dev->lock);
+	return ret;
+}
+
+static int msix_table_mmio_write(struct kvm_io_device *this, gpa_t addr,
+				int len, const void *val,
+				struct kvm_io_ext_data *ext_data)
+{
+	struct kvm_msix_mmio_dev *mmio_dev =
+		container_of(this, struct kvm_msix_mmio_dev, table_dev);
+	struct kvm_msix_mmio *mmio;
+	int idx, entry, offset, ret = 0, r = 0;
+	gpa_t entry_base;
+	u32 old_ctrl, new_ctrl;
+	u32 *ctrl_pos;
+
+	mutex_lock(&mmio_dev->kvm->lock);
+	mutex_lock(&mmio_dev->lock);
+	idx = get_mmio_table_index(mmio_dev, addr, len);
+	if (idx < 0) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+	if ((addr & 0x3) || (len != 4 && len != 8))
+		goto out;
+
+	offset = addr & 0xF;
+	if (offset == PCI_MSIX_ENTRY_VECTOR_CTRL && len == 8)
+		goto out;
+
+	mmio = &mmio_dev->mmio[idx];
+	entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE;
+	entry_base = mmio->table_base_va + entry * PCI_MSIX_ENTRY_SIZE;
+	ctrl_pos = (u32 *)(entry_base + PCI_MSIX_ENTRY_VECTOR_CTRL);
+
+	if (get_user(old_ctrl, ctrl_pos))
+		goto out;
+
+	/* No allow writing to other fields when entry is unmasked */
+	if (!(old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) &&
+	    offset != PCI_MSIX_ENTRY_VECTOR_CTRL)
+		goto out;
+
+	if (copy_to_user((void __user *)(entry_base + offset), val, len))
+		goto out;
+
+	if (get_user(new_ctrl, ctrl_pos))
+		goto out;
+
+	ext_data->type = KVM_IO_EXT_DATA_TYPE_MSIX_ROUTING;
+	ext_data->msix_routing.dev_id = mmio->dev_id;
+	ext_data->msix_routing.type = mmio->type;
+	ext_data->msix_routing.entry_idx = entry;
+	ext_data->msix_routing.flags = 0;
+
+	if ((offset < PCI_MSIX_ENTRY_VECTOR_CTRL && len == 4) ||
+	    (offset < PCI_MSIX_ENTRY_DATA && len == 8))
+		ret = -ENOTSYNC;
+	if (old_ctrl == new_ctrl)
+		goto out;
+	if (!(old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) &&
+			(new_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT))
+		r = update_msix_mask_bit(mmio_dev->kvm, mmio, entry, 1);
+	else if ((old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) &&
+			!(new_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT))
+		r = update_msix_mask_bit(mmio_dev->kvm, mmio, entry, 0);
+	if (r || ret)
+		ret = -ENOTSYNC;
+out:
+	mutex_unlock(&mmio_dev->lock);
+	mutex_unlock(&mmio_dev->kvm->lock);
+	return ret;
+}
+
+static const struct kvm_io_device_ops msix_mmio_table_ops = {
+	.read     = msix_table_mmio_read,
+	.write    = msix_table_mmio_write,
+};
+
+int kvm_register_msix_mmio_dev(struct kvm *kvm)
+{
+	int ret;
+
+	kvm_iodevice_init(&kvm->msix_mmio_dev.table_dev, &msix_mmio_table_ops);
+	mutex_init(&kvm->msix_mmio_dev.lock);
+	kvm->msix_mmio_dev.kvm = kvm;
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+				      &kvm->msix_mmio_dev.table_dev);
+	mutex_unlock(&kvm->slots_lock);
+	return ret;
+}
+
+int kvm_unregister_msix_mmio_dev(struct kvm *kvm)
+{
+	int ret;
+
+	mutex_lock(&kvm->slots_lock);
+	ret = kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+				      &kvm->msix_mmio_dev.table_dev);
+	mutex_unlock(&kvm->slots_lock);
+	return ret;
+}
+
+int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm,
+				    struct kvm_msix_mmio_user *mmio_user)
+{
+	struct kvm_msix_mmio_dev *mmio_dev = &kvm->msix_mmio_dev;
+	struct kvm_msix_mmio *mmio = NULL;
+	int r = 0, i;
+
+	mutex_lock(&mmio_dev->lock);
+	for (i = 0; i < mmio_dev->mmio_nr; i++) {
+		if (mmio_dev->mmio[i].dev_id == mmio_user->dev_id &&
+		    (mmio_dev->mmio[i].type & KVM_MSIX_MMIO_TYPE_DEV_MASK) ==
+		    (mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK)) {
+			mmio = &mmio_dev->mmio[i];
+			if (mmio->max_entries_nr != mmio_user->max_entries_nr) {
+				r = -EINVAL;
+				goto out;
+			}
+			break;
+		}
+	}
+	if (mmio_user->max_entries_nr > KVM_MAX_MSIX_PER_DEV) {
+		r = -EINVAL;
+		goto out;
+	}
+	/* All reserved currently */
+	if (mmio_user->flags) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK) !=
+			KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV) {
+		r = -EINVAL;
+		goto out;
+	}
+	if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_BASE_MASK) !=
+			KVM_MSIX_MMIO_TYPE_BASE_TABLE) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (!access_ok(VERIFY_WRITE, mmio_user->base_va,
+			mmio_user->max_entries_nr * PCI_MSIX_ENTRY_SIZE)) {
+		r = -EINVAL;
+		goto out;
+	}
+	if (!mmio) {
+		if (mmio_dev->mmio_nr == KVM_MSIX_MMIO_MAX) {
+			r = -ENOSPC;
+			goto out;
+		}
+		mmio = &mmio_dev->mmio[mmio_dev->mmio_nr];
+		mmio_dev->mmio_nr++;
+	}
+
+	mmio->max_entries_nr = mmio_user->max_entries_nr;
+	mmio->dev_id = mmio_user->dev_id;
+	mmio->flags = mmio_user->flags;
+
+	if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK) ==
+			KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV)
+		mmio->type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV;
+	if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_BASE_MASK) ==
+			KVM_MSIX_MMIO_TYPE_BASE_TABLE) {
+		mmio->type |= KVM_MSIX_MMIO_TYPE_BASE_TABLE;
+		mmio->table_base_addr = mmio_user->base_addr;
+		mmio->table_base_va = mmio_user->base_va;
+	}
+out:
+	mutex_unlock(&mmio_dev->lock);
+	return r;
+}
+
+int kvm_free_msix_mmio(struct kvm *kvm, struct kvm_msix_mmio *mmio)
+{
+	struct kvm_msix_mmio_dev *mmio_dev = &kvm->msix_mmio_dev;
+	int r = 0, i, j;
+	bool found = 0;
+
+	if (!mmio)
+		return 0;
+
+	mutex_lock(&mmio_dev->lock);
+	BUG_ON(mmio_dev->mmio_nr > KVM_MSIX_MMIO_MAX);
+	for (i = 0; i < mmio_dev->mmio_nr; i++) {
+		if (mmio_dev->mmio[i].dev_id == mmio->dev_id &&
+		    mmio_dev->mmio[i].type == mmio->type) {
+			found = true;
+			for (j = i; j < mmio_dev->mmio_nr - 1; j++)
+				mmio_dev->mmio[j] = mmio_dev->mmio[j + 1];
+			mmio_dev->mmio[mmio_dev->mmio_nr].max_entries_nr = 0;
+			mmio_dev->mmio[mmio_dev->mmio_nr].dev_id = 0;
+			mmio_dev->mmio[mmio_dev->mmio_nr].type = 0;
+			mmio_dev->mmio_nr--;
+			break;
+		}
+	}
+	if (!found)
+		r = -EINVAL;
+	mutex_unlock(&mmio_dev->lock);
+	return r;
+}
+
+int kvm_vm_ioctl_unregister_msix_mmio(struct kvm *kvm,
+				      struct kvm_msix_mmio_user *mmio_user)
+{
+	struct kvm_msix_mmio mmio;
+
+	mmio.dev_id = mmio_user->dev_id;
+	mmio.type = mmio_user->type;
+
+	return kvm_free_msix_mmio(kvm, &mmio);
+}
+
diff --git a/virt/kvm/msix_mmio.h b/virt/kvm/msix_mmio.h
new file mode 100644
index 0000000..01b6587
--- /dev/null
+++ b/virt/kvm/msix_mmio.h
@@ -0,0 +1,25 @@
+#ifndef __KVM_MSIX_MMIO_H__
+#define __KVM_MSIX_MMIO_H__
+/*
+ * MSI-X MMIO emulation
+ *
+ * Copyright (c) 2010 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Author:
+ *   Sheng Yang <sheng.yang@intel.com>
+ */
+
+#include <linux/pci.h>
+
+int kvm_register_msix_mmio_dev(struct kvm *kvm);
+int kvm_unregister_msix_mmio_dev(struct kvm *kvm);
+int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm,
+				    struct kvm_msix_mmio_user *mmio_user);
+int kvm_vm_ioctl_unregister_msix_mmio(struct kvm *kvm,
+				      struct kvm_msix_mmio_user *mmio_user);
+int kvm_free_msix_mmio(struct kvm *kvm, struct kvm_msix_mmio *mmio_user);
+
+#endif
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 4/4] KVM: Add documents for MSI-X MMIO API
  2011-02-18  8:53 [PATCH 0/4 v9] MSI-X MMIO support for KVM Sheng Yang
                   ` (2 preceding siblings ...)
  2011-02-18  8:53 ` [PATCH 3/4] KVM: Emulate MSI-X table in kernel Sheng Yang
@ 2011-02-18  8:53 ` Sheng Yang
  2011-02-22 18:08 ` [PATCH 0/4 v9] MSI-X MMIO support for KVM Marcelo Tosatti
  4 siblings, 0 replies; 20+ messages in thread
From: Sheng Yang @ 2011-02-18  8:53 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Michael S. Tsirkin, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 Documentation/kvm/api.txt |   58 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index e1a9297..dd10c3b 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -1263,6 +1263,53 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+4.54 KVM_REGISTER_MSIX_MMIO
+
+Capability: KVM_CAP_MSIX_MMIO
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_msix_mmio_user (in)
+Returns: 0 on success, -1 on error
+
+This API indicates an MSI-X MMIO address of a guest device. Then all MMIO
+operations would be handled by the kernel. When necessary (e.g. when the MSI
+data/address changed), KVM would exit to userspace using
+KVM_EXIT_MSIX_ROUTING_UPDATE to indicate the MMIO modification and require
+userspace to update the IRQ routing table.
+
+NOTICE: Writing the MSI-X MMIO page after it was registered with this API may
+be dangerous for the userspace program. Writing it while the VM is running may
+result in synchronization issues, so the assigned device can't work properly.
+Writing is allowed when the VM is not running and can be used as a
+save/restore mechanism.
+
+struct kvm_msix_mmio_user {
+	__u32 dev_id;
+	__u16 type;		/* Device type and MMIO address type */
+	__u16 max_entries_nr;	/* Maximum entries supported */
+	__u64 base_addr;	/* Guest physical address of MMIO */
+	__u64 base_va;		/* Host virtual address of MMIO mapping */
+	__u64 flags;		/* Reserved for now */
+	__u64 reserved[4];
+};
+
+Current device type can be:
+#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV	    (1 << 0)
+
+Current MMIO type can be:
+#define KVM_MSIX_MMIO_TYPE_BASE_TABLE	    (1 << 8)
+
+4.55 KVM_UNREGISTER_MSIX_MMIO
+
+Capability: KVM_CAP_MSIX_MMIO
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_msix_mmio_user (in)
+Returns: 0 on success, -1 on error
+
+This API unregisters the specified MSI-X MMIO region, identified by the dev_id
+and type fields of struct kvm_msix_mmio_user.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
@@ -1445,6 +1492,17 @@ Userspace can now handle the hypercall and when it's done modify the gprs as
 necessary. Upon guest entry all guest GPRs will then be replaced by the values
 in this struct.
 
+		/* KVM_EXIT_MSIX_ROUTING_UPDATE */
+		struct {
+			__u32 dev_id;
+			__u16 type;
+			__u16 entry_idx;
+			__u64 flags;
+		} msix_routing;
+
+KVM_EXIT_MSIX_ROUTING_UPDATE indicates that one MSI-X entry has been modified,
+and userspace needs to update the corresponding routing table.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-02-18  8:53 [PATCH 0/4 v9] MSI-X MMIO support for KVM Sheng Yang
                   ` (3 preceding siblings ...)
  2011-02-18  8:53 ` [PATCH 4/4] KVM: Add documents for MSI-X MMIO API Sheng Yang
@ 2011-02-22 18:08 ` Marcelo Tosatti
  2011-09-01 11:27   ` Jan Kiszka
  4 siblings, 1 reply; 20+ messages in thread
From: Marcelo Tosatti @ 2011-02-22 18:08 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Avi Kivity, kvm, Michael S. Tsirkin

On Fri, Feb 18, 2011 at 04:53:09PM +0800, Sheng Yang wrote:
> Sorry for the long delay, just come back from vacation...
> 
> Change from v8:
> 1. Update struct kvm_run to contain MSI-X routing update exit specific
> information.
> 2. Fix a mmio_nr counting bug.
> 
> Notice this patchset still based on 2.6.37 due to a block bug on assigned
> device in the upstream now.
> 
> Sheng Yang (4):
>   KVM: Move struct kvm_io_device to kvm_host.h
>   KVM: Add kvm_io_ext_data to IO handler
>   KVM: Emulate MSI-X table in kernel
>   KVM: Add documents for MSI-X MMIO API

Looks good to me.



^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h
  2011-02-24  9:51 [PATCH 0/4 v10] " Sheng Yang
@ 2011-02-24  9:51 ` Sheng Yang
  0 siblings, 0 replies; 20+ messages in thread
From: Sheng Yang @ 2011-02-24  9:51 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti
  Cc: Alex Williamson, Michael S. Tsirkin, kvm, Sheng Yang

Then it can be used by other struct in kvm_host.h

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 include/linux/kvm_host.h |   23 +++++++++++++++++++++++
 virt/kvm/iodev.h         |   25 +------------------------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b5021db..7d313e0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -98,6 +98,29 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
+struct kvm_io_device;
+
+/**
+ * kvm_io_device_ops are called under kvm slots_lock.
+ * read and write handlers return 0 if the transaction has been handled,
+ * or non-zero to have it passed to the next device.
+ **/
+struct kvm_io_device_ops {
+	int (*read)(struct kvm_io_device *this,
+		    gpa_t addr,
+		    int len,
+		    void *val);
+	int (*write)(struct kvm_io_device *this,
+		     gpa_t addr,
+		     int len,
+		     const void *val);
+	void (*destructor)(struct kvm_io_device *this);
+};
+
+struct kvm_io_device {
+	const struct kvm_io_device_ops *ops;
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 #ifdef CONFIG_PREEMPT_NOTIFIERS
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
index 12fd3ca..d1f5651 100644
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@@ -17,32 +17,9 @@
 #define __KVM_IODEV_H__
 
 #include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
 #include <asm/errno.h>
 
-struct kvm_io_device;
-
-/**
- * kvm_io_device_ops are called under kvm slots_lock.
- * read and write handlers return 0 if the transaction has been handled,
- * or non-zero to have it passed to the next device.
- **/
-struct kvm_io_device_ops {
-	int (*read)(struct kvm_io_device *this,
-		    gpa_t addr,
-		    int len,
-		    void *val);
-	int (*write)(struct kvm_io_device *this,
-		     gpa_t addr,
-		     int len,
-		     const void *val);
-	void (*destructor)(struct kvm_io_device *this);
-};
-
-
-struct kvm_io_device {
-	const struct kvm_io_device_ops *ops;
-};
-
 static inline void kvm_iodevice_init(struct kvm_io_device *dev,
 				     const struct kvm_io_device_ops *ops)
 {
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h
  2011-02-28  7:20 [PATCH 0/4 v11] MSI-X MMIO support for KVM Sheng Yang
@ 2011-02-28  7:20 ` Sheng Yang
  0 siblings, 0 replies; 20+ messages in thread
From: Sheng Yang @ 2011-02-28  7:20 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti
  Cc: Michael S. Tsirkin, Alex Williamson, kvm, Sheng Yang

Then it can be used by other struct in kvm_host.h

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 include/linux/kvm_host.h |   23 +++++++++++++++++++++++
 virt/kvm/iodev.h         |   25 +------------------------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b5021db..7d313e0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -98,6 +98,29 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
+struct kvm_io_device;
+
+/**
+ * kvm_io_device_ops are called under kvm slots_lock.
+ * read and write handlers return 0 if the transaction has been handled,
+ * or non-zero to have it passed to the next device.
+ **/
+struct kvm_io_device_ops {
+	int (*read)(struct kvm_io_device *this,
+		    gpa_t addr,
+		    int len,
+		    void *val);
+	int (*write)(struct kvm_io_device *this,
+		     gpa_t addr,
+		     int len,
+		     const void *val);
+	void (*destructor)(struct kvm_io_device *this);
+};
+
+struct kvm_io_device {
+	const struct kvm_io_device_ops *ops;
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 #ifdef CONFIG_PREEMPT_NOTIFIERS
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
index 12fd3ca..d1f5651 100644
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@@ -17,32 +17,9 @@
 #define __KVM_IODEV_H__
 
 #include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
 #include <asm/errno.h>
 
-struct kvm_io_device;
-
-/**
- * kvm_io_device_ops are called under kvm slots_lock.
- * read and write handlers return 0 if the transaction has been handled,
- * or non-zero to have it passed to the next device.
- **/
-struct kvm_io_device_ops {
-	int (*read)(struct kvm_io_device *this,
-		    gpa_t addr,
-		    int len,
-		    void *val);
-	int (*write)(struct kvm_io_device *this,
-		     gpa_t addr,
-		     int len,
-		     const void *val);
-	void (*destructor)(struct kvm_io_device *this);
-};
-
-
-struct kvm_io_device {
-	const struct kvm_io_device_ops *ops;
-};
-
 static inline void kvm_iodevice_init(struct kvm_io_device *dev,
 				     const struct kvm_io_device_ops *ops)
 {
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h
  2011-03-02  7:26 [PATCH 0/4 v12] MSI-X MMIO support for KVM Sheng Yang
@ 2011-03-02  7:26 ` Sheng Yang
  0 siblings, 0 replies; 20+ messages in thread
From: Sheng Yang @ 2011-03-02  7:26 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti
  Cc: Michael S. Tsirkin, Alex Williamson, kvm, Sheng Yang

Then it can be used by other struct in kvm_host.h

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 include/linux/kvm_host.h |   23 +++++++++++++++++++++++
 virt/kvm/iodev.h         |   25 +------------------------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b5021db..7d313e0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -98,6 +98,29 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
+struct kvm_io_device;
+
+/**
+ * kvm_io_device_ops are called under kvm slots_lock.
+ * read and write handlers return 0 if the transaction has been handled,
+ * or non-zero to have it passed to the next device.
+ **/
+struct kvm_io_device_ops {
+	int (*read)(struct kvm_io_device *this,
+		    gpa_t addr,
+		    int len,
+		    void *val);
+	int (*write)(struct kvm_io_device *this,
+		     gpa_t addr,
+		     int len,
+		     const void *val);
+	void (*destructor)(struct kvm_io_device *this);
+};
+
+struct kvm_io_device {
+	const struct kvm_io_device_ops *ops;
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 #ifdef CONFIG_PREEMPT_NOTIFIERS
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
index 12fd3ca..d1f5651 100644
--- a/virt/kvm/iodev.h
+++ b/virt/kvm/iodev.h
@@ -17,32 +17,9 @@
 #define __KVM_IODEV_H__
 
 #include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
 #include <asm/errno.h>
 
-struct kvm_io_device;
-
-/**
- * kvm_io_device_ops are called under kvm slots_lock.
- * read and write handlers return 0 if the transaction has been handled,
- * or non-zero to have it passed to the next device.
- **/
-struct kvm_io_device_ops {
-	int (*read)(struct kvm_io_device *this,
-		    gpa_t addr,
-		    int len,
-		    void *val);
-	int (*write)(struct kvm_io_device *this,
-		     gpa_t addr,
-		     int len,
-		     const void *val);
-	void (*destructor)(struct kvm_io_device *this);
-};
-
-
-struct kvm_io_device {
-	const struct kvm_io_device_ops *ops;
-};
-
 static inline void kvm_iodevice_init(struct kvm_io_device *dev,
 				     const struct kvm_io_device_ops *ops)
 {
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-02-22 18:08 ` [PATCH 0/4 v9] MSI-X MMIO support for KVM Marcelo Tosatti
@ 2011-09-01 11:27   ` Jan Kiszka
  2011-09-06  7:52     ` Avi Kivity
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Kiszka @ 2011-09-01 11:27 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, kvm, Michael S. Tsirkin, Alex Williamson

On 2011-02-22 19:08, Marcelo Tosatti wrote:
> On Fri, Feb 18, 2011 at 04:53:09PM +0800, Sheng Yang wrote:
>> Sorry for the long delay, just come back from vacation...
>>
>> Change from v8:
>> 1. Update struct kvm_run to contain MSI-X routing update exit specific
>> information.
>> 2. Fix a mmio_nr counting bug.
>>
>> Notice this patchset still based on 2.6.37 due to a block bug on assigned
>> device in the upstream now.
>>
>> Sheng Yang (4):
>>   KVM: Move struct kvm_io_device to kvm_host.h
>>   KVM: Add kvm_io_ext_data to IO handler
>>   KVM: Emulate MSI-X table in kernel
>>   KVM: Add documents for MSI-X MMIO API
> 
> Looks good to me.

What happened to this series and [1]? Forgotten because Sheng switched
his job?

I just realized that MSI-X per-vector masking for assigned devices is
not only slow (takes user space exits) but just simply broken (masking
bit is not evaluated and applied to the device).

BTW, the same is also true for that optional per-vector masking of
legacy MSI. Are there devices in the field that actually support this? I
haven't found one so far and tend to consider this feature not worth
implementing.

Jan

[1] http://thread.gmane.org/gmane.comp.emulators.kvm.devel/68144

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-09-01 11:27   ` Jan Kiszka
@ 2011-09-06  7:52     ` Avi Kivity
  2011-09-06  8:12       ` Michael S. Tsirkin
  2011-09-06  8:36       ` Jan Kiszka
  0 siblings, 2 replies; 20+ messages in thread
From: Avi Kivity @ 2011-09-06  7:52 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm, Michael S. Tsirkin, Alex Williamson

On 09/01/2011 02:27 PM, Jan Kiszka wrote:
> On 2011-02-22 19:08, Marcelo Tosatti wrote:
> >  On Fri, Feb 18, 2011 at 04:53:09PM +0800, Sheng Yang wrote:
> >>  Sorry for the long delay, just come back from vacation...
> >>
> >>  Change from v8:
> >>  1. Update struct kvm_run to contain MSI-X routing update exit specific
> >>  information.
> >>  2. Fix a mmio_nr counting bug.
> >>
> >>  Notice this patchset still based on 2.6.37 due to a block bug on assigned
> >>  device in the upstream now.
> >>
> >>  Sheng Yang (4):
> >>    KVM: Move struct kvm_io_device to kvm_host.h
> >>    KVM: Add kvm_io_ext_data to IO handler
> >>    KVM: Emulate MSI-X table in kernel
> >>    KVM: Add documents for MSI-X MMIO API
> >
> >  Looks good to me.
>
> What happened to this series and [1]? Forgotten because Sheng switched
> his job?

I actually have guilty feelings about it now and then.  It's just hard 
to merge such a complicated change with a non-trivial userspace interface.

> I just realized that MSI-X per-vector masking for assigned devices is
> not only slow (takes user space exits) but just simply broken (masking
> bit is not evaluated and applied to the device).

Isn't that just a bug in qemu?  Or something else?

>
> BTW, the same is also true for that optional per-vector masking of
> legacy MSI. Are there devices in the field that actually support this? I
> haven't found one so far and tend to consider this feature not worth
> implementing.

Don't know.  I don't like implementing features on the basis of bug 
reports, though.  On the other hand we can't really test it without a 
real device.

>
> Jan
>
> [1] http://thread.gmane.org/gmane.comp.emulators.kvm.devel/68144
>


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-09-06  7:52     ` Avi Kivity
@ 2011-09-06  8:12       ` Michael S. Tsirkin
  2011-09-06  8:36         ` Jan Kiszka
  2011-09-06  8:36       ` Jan Kiszka
  1 sibling, 1 reply; 20+ messages in thread
From: Michael S. Tsirkin @ 2011-09-06  8:12 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jan Kiszka, Marcelo Tosatti, kvm, Alex Williamson

On Tue, Sep 06, 2011 at 10:52:41AM +0300, Avi Kivity wrote:
> >
> >BTW, the same is also true for that optional per-vector masking of
> >legacy MSI. Are there devices in the field that actually support this? I
> >haven't found one so far and tend to consider this feature not worth
> >implementing.
> 
> Don't know.  I don't like implementing features on the basis of bug
> reports, though.  On the other hand we can't really test it without
> a real device.

Linux will use this capability if present. So
we could add support for an emulated device (e.g. e1000),
then test with nested virt once iommu emulation lands :)

-- 
MST

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-09-06  7:52     ` Avi Kivity
  2011-09-06  8:12       ` Michael S. Tsirkin
@ 2011-09-06  8:36       ` Jan Kiszka
  1 sibling, 0 replies; 20+ messages in thread
From: Jan Kiszka @ 2011-09-06  8:36 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Marcelo Tosatti, kvm@vger.kernel.org, Michael S. Tsirkin,
	Alex Williamson

On 2011-09-06 09:52, Avi Kivity wrote:
> On 09/01/2011 02:27 PM, Jan Kiszka wrote:
>> On 2011-02-22 19:08, Marcelo Tosatti wrote:
>>>  On Fri, Feb 18, 2011 at 04:53:09PM +0800, Sheng Yang wrote:
>>>>  Sorry for the long delay, just come back from vacation...
>>>>
>>>>  Change from v8:
>>>>  1. Update struct kvm_run to contain MSI-X routing update exit specific
>>>>  information.
>>>>  2. Fix a mmio_nr counting bug.
>>>>
>>>>  Notice this patchset still based on 2.6.37 due to a block bug on assigned
>>>>  device in the upstream now.
>>>>
>>>>  Sheng Yang (4):
>>>>    KVM: Move struct kvm_io_device to kvm_host.h
>>>>    KVM: Add kvm_io_ext_data to IO handler
>>>>    KVM: Emulate MSI-X table in kernel
>>>>    KVM: Add documents for MSI-X MMIO API
>>>
>>>  Looks good to me.
>>
>> What happened to this series and [1]? Forgotten because Sheng switched
>> his job?
> 
> I actually have guilty feelings about it now and then.  It's just hard 
> to merge such a complicated change with a non-trivial userspace interface.

I agree it's tricky. It will surely require rebasing anyway, thus also a
proper re-review. I was also wondering (without locking into the dirty
details yet) if that interface couldn't be made more generic to allow
fast masking for virtio/vhost as well.

> 
>> I just realized that MSI-X per-vector masking for assigned devices is
>> not only slow (takes user space exits) but just simply broken (masking
>> bit is not evaluated and applied to the device).
> 
> Isn't that just a bug in qemu?  Or something else?

The fact the the per-vectors masks aren't evaluated indicates that it's
simply not implemented yet.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-09-06  8:12       ` Michael S. Tsirkin
@ 2011-09-06  8:36         ` Jan Kiszka
  2011-09-06  8:46           ` Sasha Levin
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Kiszka @ 2011-09-06  8:36 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Marcelo Tosatti, kvm@vger.kernel.org, Alex Williamson

On 2011-09-06 10:12, Michael S. Tsirkin wrote:
> On Tue, Sep 06, 2011 at 10:52:41AM +0300, Avi Kivity wrote:
>>>
>>> BTW, the same is also true for that optional per-vector masking of
>>> legacy MSI. Are there devices in the field that actually support this? I
>>> haven't found one so far and tend to consider this feature not worth
>>> implementing.
>>
>> Don't know.  I don't like implementing features on the basis of bug
>> reports, though.  On the other hand we can't really test it without
>> a real device.
> 
> Linux will use this capability if present. So
> we could add support for an emulated device (e.g. e1000),
> then test with nested virt once iommu emulation lands :)

Yeah, would be kind of cool. Still, I would feel better having it tested
against a real silicon as well. Also to prove that there is a real need.

So, in case someone stumbles for such a device (bit 8 set in MSI control
word), please let us know!

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-09-06  8:36         ` Jan Kiszka
@ 2011-09-06  8:46           ` Sasha Levin
  2011-09-06  8:49             ` Jan Kiszka
  0 siblings, 1 reply; 20+ messages in thread
From: Sasha Levin @ 2011-09-06  8:46 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Michael S. Tsirkin, Avi Kivity, Marcelo Tosatti,
	kvm@vger.kernel.org, Alex Williamson

On Tue, 2011-09-06 at 10:36 +0200, Jan Kiszka wrote:
> On 2011-09-06 10:12, Michael S. Tsirkin wrote:
> > On Tue, Sep 06, 2011 at 10:52:41AM +0300, Avi Kivity wrote:
> >>>
> >>> BTW, the same is also true for that optional per-vector masking of
> >>> legacy MSI. Are there devices in the field that actually support this? I
> >>> haven't found one so far and tend to consider this feature not worth
> >>> implementing.
> >>
> >> Don't know.  I don't like implementing features on the basis of bug
> >> reports, though.  On the other hand we can't really test it without
> >> a real device.
> > 
> > Linux will use this capability if present. So
> > we could add support for an emulated device (e.g. e1000),
> > then test with nested virt once iommu emulation lands :)
> 
> Yeah, would be kind of cool. Still, I would feel better having it tested
> against a real silicon as well. Also to prove that there is a real need.
> 
> So, in case someone stumbles for such a device (bit 8 set in MSI control
> word), please let us know!

Doesn't any device that supports MSI-X supports per-vector masking?

>From the spec:

"MSI and MSI-X each support per-vector masking. Per-vector masking is an
optional extension to MSI, and a standard feature with MSI-X"

-- 

Sasha.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-09-06  8:46           ` Sasha Levin
@ 2011-09-06  8:49             ` Jan Kiszka
  2011-09-06  9:00               ` Sasha Levin
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Kiszka @ 2011-09-06  8:49 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Michael S. Tsirkin, Avi Kivity, Marcelo Tosatti,
	kvm@vger.kernel.org, Alex Williamson

On 2011-09-06 10:46, Sasha Levin wrote:
> On Tue, 2011-09-06 at 10:36 +0200, Jan Kiszka wrote:
>> On 2011-09-06 10:12, Michael S. Tsirkin wrote:
>>> On Tue, Sep 06, 2011 at 10:52:41AM +0300, Avi Kivity wrote:
>>>>>
>>>>> BTW, the same is also true for that optional per-vector masking of
>>>>> legacy MSI. Are there devices in the field that actually support this? I
>>>>> haven't found one so far and tend to consider this feature not worth
>>>>> implementing.
>>>>
>>>> Don't know.  I don't like implementing features on the basis of bug
>>>> reports, though.  On the other hand we can't really test it without
>>>> a real device.
>>>
>>> Linux will use this capability if present. So
>>> we could add support for an emulated device (e.g. e1000),
>>> then test with nested virt once iommu emulation lands :)
>>
>> Yeah, would be kind of cool. Still, I would feel better having it tested
>> against a real silicon as well. Also to prove that there is a real need.
>>
>> So, in case someone stumbles for such a device (bit 8 set in MSI control
>> word), please let us know!
> 
> Doesn't any device that supports MSI-X supports per-vector masking?
> 
>>From the spec:
> 
> "MSI and MSI-X each support per-vector masking. Per-vector masking is an
> optional extension to MSI, and a standard feature with MSI-X"

Right, but the cap flag still has to be set.

Do you have any MSI-X device in reach?

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-09-06  8:49             ` Jan Kiszka
@ 2011-09-06  9:00               ` Sasha Levin
  2011-09-06  9:05                 ` Jan Kiszka
  0 siblings, 1 reply; 20+ messages in thread
From: Sasha Levin @ 2011-09-06  9:00 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Michael S. Tsirkin, Avi Kivity, Marcelo Tosatti,
	kvm@vger.kernel.org, Alex Williamson

On Tue, 2011-09-06 at 10:49 +0200, Jan Kiszka wrote:
> On 2011-09-06 10:46, Sasha Levin wrote:
> > On Tue, 2011-09-06 at 10:36 +0200, Jan Kiszka wrote:
> >> On 2011-09-06 10:12, Michael S. Tsirkin wrote:
> >>> On Tue, Sep 06, 2011 at 10:52:41AM +0300, Avi Kivity wrote:
> >>>>>
> >>>>> BTW, the same is also true for that optional per-vector masking of
> >>>>> legacy MSI. Are there devices in the field that actually support this? I
> >>>>> haven't found one so far and tend to consider this feature not worth
> >>>>> implementing.
> >>>>
> >>>> Don't know.  I don't like implementing features on the basis of bug
> >>>> reports, though.  On the other hand we can't really test it without
> >>>> a real device.
> >>>
> >>> Linux will use this capability if present. So
> >>> we could add support for an emulated device (e.g. e1000),
> >>> then test with nested virt once iommu emulation lands :)
> >>
> >> Yeah, would be kind of cool. Still, I would feel better having it tested
> >> against a real silicon as well. Also to prove that there is a real need.
> >>
> >> So, in case someone stumbles for such a device (bit 8 set in MSI control
> >> word), please let us know!
> > 
> > Doesn't any device that supports MSI-X supports per-vector masking?
> > 
> >>From the spec:
> > 
> > "MSI and MSI-X each support per-vector masking. Per-vector masking is an
> > optional extension to MSI, and a standard feature with MSI-X"
> 
> Right, but the cap flag still has to be set.
> 

Are you sure? Take a look at the table in section 6.8.2.3 in the spec
(pci v3). Unlike the message control for MSI, this table doesn't mention
anything about bit 8 or the per-vector masking capability for MSI-X, it
just assumes it's there.

> Do you have any MSI-X device in reach?

Don't think so, but I think it would be much easier to find a MSI-X
device rather than a MSI with per-vector capability.

-- 

Sasha.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-09-06  9:00               ` Sasha Levin
@ 2011-09-06  9:05                 ` Jan Kiszka
  2011-09-06  9:16                   ` Sasha Levin
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Kiszka @ 2011-09-06  9:05 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Michael S. Tsirkin, Avi Kivity, Marcelo Tosatti,
	kvm@vger.kernel.org, Alex Williamson

On 2011-09-06 11:00, Sasha Levin wrote:
> On Tue, 2011-09-06 at 10:49 +0200, Jan Kiszka wrote:
>> On 2011-09-06 10:46, Sasha Levin wrote:
>>> On Tue, 2011-09-06 at 10:36 +0200, Jan Kiszka wrote:
>>>> On 2011-09-06 10:12, Michael S. Tsirkin wrote:
>>>>> On Tue, Sep 06, 2011 at 10:52:41AM +0300, Avi Kivity wrote:
>>>>>>>
>>>>>>> BTW, the same is also true for that optional per-vector masking of
>>>>>>> legacy MSI. Are there devices in the field that actually support this? I
>>>>>>> haven't found one so far and tend to consider this feature not worth
>>>>>>> implementing.
>>>>>>
>>>>>> Don't know.  I don't like implementing features on the basis of bug
>>>>>> reports, though.  On the other hand we can't really test it without
>>>>>> a real device.
>>>>>
>>>>> Linux will use this capability if present. So
>>>>> we could add support for an emulated device (e.g. e1000),
>>>>> then test with nested virt once iommu emulation lands :)
>>>>
>>>> Yeah, would be kind of cool. Still, I would feel better having it tested
>>>> against a real silicon as well. Also to prove that there is a real need.
>>>>
>>>> So, in case someone stumbles for such a device (bit 8 set in MSI control
>>>> word), please let us know!
>>>
>>> Doesn't any device that supports MSI-X supports per-vector masking?
>>>
>>> >From the spec:
>>>
>>> "MSI and MSI-X each support per-vector masking. Per-vector masking is an
>>> optional extension to MSI, and a standard feature with MSI-X"
>>
>> Right, but the cap flag still has to be set.
>>
> 
> Are you sure? Take a look at the table in section 6.8.2.3 in the spec
> (pci v3). Unlike the message control for MSI, this table doesn't mention
> anything about bit 8 or the per-vector masking capability for MSI-X, it
> just assumes it's there.

[Err, I should stop doing n things in parallel.]

Of course, MSI-X implies per-vector masking, but in a totally different
way with different data structures etc. That's not interesting for the
case in question: per-vector masking for legacy MSI.

Back to square #1: We need a device with MSI support and cap bit 8 set
in its _MSI_ control word.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-09-06  9:05                 ` Jan Kiszka
@ 2011-09-06  9:16                   ` Sasha Levin
  2011-09-06  9:30                     ` Michael S. Tsirkin
  0 siblings, 1 reply; 20+ messages in thread
From: Sasha Levin @ 2011-09-06  9:16 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Michael S. Tsirkin, Avi Kivity, Marcelo Tosatti,
	kvm@vger.kernel.org, Alex Williamson

On Tue, 2011-09-06 at 11:05 +0200, Jan Kiszka wrote:
> On 2011-09-06 11:00, Sasha Levin wrote:
> > On Tue, 2011-09-06 at 10:49 +0200, Jan Kiszka wrote:
> >> On 2011-09-06 10:46, Sasha Levin wrote:
> >>> On Tue, 2011-09-06 at 10:36 +0200, Jan Kiszka wrote:
> >>>> On 2011-09-06 10:12, Michael S. Tsirkin wrote:
> >>>>> On Tue, Sep 06, 2011 at 10:52:41AM +0300, Avi Kivity wrote:
> >>>>>>>
> >>>>>>> BTW, the same is also true for that optional per-vector masking of
> >>>>>>> legacy MSI. Are there devices in the field that actually support this? I
> >>>>>>> haven't found one so far and tend to consider this feature not worth
> >>>>>>> implementing.
> >>>>>>
> >>>>>> Don't know.  I don't like implementing features on the basis of bug
> >>>>>> reports, though.  On the other hand we can't really test it without
> >>>>>> a real device.
> >>>>>
> >>>>> Linux will use this capability if present. So
> >>>>> we could add support for an emulated device (e.g. e1000),
> >>>>> then test with nested virt once iommu emulation lands :)
> >>>>
> >>>> Yeah, would be kind of cool. Still, I would feel better having it tested
> >>>> against a real silicon as well. Also to prove that there is a real need.
> >>>>
> >>>> So, in case someone stumbles upon such a device (bit 8 set in MSI control
> >>>> word), please let us know!
> >>>
> >>> Doesn't any device that supports MSI-X support per-vector masking?
> >>>
> >>> >From the spec:
> >>>
> >>> "MSI and MSI-X each support per-vector masking. Per-vector masking is an
> >>> optional extension to MSI, and a standard feature with MSI-X"
> >>
> >> Right, but the cap flag still has to be set.
> >>
> > 
> > Are you sure? Take a look at the table in section 6.8.2.3 in the spec
> > (pci v3). Unlike the message control for MSI, this table doesn't mention
> > anything about bit 8 or the per-vector masking capability for MSI-X, it
> > just assumes it's there.
> 
> [Err, I should stop doing n things in parallel.]
> 
> Of course, MSI-X implies per-vector masking, but in a totally different
> way with different data structures etc. That's not interesting for the
> case in question: per-vector masking for legacy MSI.
> 

Ah, Okay.

> Back to square #1: We need a device with MSI support and cap bit 8 set
> in its _MSI_ control word.

Alright, so I've looked at some of my servers, and one of them has both
a bunch of MSI-X devices, and some MSI devices which show this:

	Capabilities: [60] MSI: Enable+ Count=1/2 Maskable+ 64bit-
		Address: 00000020  Data: 0000
		Masking: 00000000  Pending: 00000000

Which would suggest that they support per-vector masking, right?

-- 

Sasha.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/4 v9] MSI-X MMIO support for KVM
  2011-09-06  9:16                   ` Sasha Levin
@ 2011-09-06  9:30                     ` Michael S. Tsirkin
  0 siblings, 0 replies; 20+ messages in thread
From: Michael S. Tsirkin @ 2011-09-06  9:30 UTC (permalink / raw)
  To: Sasha Levin
  Cc: Jan Kiszka, Avi Kivity, Marcelo Tosatti, kvm@vger.kernel.org,
	Alex Williamson

On Tue, Sep 06, 2011 at 12:16:11PM +0300, Sasha Levin wrote:
> > Back to square #1: We need a device with MSI support and cap bit 8 set
> > in its _MSI_ control word.
> 
> Alright, so I've looked at some of my servers, and one of them has both
> a bunch of MSI-X devices, and some MSI devices which show this:
> 
> 	Capabilities: [60] MSI: Enable+ Count=1/2 Maskable+ 64bit-
> 		Address: 00000020  Data: 0000
> 		Masking: 00000000  Pending: 00000000
> 
> Which would suggest that they support per-vector masking, right?

pciutils has this:

  cap = get_conf_word(d, where + PCI_CAP_FLAGS);

later

  printf("MSI: Enable%c Count=%d/%d Maskable%c 64bit%c\n",
         FLAG(cap, PCI_MSI_FLAGS_ENABLE),
         1 << ((cap & PCI_MSI_FLAGS_QSIZE) >> 4),
         1 << ((cap & PCI_MSI_FLAGS_QMASK) >> 1),
         FLAG(cap, PCI_MSI_FLAGS_MASK_BIT),
         FLAG(cap, PCI_MSI_FLAGS_64BIT));

and
#define  PCI_MSI_FLAGS_MASK_BIT  0x100   /* interrupt masking & reporting supported */

#define PCI_CAP_FLAGS           2       /* Capability defined flags (16 bits) */


So yes, that's bit 8 in control word.


> -- 
> 
> Sasha.

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2011-09-06  9:29 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-02-18  8:53 [PATCH 0/4 v9] MSI-X MMIO support for KVM Sheng Yang
2011-02-18  8:53 ` [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
2011-02-18  8:53 ` [PATCH 2/4] KVM: Add kvm_io_ext_data to IO handler Sheng Yang
2011-02-18  8:53 ` [PATCH 3/4] KVM: Emulate MSI-X table in kernel Sheng Yang
2011-02-18  8:53 ` [PATCH 4/4] KVM: Add documents for MSI-X MMIO API Sheng Yang
2011-02-22 18:08 ` [PATCH 0/4 v9] MSI-X MMIO support for KVM Marcelo Tosatti
2011-09-01 11:27   ` Jan Kiszka
2011-09-06  7:52     ` Avi Kivity
2011-09-06  8:12       ` Michael S. Tsirkin
2011-09-06  8:36         ` Jan Kiszka
2011-09-06  8:46           ` Sasha Levin
2011-09-06  8:49             ` Jan Kiszka
2011-09-06  9:00               ` Sasha Levin
2011-09-06  9:05                 ` Jan Kiszka
2011-09-06  9:16                   ` Sasha Levin
2011-09-06  9:30                     ` Michael S. Tsirkin
2011-09-06  8:36       ` Jan Kiszka
  -- strict thread matches above, loose matches on Subject: below --
2011-02-24  9:51 [PATCH 0/4 v10] " Sheng Yang
2011-02-24  9:51 ` [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
2011-02-28  7:20 [PATCH 0/4 v11] MSI-X MMIO support for KVM Sheng Yang
2011-02-28  7:20 ` [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang
2011-03-02  7:26 [PATCH 0/4 v12] MSI-X MMIO support for KVM Sheng Yang
2011-03-02  7:26 ` [PATCH 1/4] KVM: Move struct kvm_io_device to kvm_host.h Sheng Yang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).