Linux virtualization list

Linux virtualization list
 help / color / mirror / Atom feed

* [PATCH 0/11] RFC: PCI using capabilitities
From: Rusty Russell @ 2011-12-08 10:22 UTC (permalink / raw)
  To: virtualization; +Cc: Avi Kivity, Sasha Levin, kvm, Michael S. Tsirkin

Here's the patch series I ended up with.  I haven't coded up the QEMU
side yet, so no idea if the new driver works.

Questions:
(1) Do we win from separating ISR, NOTIFY and COMMON?
(2) I used a "u8 bar"; should I use a bir and pack it instead?  BIR
    seems a little obscure (noone else in the kernel source seems to
    refer to it).

Cheers,
Rusty.

^ permalink raw reply

* [RFC 1/11] virtio: use u32, not bitmap for struct virtio_device's features
From: Rusty Russell @ 2011-12-08 10:30 UTC (permalink / raw)
  To: virtualization; +Cc: Michael S. Tsirkin, Avi Kivity, kvm, Sasha Levin
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

It seemed like a good idea, but it's actually a pain when we get more
than 32 feature bits.  Just change it to a u32 for now.
---
 drivers/char/virtio_console.c  |    2 +-
 drivers/lguest/lguest_device.c |    2 +-
 drivers/s390/kvm/kvm_virtio.c  |    2 +-
 drivers/virtio/virtio.c        |   10 +++++-----
 drivers/virtio/virtio_mmio.c   |    8 ++------
 drivers/virtio/virtio_pci.c    |    3 +--
 drivers/virtio/virtio_ring.c   |    2 +-
 include/linux/virtio.h         |    3 +--
 include/linux/virtio_config.h  |    2 +-
 tools/virtio/linux/virtio.h    |   18 ++----------------
 tools/virtio/virtio_test.c     |    5 ++---
 11 files changed, 18 insertions(+), 39 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -331,7 +331,7 @@ static inline bool use_multiport(struct 
 	 */
 	if (!portdev->vdev)
 		return 0;
-	return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT);
+	return portdev->vdev->features & (1 << VIRTIO_CONSOLE_F_MULTIPORT);
 }
 
 static void free_buf(struct port_buffer *buf)
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -144,7 +144,7 @@ static void lg_finalize_features(struct 
 	memset(out_features, 0, desc->feature_len);
 	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
 	for (i = 0; i < bits; i++) {
-		if (test_bit(i, vdev->features))
+		if (vdev->features & (1 << i))
 			out_features[i / 8] |= (1 << (i % 8));
 	}
 
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -105,7 +105,7 @@ static void kvm_finalize_features(struct
 	memset(out_features, 0, desc->feature_len);
 	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
 	for (i = 0; i < bits; i++) {
-		if (test_bit(i, vdev->features))
+		if (vdev->features & (1 << i))
 			out_features[i / 8] |= (1 << (i % 8));
 	}
 }
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -41,9 +41,9 @@ static ssize_t features_show(struct devi
 
 	/* We actually represent this as a bitstring, as it could be
 	 * arbitrary length in future. */
-	for (i = 0; i < ARRAY_SIZE(dev->features)*BITS_PER_LONG; i++)
+	for (i = 0; i < sizeof(dev->features)*8; i++)
 		len += sprintf(buf+len, "%c",
-			       test_bit(i, dev->features) ? '1' : '0');
+			       dev->features & (1ULL << i) ? '1' : '0');
 	len += sprintf(buf+len, "\n");
 	return len;
 }
@@ -122,18 +122,18 @@ static int virtio_dev_probe(struct devic
 	device_features = dev->config->get_features(dev);
 
 	/* Features supported by both device and driver into dev->features. */
-	memset(dev->features, 0, sizeof(dev->features));
+	dev->features = 0;
 	for (i = 0; i < drv->feature_table_size; i++) {
 		unsigned int f = drv->feature_table[i];
 		BUG_ON(f >= 32);
 		if (device_features & (1 << f))
-			set_bit(f, dev->features);
+			dev->features |= (1 << f);
 	}
 
 	/* Transport features always preserved to pass to finalize_features. */
 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
 		if (device_features & (1 << i))
-			set_bit(i, dev->features);
+			dev->features |= (1 << i);
 
 	dev->config->finalize_features(dev);
 
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -112,16 +112,12 @@ static u32 vm_get_features(struct virtio
 static void vm_finalize_features(struct virtio_device *vdev)
 {
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
-	int i;
 
 	/* Give virtio_ring a chance to accept features. */
 	vring_transport_features(vdev);
 
-	for (i = 0; i < ARRAY_SIZE(vdev->features); i++) {
-		writel(i, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL);
-		writel(vdev->features[i],
-				vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
-	}
+	writel(0, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL);
+	writel(vdev->features, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
 }
 
 static void vm_get(struct virtio_device *vdev, unsigned offset,
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -121,8 +121,7 @@ static void vp_finalize_features(struct 
 	vring_transport_features(vdev);
 
 	/* We only support 32 feature bits. */
-	BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
-	iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
+	iowrite32(vdev->features, vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
 }
 
 /* virtio config->get() implementation */
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -685,7 +685,7 @@ void vring_transport_features(struct vir
 			break;
 		default:
 			/* We don't understand this bit. */
-			clear_bit(i, vdev->features);
+			vdev->features &= ~(1 << i);
 		}
 	}
 }
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -66,8 +66,7 @@ struct virtio_device {
 	struct virtio_device_id id;
 	struct virtio_config_ops *config;
 	struct list_head vqs;
-	/* Note that this is a Linux set_bit-style bitmap. */
-	unsigned long features[1];
+	u32 features;
 	void *priv;
 };
 
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -142,7 +142,7 @@ static inline bool virtio_has_feature(co
 	if (fbit < VIRTIO_TRANSPORT_F_START)
 		virtio_check_driver_offered_feature(vdev, fbit);
 
-	return test_bit(fbit, vdev->features);
+	return vdev->features & (1 << fbit);
 }
 
 /**
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -131,29 +131,15 @@ static inline void kfree(void *p)
 #define BITS_PER_BYTE		8
 #define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE)
 #define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-/* TODO: Not atomic as it should be:
- * we don't use this for anything important. */
-static inline void clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-
-	*p &= ~mask;
-}
-
-static inline int test_bit(int nr, const volatile unsigned long *addr)
-{
-        return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
-}
 
 /* The only feature we care to support */
 #define virtio_has_feature(dev, feature) \
-	test_bit((feature), (dev)->features)
+	((dev)->features & (1 << (feature)))
 /* end of stubs */
 
 struct virtio_device {
 	void *dev;
-	unsigned long features[1];
+	u32 features;
 };
 
 struct virtqueue {
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -55,7 +55,7 @@ void vhost_vq_setup(struct vdev_info *de
 {
 	struct vhost_vring_state state = { .index = info->idx };
 	struct vhost_vring_file file = { .index = info->idx };
-	unsigned long long features = dev->vdev.features[0];
+	unsigned long long features = dev->vdev.features;
 	struct vhost_vring_addr addr = {
 		.index = info->idx,
 		.desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
@@ -106,8 +106,7 @@ static void vdev_info_init(struct vdev_i
 {
 	int r;
 	memset(dev, 0, sizeof *dev);
-	dev->vdev.features[0] = features;
-	dev->vdev.features[1] = features >> 32;
+	dev->vdev.features = features;
 	dev->buf_size = 1024;
 	dev->buf = malloc(dev->buf_size);
 	assert(dev->buf);

^ permalink raw reply

* [RFC 2/11] virtio: add support for 64 bit features.
From: Rusty Russell @ 2011-12-08 10:31 UTC (permalink / raw)
  To: virtualization; +Cc: Sasha Levin, Michael S. Tsirkin
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

Change the u32 to a u64, and make sure to use 1ULL everywhere!
---
 drivers/char/virtio_console.c  |    2 +-
 drivers/lguest/lguest_device.c |   10 +++++-----
 drivers/s390/kvm/kvm_virtio.c  |   10 +++++-----
 drivers/virtio/virtio.c        |   12 ++++++------
 drivers/virtio/virtio_mmio.c   |   14 +++++++++-----
 drivers/virtio/virtio_pci.c    |    5 ++---
 drivers/virtio/virtio_ring.c   |    2 +-
 include/linux/virtio.h         |    2 +-
 include/linux/virtio_config.h  |    8 ++++----
 tools/virtio/linux/virtio.h    |    4 ++--
 10 files changed, 36 insertions(+), 33 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -331,7 +331,7 @@ static inline bool use_multiport(struct 
 	 */
 	if (!portdev->vdev)
 		return 0;
-	return portdev->vdev->features & (1 << VIRTIO_CONSOLE_F_MULTIPORT);
+	return portdev->vdev->features & (1ULL << VIRTIO_CONSOLE_F_MULTIPORT);
 }
 
 static void free_buf(struct port_buffer *buf)
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -94,17 +94,17 @@ static unsigned desc_size(const struct l
 }
 
 /* This gets the device's feature bits. */
-static u32 lg_get_features(struct virtio_device *vdev)
+static u64 lg_get_features(struct virtio_device *vdev)
 {
 	unsigned int i;
-	u32 features = 0;
+	u64 features = 0;
 	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
 	u8 *in_features = lg_features(desc);
 
 	/* We do this the slow but generic way. */
-	for (i = 0; i < min(desc->feature_len * 8, 32); i++)
+	for (i = 0; i < min(desc->feature_len * 8, 64); i++)
 		if (in_features[i / 8] & (1 << (i % 8)))
-			features |= (1 << i);
+			features |= (1ULL << i);
 
 	return features;
 }
@@ -144,7 +144,7 @@ static void lg_finalize_features(struct 
 	memset(out_features, 0, desc->feature_len);
 	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
 	for (i = 0; i < bits; i++) {
-		if (vdev->features & (1 << i))
+		if (vdev->features & (1ULL << i))
 			out_features[i / 8] |= (1 << (i % 8));
 	}
 
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -79,16 +79,16 @@ static unsigned desc_size(const struct k
 }
 
 /* This gets the device's feature bits. */
-static u32 kvm_get_features(struct virtio_device *vdev)
+static u64 kvm_get_features(struct virtio_device *vdev)
 {
 	unsigned int i;
-	u32 features = 0;
+	u64 features = 0;
 	struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
 	u8 *in_features = kvm_vq_features(desc);
 
-	for (i = 0; i < min(desc->feature_len * 8, 32); i++)
+	for (i = 0; i < min(desc->feature_len * 8, 64); i++)
 		if (in_features[i / 8] & (1 << (i % 8)))
-			features |= (1 << i);
+			features |= (1ULL << i);
 	return features;
 }
 
@@ -105,7 +105,7 @@ static void kvm_finalize_features(struct
 	memset(out_features, 0, desc->feature_len);
 	bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
 	for (i = 0; i < bits; i++) {
-		if (vdev->features & (1 << i))
+		if (vdev->features & (1ULL << i))
 			out_features[i / 8] |= (1 << (i % 8));
 	}
 }
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -113,7 +113,7 @@ static int virtio_dev_probe(struct devic
 	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
 	struct virtio_driver *drv = container_of(dev->dev.driver,
 						 struct virtio_driver, driver);
-	u32 device_features;
+	u64 device_features;
 
 	/* We have a driver! */
 	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
@@ -125,15 +125,15 @@ static int virtio_dev_probe(struct devic
 	dev->features = 0;
 	for (i = 0; i < drv->feature_table_size; i++) {
 		unsigned int f = drv->feature_table[i];
-		BUG_ON(f >= 32);
-		if (device_features & (1 << f))
-			dev->features |= (1 << f);
+		BUG_ON(f >= 64);
+		if (device_features & (1ULL << f))
+			dev->features |= (1ULL << f);
 	}
 
 	/* Transport features always preserved to pass to finalize_features. */
 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
-		if (device_features & (1 << i))
-			dev->features |= (1 << i);
+		if (device_features & (1ULL << i))
+			dev->features |= (1ULL << i);
 
 	dev->config->finalize_features(dev);
 
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -99,14 +99,16 @@ struct virtio_mmio_vq_info {
 
 /* Configuration interface */
 
-static u32 vm_get_features(struct virtio_device *vdev)
+static u64 vm_get_features(struct virtio_device *vdev)
 {
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+	u64 features;
 
-	/* TODO: Features > 32 bits */
 	writel(0, vm_dev->base + VIRTIO_MMIO_HOST_FEATURES_SEL);
-
-	return readl(vm_dev->base + VIRTIO_MMIO_HOST_FEATURES);
+	features = readl(vm_dev->base + VIRTIO_MMIO_HOST_FEATURES);
+	writel(1, vm_dev->base + VIRTIO_MMIO_HOST_FEATURES_SEL);
+	features |= ((u64)readl(vm_dev->base + VIRTIO_MMIO_HOST_FEATURES) << 32);
+	return features;
 }
 
 static void vm_finalize_features(struct virtio_device *vdev)
@@ -117,7 +119,9 @@ static void vm_finalize_features(struct 
 	vring_transport_features(vdev);
 
 	writel(0, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL);
-	writel(vdev->features, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
+	writel((u32)vdev->features, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
+	writel(1, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES_SEL);
+	writel(vdev->features >> 32, vm_dev->base + VIRTIO_MMIO_GUEST_FEATURES);
 }
 
 static void vm_get(struct virtio_device *vdev, unsigned offset,
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -103,12 +103,11 @@ static struct virtio_pci_device *to_vp_d
 }
 
 /* virtio config->get_features() implementation */
-static u32 vp_get_features(struct virtio_device *vdev)
+static u64 vp_get_features(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
-	/* When someone needs more than 32 feature bits, we'll need to
-	 * steal a bit to indicate that the rest are somewhere else. */
+	/* We only support 32 feature bits. */
 	return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
 }
 
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -685,7 +685,7 @@ void vring_transport_features(struct vir
 			break;
 		default:
 			/* We don't understand this bit. */
-			vdev->features &= ~(1 << i);
+			vdev->features &= ~(1ULL << i);
 		}
 	}
 }
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -66,7 +66,7 @@ struct virtio_device {
 	struct virtio_device_id id;
 	struct virtio_config_ops *config;
 	struct list_head vqs;
-	u32 features;
+	u64 features;
 	void *priv;
 };
 
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -117,7 +117,7 @@ struct virtio_config_ops {
 			vq_callback_t *callbacks[],
 			const char *names[]);
 	void (*del_vqs)(struct virtio_device *);
-	u32 (*get_features)(struct virtio_device *vdev);
+	u64 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
 };
 
@@ -135,14 +135,14 @@ static inline bool virtio_has_feature(co
 {
 	/* Did you forget to fix assumptions on max features? */
 	if (__builtin_constant_p(fbit))
-		BUILD_BUG_ON(fbit >= 32);
+		BUILD_BUG_ON(fbit >= 64);
 	else
-		BUG_ON(fbit >= 32);
+		BUG_ON(fbit >= 64);
 
 	if (fbit < VIRTIO_TRANSPORT_F_START)
 		virtio_check_driver_offered_feature(vdev, fbit);
 
-	return vdev->features & (1 << fbit);
+	return vdev->features & (1ULL << fbit);
 }
 
 /**
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -134,12 +134,12 @@ static inline void kfree(void *p)
 
 /* The only feature we care to support */
 #define virtio_has_feature(dev, feature) \
-	((dev)->features & (1 << (feature)))
+	((dev)->features & (1ULL << (feature)))
 /* end of stubs */
 
 struct virtio_device {
 	void *dev;
-	u32 features;
+	u64 features;
 };
 
 struct virtqueue {

^ permalink raw reply

* Re: [PATCH 0/11] RFC: PCI using capabilitities
From: Sasha Levin @ 2011-12-08 10:32 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Michael S. Tsirkin, Avi Kivity, kvm, virtualization
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

Rusty, I can't find the actual patches, could you verify that they were
indeed sent?

On Thu, 2011-12-08 at 20:52 +1030, Rusty Russell wrote:
> Here's the patch series I ended up with.  I haven't coded up the QEMU
> side yet, so no idea if the new driver works.
> 
> Questions:
> (1) Do we win from separating ISR, NOTIFY and COMMON?

By separating ISR, NOTIFY and COMMON we can place ISR and NOTIFY in PIO
and COMMON in MMIO. This gives us the benefit of having the small data
path use fast PIO, while big config path can use MMIO.

> (2) I used a "u8 bar"; should I use a bir and pack it instead?  BIR
>     seems a little obscure (noone else in the kernel source seems to
>     refer to it).

BIR is a concept from the PCI spec, but it was only used for MSI-X. I
don't expect to see it all around the kernel source.

-- 

Sasha.

^ permalink raw reply

* [RFC 3/11] pci: add pci_iomap_range
From: Rusty Russell @ 2011-12-08 10:32 UTC (permalink / raw)
  To: virtualization; +Cc: Sasha Levin, Michael S. Tsirkin
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

From: Michael S Tsirkin <mst@redhat.com>

Virtio drivers should map the part of the range they need, not necessarily
all of it.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/asm-generic/io.h    |    4 ++++
 include/asm-generic/iomap.h |   11 +++++++++++
 lib/iomap.c                 |   41 ++++++++++++++++++++++++++++++++++++-----
 3 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 9120887..3cf1787 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -286,6 +286,10 @@ static inline void writesb(const void __iomem *addr, const void *buf, int len)
 /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
 struct pci_dev;
 extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
+extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+				     unsigned offset,
+				     unsigned long minlen,
+				     unsigned long maxlen);
 static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p)
 {
 }
diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h
index 98dcd76..6f192d4 100644
--- a/include/asm-generic/iomap.h
+++ b/include/asm-generic/iomap.h
@@ -70,8 +70,19 @@ extern void ioport_unmap(void __iomem *);
 /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
 struct pci_dev;
 extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
+extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+				     unsigned offset,
+				     unsigned long minlen,
+				     unsigned long maxlen);
 extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
 #else
+static inline void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+					    unsigned offset,
+					    unsigned long minlen,
+					    unsigned long maxlen)
+{
+	return NULL;
+}
 struct pci_dev;
 static inline void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
 {
diff --git a/lib/iomap.c b/lib/iomap.c
index 5dbcb4b..93ae915 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -243,26 +243,37 @@ EXPORT_SYMBOL(ioport_unmap);
 
 #ifdef CONFIG_PCI
 /**
- * pci_iomap - create a virtual mapping cookie for a PCI BAR
+ * pci_iomap_range - create a virtual mapping cookie for a PCI BAR
  * @dev: PCI device that owns the BAR
  * @bar: BAR number
- * @maxlen: length of the memory to map
+ * @offset: map memory at the given offset in BAR
+ * @minlen: min length of the memory to map
+ * @maxlen: max length of the memory to map
  *
  * Using this function you will get a __iomem address to your device BAR.
  * You can access it using ioread*() and iowrite*(). These functions hide
  * the details if this is a MMIO or PIO address space and will just do what
  * you expect from them in the correct way.
  *
+ * @minlen specifies the minimum length to map. We check that BAR is
+ * large enough.
  * @maxlen specifies the maximum length to map. If you want to get access to
- * the complete BAR without checking for its length first, pass %0 here.
+ * the complete BAR from offset to the end, pass %0 here.
  * */
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+			      unsigned offset,
+			      unsigned long minlen,
+			      unsigned long maxlen)
 {
 	resource_size_t start = pci_resource_start(dev, bar);
 	resource_size_t len = pci_resource_len(dev, bar);
 	unsigned long flags = pci_resource_flags(dev, bar);
 
-	if (!len || !start)
+	if (len <= offset || !start)
+		return NULL;
+	len -= offset;
+	start += offset;
+	if (len < minlen)
 		return NULL;
 	if (maxlen && len > maxlen)
 		len = maxlen;
@@ -277,10 +288,30 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
 	return NULL;
 }
 
+/**
+ * pci_iomap - create a virtual mapping cookie for a PCI BAR
+ * @dev: PCI device that owns the BAR
+ * @bar: BAR number
+ * @maxlen: length of the memory to map
+ *
+ * Using this function you will get a __iomem address to your device BAR.
+ * You can access it using ioread*() and iowrite*(). These functions hide
+ * the details if this is a MMIO or PIO address space and will just do what
+ * you expect from them in the correct way.
+ *
+ * @maxlen specifies the maximum length to map. If you want to get access to
+ * the complete BAR without checking for its length first, pass %0 here.
+ * */
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	return pci_iomap_range(dev, bar, 0, 0, maxlen);
+}
+
 void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
 {
 	IO_COND(addr, /* nothing */, iounmap(addr));
 }
 EXPORT_SYMBOL(pci_iomap);
+EXPORT_SYMBOL(pci_iomap_range);
 EXPORT_SYMBOL(pci_iounmap);
 #endif /* CONFIG_PCI */

^ permalink raw reply related

* [RFC 4/11] virtio-pci: define layout for virtio vendor-specific capabilities.
From: Rusty Russell @ 2011-12-08 10:34 UTC (permalink / raw)
  To: virtualization; +Cc: Sasha Levin, Michael S. Tsirkin
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

Based on patch by Michael S. Tsirkin <mst@redhat.com>, but I found it
hard to follow so changed to use structures which are more
self-documenting.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_pci.h |   41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -92,4 +92,45 @@
 /* The alignment to use between consumer and producer parts of vring.
  * x86 pagesize again. */
 #define VIRTIO_PCI_VRING_ALIGN		4096
+
+/* IDs for different capabilities.  Must all exist. */
+/* FIXME: Do we win from separating ISR, NOTIFY and COMMON? */
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG	1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG	2
+/* ISR access */
+#define VIRTIO_PCI_CAP_ISR_CFG		3
+/* Device specific confiuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG	4
+
+/* This is the PCI capability header: */
+struct virtio_pci_cap {
+	u8 cap_vndr;	/* Generic PCI field: PCI_CAP_ID_VNDR */
+	u8 cap_next;	/* Generic PCI field: next ptr. */
+	u8 cfg_type;	/* One of the VIRTIO_PCI_CAP_*_CFG. */
+/* FIXME: Should we use a bir, instead of raw bar number? */
+	u8 bar;		/* Where to find it. */
+	__le32 offset;	/* Offset within bar. */
+	__le32 length;	/* Length. */
+};
+
+/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
+struct virtio_pci_common_cfg {
+	/* About the whole device. */
+	__le32 device_feature_select;	/* read-write */
+	__le32 device_feature;		/* read-only */
+	__le32 guest_feature_select;	/* read-write */
+	__le32 guest_feature;		/* read-only */
+	__le16 msix_config;		/* read-write */
+	__u8 device_status;		/* read-write */
+	__u8 unused;
+
+	/* About a specific virtqueue. */
+	__le16 queue_select;	/* read-write */
+	__le16 queue_align;	/* read-write, power of 2. */
+	__le16 queue_size;	/* read-write, power of 2. */
+	__le16 queue_msix_vector;/* read-write */
+	__le64 queue_address;	/* read-write: 0xFFFFFFFFFFFFFFFF == DNE. */
+};
 #endif

^ permalink raw reply

* [RFC 6/11] virtio_pci: move old defines to legacy, introduce new structure.
From: Rusty Russell @ 2011-12-08 10:35 UTC (permalink / raw)
  To: virtualization; +Cc: Sasha Levin, Michael S. Tsirkin
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

We don't *remove* the old ones, unless VIRTIO_PCI_NO_LEGACY is defined,
but they get a friendly #warning about the change.

Note that config option is not promted; we always enable it for now.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/Kconfig             |   12 +++++++
 drivers/virtio/Makefile            |    2 -
 drivers/virtio/virtio_pci_legacy.c |   16 ++++-----
 include/linux/virtio_pci.h         |   63 ++++++++++++++++++++++++-------------
 4 files changed, 62 insertions(+), 31 deletions(-)

diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -25,6 +25,18 @@ config VIRTIO_PCI
 
 	  If unsure, say M.
 
+config VIRTIO_PCI_LEGACY
+	bool
+	default y
+	depends on VIRTIO_PCI
+	---help---
+	  The old BAR0 virtio pci layout was deprecated early 2012.
+
+	  So look out into your driveway.  Do you have a flying car?  If
+	  so, you can happily disable this option and virtio will not
+	  break.  Otherwise, leave it set.  Unless you're testing what
+	  life will be like in The Future.
+
 config VIRTIO_BALLOON
 	tristate "Virtio balloon driver (EXPERIMENTAL)"
 	select VIRTIO
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_VIRTIO) += virtio.o
 obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
-obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
+obj-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci_legacy.c
rename from drivers/virtio/virtio_pci.c
rename to drivers/virtio/virtio_pci_legacy.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -1,5 +1,5 @@
 /*
- * Virtio PCI driver
+ * Virtio PCI driver (legacy mode)
  *
  * This module allows virtio devices to be used over a virtual PCI device.
  * This can be used with QEMU based VMMs like KVM or Xen.
@@ -27,7 +27,7 @@
 #include <linux/spinlock.h>
 
 MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
-MODULE_DESCRIPTION("virtio-pci");
+MODULE_DESCRIPTION("virtio-pci-legacy");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("1");
 
@@ -629,7 +629,7 @@ static int __devinit virtio_pci_probe(st
 		return -ENODEV;
 
 	if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
-		printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
+		printk(KERN_ERR "virtio_pci_legacy: expected ABI version %d, got %d\n",
 		       VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
 		return -ENODEV;
 	}
@@ -654,7 +654,7 @@ static int __devinit virtio_pci_probe(st
 	if (err)
 		goto out;
 
-	err = pci_request_regions(pci_dev, "virtio-pci");
+	err = pci_request_regions(pci_dev, "virtio-pci-legacy");
 	if (err)
 		goto out_enable_device;
 
@@ -721,8 +721,8 @@ static int virtio_pci_resume(struct pci_
 }
 #endif
 
-static struct pci_driver virtio_pci_driver = {
-	.name		= "virtio-pci",
+static struct pci_driver virtio_pci_driver_legacy = {
+	.name		= "virtio-pci-legacy",
 	.id_table	= virtio_pci_id_table,
 	.probe		= virtio_pci_probe,
 	.remove		= __devexit_p(virtio_pci_remove),
@@ -734,14 +734,14 @@ static struct pci_driver virtio_pci_driv
 
 static int __init virtio_pci_init(void)
 {
-	return pci_register_driver(&virtio_pci_driver);
+	return pci_register_driver(&virtio_pci_driver_legacy);
 }
 
 module_init(virtio_pci_init);
 
 static void __exit virtio_pci_exit(void)
 {
-	pci_unregister_driver(&virtio_pci_driver);
+	pci_unregister_driver(&virtio_pci_driver_legacy);
 }
 
 module_exit(virtio_pci_exit);
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -42,56 +42,75 @@
 #include <linux/virtio_config.h>
 
 /* A 32-bit r/o bitmask of the features supported by the host */
-#define VIRTIO_PCI_HOST_FEATURES	0
+#define VIRTIO_PCI_LEGACY_HOST_FEATURES		0
 
 /* A 32-bit r/w bitmask of features activated by the guest */
-#define VIRTIO_PCI_GUEST_FEATURES	4
+#define VIRTIO_PCI_LEGACY_GUEST_FEATURES	4
 
 /* A 32-bit r/w PFN for the currently selected queue */
-#define VIRTIO_PCI_QUEUE_PFN		8
+#define VIRTIO_PCI_LEGACY_QUEUE_PFN		8
 
 /* A 16-bit r/o queue size for the currently selected queue */
-#define VIRTIO_PCI_QUEUE_NUM		12
+#define VIRTIO_PCI_LEGACY_QUEUE_NUM		12
 
 /* A 16-bit r/w queue selector */
-#define VIRTIO_PCI_QUEUE_SEL		14
+#define VIRTIO_PCI_LEGACY_QUEUE_SEL		14
 
 /* A 16-bit r/w queue notifier */
-#define VIRTIO_PCI_QUEUE_NOTIFY		16
+#define VIRTIO_PCI_LEGACY_QUEUE_NOTIFY		16
 
 /* An 8-bit device status register.  */
-#define VIRTIO_PCI_STATUS		18
+#define VIRTIO_PCI_LEGACY_STATUS		18
 
 /* An 8-bit r/o interrupt status register.  Reading the value will return the
  * current contents of the ISR and will also clear it.  This is effectively
  * a read-and-acknowledge. */
-#define VIRTIO_PCI_ISR			19
-
-/* The bit of the ISR which indicates a device configuration change. */
-#define VIRTIO_PCI_ISR_CONFIG		0x2
+#define VIRTIO_PCI_LEGACY_ISR			19
 
 /* MSI-X registers: only enabled if MSI-X is enabled. */
 /* A 16-bit vector for configuration changes. */
-#define VIRTIO_MSI_CONFIG_VECTOR        20
+#define VIRTIO_MSI_LEGACY_CONFIG_VECTOR        20
 /* A 16-bit vector for selected queue notifications. */
-#define VIRTIO_MSI_QUEUE_VECTOR         22
-/* Vector value used to disable MSI for queue */
-#define VIRTIO_MSI_NO_VECTOR            0xffff
+#define VIRTIO_MSI_LEGACY_QUEUE_VECTOR         22
 
 /* The remaining space is defined by each driver as the per-driver
  * configuration space */
-#define VIRTIO_PCI_CONFIG(dev)		((dev)->msix_enabled ? 24 : 20)
+#define VIRTIO_PCI_LEGACY_CONFIG(dev)		((dev)->msix_enabled ? 24 : 20)
+
+/* How many bits to shift physical queue address written to QUEUE_PFN.
+ * 12 is historical, and due to x86 page size. */
+#define VIRTIO_PCI_LEGACY_QUEUE_ADDR_SHIFT	12
+
+/* The alignment to use between consumer and producer parts of vring.
+ * x86 pagesize again. */
+#define VIRTIO_PCI_LEGACY_VRING_ALIGN		4096
+
+#ifndef VIRTIO_PCI_NO_LEGACY
+/* Don't break compile of old userspace code.  These will go away. */
+#warning "Please support virtio_pci non-legacy mode!"
+#define VIRTIO_PCI_HOST_FEATURES VIRTIO_PCI_LEGACY_HOST_FEATURES
+#define VIRTIO_PCI_GUEST_FEATURES VIRTIO_PCI_LEGACY_GUEST_FEATURES
+#define VIRTIO_PCI_QUEUE_PFN VIRTIO_PCI_LEGACY_QUEUE_PFN
+#define VIRTIO_PCI_QUEUE_NUM VIRTIO_PCI_LEGACY_QUEUE_NUM
+#define VIRTIO_PCI_QUEUE_SEL VIRTIO_PCI_LEGACY_QUEUE_SEL
+#define VIRTIO_PCI_QUEUE_NOTIFY VIRTIO_PCI_LEGACY_QUEUE_NOTIFY
+#define VIRTIO_PCI_STATUS VIRTIO_PCI_LEGACY_STATUS
+#define VIRTIO_PCI_ISR VIRTIO_PCI_LEGACY_ISR
+#define VIRTIO_MSI_CONFIG_VECTOR VIRTIO_MSI_LEGACY_CONFIG_VECTOR
+#define VIRTIO_MSI_QUEUE_VECTOR VIRTIO_MSI_LEGACY_QUEUE_VECTOR
+#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_LEGACY_CONFIG(dev)
+#define VIRTIO_PCI_QUEUE_ADDR_SHIFT VIRTIO_PCI_LEGACY_QUEUE_ADDR_SHIFT
+#define VIRTIO_PCI_VRING_ALIGN VIRTIO_PCI_LEGACY_VRING_ALIGN
+#endif /* ...!KERNEL */
 
 /* Virtio ABI version, this must match exactly */
 #define VIRTIO_PCI_ABI_VERSION		0
 
-/* How many bits to shift physical queue address written to QUEUE_PFN.
- * 12 is historical, and due to x86 page size. */
-#define VIRTIO_PCI_QUEUE_ADDR_SHIFT	12
+/* Vector value used to disable MSI for queue */
+#define VIRTIO_MSI_NO_VECTOR            0xffff
 
-/* The alignment to use between consumer and producer parts of vring.
- * x86 pagesize again. */
-#define VIRTIO_PCI_VRING_ALIGN		4096
+/* The bit of the ISR which indicates a device configuration change. */
+#define VIRTIO_PCI_ISR_CONFIG		0x2
 
 /* IDs for different capabilities.  Must all exist. */
 /* FIXME: Do we win from separating ISR, NOTIFY and COMMON? */

^ permalink raw reply

* Re: [PATCH 1/1] Staging: hv: storvsc: Move the storage driver out of the staging area
From: Rolf Eike Beer @ 2011-12-08 10:37 UTC (permalink / raw)
  Cc: gregkh, linux-kernel, devel, virtualization, ohering,
	james.bottomley, hch, linux-scsi, K. Y. Srinivasan
In-Reply-To: <1323274574-22863-1-git-send-email-kys@microsoft.com>

> The storage driver (storvsc_drv.c) handles all block storage devices
> assigned to Linux guests hosted on Hyper-V. This driver has been in the
> staging tree for a while and this patch moves it out of the staging area.
> As per Greg's recommendation, this patch makes no changes to the
> staging/hv
> directory. Once the driver moves out of staging, we will cleanup the
> staging/hv directory.
>
> This patch includes all the patches that I have sent against the
> staging/hv
> tree to address the comments I have gotten to date on this storage driver.
>
> Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
> ---
>  drivers/scsi/Kconfig       |    7 +
>  drivers/scsi/Makefile      |    3 +
>  drivers/scsi/storvsc_drv.c | 1586
> ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 1596 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/scsi/storvsc_drv.c
>
> diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
> index 06ea3bc..4910269 100644
> --- a/drivers/scsi/Kconfig
> +++ b/drivers/scsi/Kconfig
> @@ -662,6 +662,13 @@ config VMWARE_PVSCSI
>  	  To compile this driver as a module, choose M here: the
>  	  module will be called vmw_pvscsi.
>
> +config HYPERV_STORAGE
> +	tristate "Microsoft Hyper-V virtual storage driver"
> +	depends on SCSI && HYPERV
> +	default HYPERV
> +	help
> +	  Select this option to enable the Hyper-V virtual storage driver.
> +
>  config LIBFC
>  	tristate "LibFC module"
>  	select SCSI_FC_ATTRS

Maybe that help text could be slightly more verbose? Like "This is only
necessary if you plan to run this kernel as a guest on Microsoft Hyper-V"?
And I would welcome the usual "the module will be called" text for this
driver, too. See the first 2 lines of context of think hunk for a
copy&paste source of this text.

Eike

^ permalink raw reply

* Re: [PATCH] virtio-ring: Use threshold for switching to indirect descriptors
From: Sasha Levin @ 2011-12-08 10:37 UTC (permalink / raw)
  To: Rusty Russell
  Cc: markmc, kvm, Michael S. Tsirkin, linux-kernel, virtualization,
	Avi Kivity
In-Reply-To: <87r50fgzyj.fsf@rustcorp.com.au>

On Thu, 2011-12-08 at 20:14 +1030, Rusty Russell wrote:
> On Wed, 7 Dec 2011 17:48:17 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Wed, Dec 07, 2011 at 04:02:45PM +0200, Sasha Levin wrote:
> > > On Sun, 2011-12-04 at 20:23 +0200, Sasha Levin wrote:
> > > 
> > > [snip]
> > > 
> > > Rusty, Michael, does the below looks a reasonable optimization for you?
> > 
> > OK overall but a bit hard to say for sure as it looks pretty incomplete ...
> 
> A static threshold is very hackish; we need to either initialize it to
> a proven-good value (since noone will ever change it) or be cleverer.

I'll better wait to see how the threshold issue is resolved, and
possibly do it as a dynamic value which depends on the threshold.

I doubt theres one magic value which would work for all.

-- 

Sasha.

^ permalink raw reply

* [RFC 6/11] virtio_pci: don't use the legacy driver if we find the new PCI capabilities.
From: Rusty Russell @ 2011-12-08 10:38 UTC (permalink / raw)
  To: virtualization; +Cc: Sasha Levin, Michael S. Tsirkin
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>


With module option to override.  I assume I can call
pci_find_capability() before pci_request_regions?
---
 drivers/virtio/virtio_pci_legacy.c |   20 +++++++++++++++++++-
 include/linux/virtio_pci.h         |   19 +++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -26,6 +26,10 @@
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
 
+static bool force_nonlegacy;
+module_param(force_nonlegacy, bool, 0644);
+MODULE_PARM_DESC(force_nonlegacy, "Take over non-legacy virtio devices too");
+
 MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
 MODULE_DESCRIPTION("virtio-pci-legacy");
 MODULE_LICENSE("GPL");
@@ -622,7 +626,7 @@ static int __devinit virtio_pci_probe(st
 				      const struct pci_device_id *id)
 {
 	struct virtio_pci_device *vp_dev;
-	int err;
+	int err, cap;
 
 	/* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
 	if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
@@ -654,6 +658,21 @@ static int __devinit virtio_pci_probe(st
 	if (err)
 		goto out;
 
+	/* We leave modern virtio-pci for the modern driver. */
+	cap = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG);
+	if (cap) {
+		if (force_nonlegacy)
+			dev_info(&pci_dev->dev,
+				 "virtio_pci_legacy: forcing legacy mode!\n");
+		else {
+			dev_info(&pci_dev->dev,
+				 "virtio_pci_legacy: leaving to"
+				 " non-legacy driver\n");
+			err = -ENODEV;
+			goto out_enable_device;
+		}
+	}
+
 	err = pci_request_regions(pci_dev, "virtio-pci-legacy");
 	if (err)
 		goto out_enable_device;
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -152,4 +152,23 @@ struct virtio_pci_common_cfg {
 	__le16 queue_msix_vector;/* read-write */
 	__le64 queue_address;	/* read-write: 0xFFFFFFFFFFFFFFFF == DNE. */
 };
+
+#ifdef __KERNEL__
+/* Returns offset of the capability, or 0. */
+static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type)
+{
+	int pos;
+
+	for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
+	     pos > 0;
+	     pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
+		u8 type;
+		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+							 cfg_type), &type);
+		if (type == cfg_type)
+			return pos;
+	}
+	return 0;
+}
+#endif /* __KERNEL__ */
 #endif

^ permalink raw reply

* [RFC 7/11] virtio_pci: new, capability-aware driver.
From: Rusty Russell @ 2011-12-08 10:39 UTC (permalink / raw)
  To: virtualization; +Cc: Sasha Levin, Michael S. Tsirkin
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

Differences:
1) Uses 4 pci capabilities to demark common, irq, notify and dev-specific areas.
2) Guest sets queue size, using host-provided maximum.
3) Guest sets queue alignment, rather than ABI-defined 4096.
4) More than 32 feature bits (a lot more!).
---
 drivers/virtio/Makefile     |    1
 drivers/virtio/virtio_pci.c |  868 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 869 insertions(+)

diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_VIRTIO) += virtio.o
 obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
+obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 obj-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
new file mode 100644
--- /dev/null
+++ b/drivers/virtio/virtio_pci.c
@@ -0,0 +1,869 @@
+/*
+ * Virtio PCI driver
+ *
+ * This module allows virtio devices to be used over a virtual PCI
+ * device.  Copyright 2011, Rusty Russell IBM Corporation, but based
+ * on the older virtio_pci_legacy.c, which was Copyright IBM
+ * Corp. 2007.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#define VIRTIO_PCI_NO_LEGACY
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_pci.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+MODULE_DESCRIPTION("virtio-pci");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("2");
+
+/* Use cacheline size as a good guess at a nice alignment. */
+#define VIRTIO_PCI_ALIGN	SMP_CACHE_BYTES
+
+/* Our device structure */
+struct virtio_pci_device
+{
+	struct virtio_device vdev;
+	struct pci_dev *pci_dev;
+
+	/* The IO mapping for the PCI config space */
+	struct virtio_pci_common_cfg __iomem *common;
+	/* Where to read and clear interrupt */ 
+	u8 __iomem *isr;
+	/* Write the virtqueue index here to notify device of activity. */
+	__le16 __iomem *notify;
+	/* Device-specific data. */
+	void __iomem *device;
+
+	/* a list of queues so we can dispatch IRQs */
+	spinlock_t lock;
+	struct list_head virtqueues;
+
+	/* MSI-X support */
+	int msix_enabled;
+	int intx_enabled;
+	struct msix_entry *msix_entries;
+	/* Name strings for interrupts. This size should be enough,
+	 * and I'm too lazy to allocate each name separately. */
+	char (*msix_names)[256];
+	/* Number of available vectors */
+	unsigned msix_vectors;
+	/* Vectors allocated, excluding per-vq vectors if any */
+	unsigned msix_used_vectors;
+	/* Whether we have vector per vq */
+	bool per_vq_vectors;
+};
+
+/* Constants for MSI-X */
+/* Use first vector for configuration changes, second and the rest for
+ * virtqueues Thus, we need at least 2 vectors for MSI. */
+enum {
+	VP_MSIX_CONFIG_VECTOR = 0,
+	VP_MSIX_VQ_VECTOR = 1,
+};
+
+struct virtio_pci_vq_info
+{
+	/* the actual virtqueue */
+	struct virtqueue *vq;
+
+	/* the number of entries in the queue */
+	int num;
+
+	/* the index of the queue */
+	int queue_index;
+
+	/* the virtual address of the ring queue */
+	void *queue;
+
+	/* the list node for the virtqueues list */
+	struct list_head node;
+
+	/* MSI-X vector (or none) */
+	unsigned msix_vector;
+};
+
+/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
+static struct pci_device_id virtio_pci_id_table[] = {
+	{ 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+	{ 0 },
+};
+
+MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
+
+/* Convert a generic virtio device to our structure */
+static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct virtio_pci_device, vdev);
+}
+
+/* There is no iowrite64.  We use two 32-bit ops. */
+static void iowrite64(u64 val, const __le64 *addr)
+{
+	iowrite32((u32)val, (__le32 *)addr);
+	iowrite32(val >> 32, (__le32 *)addr + 1);
+}
+
+/* There is no ioread64.  We use two 32-bit ops. */
+static u64 ioread64(__le64 *addr)
+{
+	return ioread32(addr) | ((u64)ioread32((__le32 *)addr + 1) << 32);
+}
+
+static u64 vp_get_features(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	u64 features;
+
+	iowrite32(0, &vp_dev->common->device_feature_select);
+	features = ioread32(&vp_dev->common->device_feature);
+	iowrite32(1, &vp_dev->common->device_feature_select);
+	features |= ((u64)ioread32(&vp_dev->common->device_feature) << 32);
+	return features;
+}
+
+static void vp_finalize_features(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
+	iowrite32(0, &vp_dev->common->guest_feature_select);
+	iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
+	iowrite32(1, &vp_dev->common->guest_feature_select);
+	iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
+}
+
+/* virtio config->get() implementation */
+static void vp_get(struct virtio_device *vdev, unsigned offset,
+		   void *buf, unsigned len)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	void __iomem *ioaddr = vp_dev->device + offset;
+	u8 *ptr = buf;
+	int i;
+
+	for (i = 0; i < len; i++)
+		ptr[i] = ioread8(ioaddr + i);
+}
+
+/* the config->set() implementation.  it's symmetric to the config->get()
+ * implementation */
+static void vp_set(struct virtio_device *vdev, unsigned offset,
+		   const void *buf, unsigned len)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	void __iomem *ioaddr = vp_dev->device + offset;
+	const u8 *ptr = buf;
+	int i;
+
+	for (i = 0; i < len; i++)
+		iowrite8(ptr[i], ioaddr + i);
+}
+
+/* config->{get,set}_status() implementations */
+static u8 vp_get_status(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	return ioread8(&vp_dev->common->device_status);
+}
+
+static void vp_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	/* We should never be setting status to 0. */
+	BUG_ON(status == 0);
+	iowrite8(status, &vp_dev->common->device_status);
+}
+
+/* wait for pending irq handlers */
+static void vp_synchronize_vectors(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	int i;
+
+	if (vp_dev->intx_enabled)
+		synchronize_irq(vp_dev->pci_dev->irq);
+
+	for (i = 0; i < vp_dev->msix_vectors; ++i)
+		synchronize_irq(vp_dev->msix_entries[i].vector);
+}
+
+static void vp_reset(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	/* 0 status means a reset. */
+	iowrite8(0, &vp_dev->common->device_status);
+	/* Flush out the status write, and flush in device writes,
+	 * including MSi-X interrupts, if any. */
+	ioread8(&vp_dev->common->device_status);
+	/* Flush pending VQ/configuration callbacks. */
+	vp_synchronize_vectors(vdev);
+}
+
+/* the notify function used when creating a virt queue */
+static void vp_notify(struct virtqueue *vq)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+	struct virtio_pci_vq_info *info = vq->priv;
+
+	/* we write the queue's selector into the notification register to
+	 * signal the other end */
+	iowrite16(info->queue_index, vp_dev->notify);
+}
+
+/* Handle a configuration change: Tell driver if it wants to know. */
+static irqreturn_t vp_config_changed(int irq, void *opaque)
+{
+	struct virtio_pci_device *vp_dev = opaque;
+	struct virtio_driver *drv;
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	if (drv->config_changed)
+		drv->config_changed(&vp_dev->vdev);
+	return IRQ_HANDLED;
+}
+
+/* Notify all virtqueues on an interrupt. */
+static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
+{
+	struct virtio_pci_device *vp_dev = opaque;
+	struct virtio_pci_vq_info *info;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vp_dev->lock, flags);
+	list_for_each_entry(info, &vp_dev->virtqueues, node) {
+		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
+			ret = IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&vp_dev->lock, flags);
+
+	return ret;
+}
+
+/* A small wrapper to also acknowledge the interrupt when it's handled.
+ * I really need an EIO hook for the vring so I can ack the interrupt once we
+ * know that we'll be handling the IRQ but before we invoke the callback since
+ * the callback may notify the host which results in the host attempting to
+ * raise an interrupt that we would then mask once we acknowledged the
+ * interrupt. */
+static irqreturn_t vp_interrupt(int irq, void *opaque)
+{
+	struct virtio_pci_device *vp_dev = opaque;
+	u8 isr;
+
+	/* reading the ISR has the effect of also clearing it so it's very
+	 * important to save off the value. */
+	isr = ioread8(vp_dev->isr);
+
+	/* It's definitely not us if the ISR was not high */
+	if (!isr)
+		return IRQ_NONE;
+
+	/* Configuration change?  Tell driver if it wants to know. */
+	if (isr & VIRTIO_PCI_ISR_CONFIG)
+		vp_config_changed(irq, opaque);
+
+	return vp_vring_interrupt(irq, opaque);
+}
+
+static void vp_free_vectors(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	int i;
+
+	if (vp_dev->intx_enabled) {
+		free_irq(vp_dev->pci_dev->irq, vp_dev);
+		vp_dev->intx_enabled = 0;
+	}
+
+	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
+		free_irq(vp_dev->msix_entries[i].vector, vp_dev);
+
+	if (vp_dev->msix_enabled) {
+		/* Disable the vector used for configuration */
+		iowrite16(VIRTIO_MSI_NO_VECTOR, &vp_dev->common->msix_config);
+		/* Flush the write out to device */
+		ioread16(&vp_dev->common->msix_config);
+
+		pci_disable_msix(vp_dev->pci_dev);
+		vp_dev->msix_enabled = 0;
+		vp_dev->msix_vectors = 0;
+	}
+
+	vp_dev->msix_used_vectors = 0;
+	kfree(vp_dev->msix_names);
+	vp_dev->msix_names = NULL;
+	kfree(vp_dev->msix_entries);
+	vp_dev->msix_entries = NULL;
+}
+
+static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
+				   bool per_vq_vectors)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	const char *name = dev_name(&vp_dev->vdev.dev);
+	unsigned i, v;
+	int err = -ENOMEM;
+
+	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
+				       GFP_KERNEL);
+	if (!vp_dev->msix_entries)
+		goto error;
+	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
+				     GFP_KERNEL);
+	if (!vp_dev->msix_names)
+		goto error;
+
+	for (i = 0; i < nvectors; ++i)
+		vp_dev->msix_entries[i].entry = i;
+
+	/* pci_enable_msix returns positive if we can't get this many. */
+	err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors);
+	if (err > 0)
+		err = -ENOSPC;
+	if (err)
+		goto error;
+	vp_dev->msix_vectors = nvectors;
+	vp_dev->msix_enabled = 1;
+
+	/* Set the vector used for configuration */
+	v = vp_dev->msix_used_vectors;
+	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+		 "%s-config", name);
+	err = request_irq(vp_dev->msix_entries[v].vector,
+			  vp_config_changed, 0, vp_dev->msix_names[v],
+			  vp_dev);
+	if (err)
+		goto error;
+	++vp_dev->msix_used_vectors;
+
+	iowrite16(v, &vp_dev->common->msix_config);
+	/* Verify we had enough resources to assign the vector */
+	v = ioread16(&vp_dev->common->msix_config);
+	if (v == VIRTIO_MSI_NO_VECTOR) {
+		err = -EBUSY;
+		goto error;
+	}
+
+	if (!per_vq_vectors) {
+		/* Shared vector for all VQs */
+		v = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+			 "%s-virtqueues", name);
+		err = request_irq(vp_dev->msix_entries[v].vector,
+				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
+				  vp_dev);
+		if (err)
+			goto error;
+		++vp_dev->msix_used_vectors;
+	}
+	return 0;
+error:
+	vp_free_vectors(vdev);
+	return err;
+}
+
+static int vp_request_intx(struct virtio_device *vdev)
+{
+	int err;
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
+			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
+	if (!err)
+		vp_dev->intx_enabled = 1;
+	return err;
+}
+
+static void *alloc_virtqueue_pages(u16 *num)
+{
+	void *pages;
+
+	/* 1024 entries uses about 32k */
+	if (*num > 1024)
+		*num = 1024;
+
+	for (; *num; *num /= 2) {
+		size_t size = PAGE_ALIGN(vring_size(*num, VIRTIO_PCI_ALIGN));
+		pages = alloc_pages_exact(size,
+					  GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
+		if (pages)
+			return pages;
+	}
+	return NULL;
+}
+
+static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
+				  void (*callback)(struct virtqueue *vq),
+				  const char *name,
+				  u16 msix_vec)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtio_pci_vq_info *info;
+	struct virtqueue *vq;
+	u16 num;
+	int err;
+
+	/* Select the queue we're interested in */
+	iowrite16(index, &vp_dev->common->queue_select);
+
+	switch (ioread64(&vp_dev->common->queue_address)) {
+	case 0xFFFFFFFFFFFFFFFFULL:
+		return ERR_PTR(-ENOENT);
+	case 0:
+		/* Uninitialized.  Excellent. */
+		break;
+	default:
+		/* We've already set this up? */
+		return ERR_PTR(-EBUSY);
+	}
+
+	/* Maximum size must be a power of 2. */
+	num = ioread16(&vp_dev->common->queue_size);
+	if (num & (num - 1)) {
+		dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* allocate and fill out our structure the represents an active
+	 * queue */
+	info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	info->queue_index = index;
+	info->msix_vector = msix_vec;
+
+	info->queue = alloc_virtqueue_pages(&num);
+	if (info->queue == NULL) {
+		err = -ENOMEM;
+		goto out_info;
+	}
+	info->num = num;
+
+	/* create the vring */
+	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_ALIGN,
+				 vdev, info->queue, vp_notify, callback, name);
+	if (!vq) {
+		err = -ENOMEM;
+		goto out_alloc_pages;
+	}
+
+	vq->priv = info;
+	info->vq = vq;
+
+	if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
+		iowrite16(msix_vec, &vp_dev->common->queue_msix_vector);
+		msix_vec = ioread16(&vp_dev->common->queue_msix_vector);
+		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
+			err = -EBUSY;
+			goto out_new_virtqueue;
+		}
+	}
+
+	if (callback) {
+		unsigned long flags;
+		spin_lock_irqsave(&vp_dev->lock, flags);
+		list_add(&info->node, &vp_dev->virtqueues);
+		spin_unlock_irqrestore(&vp_dev->lock, flags);
+	} else {
+		INIT_LIST_HEAD(&info->node);
+	}
+
+	/* Activate the queue. */
+	iowrite64(virt_to_phys(info->queue), &vp_dev->common->queue_address);
+	iowrite16(VIRTIO_PCI_ALIGN, &vp_dev->common->queue_align);
+	iowrite16(num, &vp_dev->common->queue_size);
+
+	return vq;
+
+out_new_virtqueue:
+	vring_del_virtqueue(vq);
+out_alloc_pages:
+	free_pages_exact(info->queue,
+			 PAGE_ALIGN(vring_size(num, VIRTIO_PCI_ALIGN)));
+out_info:
+	kfree(info);
+	return ERR_PTR(err);
+}
+
+static void vp_del_vq(struct virtqueue *vq)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+	struct virtio_pci_vq_info *info = vq->priv;
+	unsigned long flags, size;
+
+	spin_lock_irqsave(&vp_dev->lock, flags);
+	list_del(&info->node);
+	spin_unlock_irqrestore(&vp_dev->lock, flags);
+
+	/* Select and deactivate the queue */
+	iowrite16(info->queue_index, &vp_dev->common->queue_select);
+
+	if (vp_dev->msix_enabled) {
+		iowrite16(VIRTIO_MSI_NO_VECTOR,
+			  &vp_dev->common->queue_msix_vector);
+		/* Flush the write out to device */
+		ioread16(&vp_dev->common->queue_msix_vector);
+	}
+
+	vring_del_virtqueue(vq);
+
+	/* This is for our own benefit, not the device's! */
+	iowrite64(0, &vp_dev->common->queue_address);
+	iowrite16(0, &vp_dev->common->queue_size);
+	iowrite16(0, &vp_dev->common->queue_align);
+
+	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_ALIGN));
+	free_pages_exact(info->queue, size);
+	kfree(info);
+}
+
+/* the config->del_vqs() implementation */
+static void vp_del_vqs(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtqueue *vq, *n;
+	struct virtio_pci_vq_info *info;
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
+		info = vq->priv;
+		if (vp_dev->per_vq_vectors &&
+			info->msix_vector != VIRTIO_MSI_NO_VECTOR)
+			free_irq(vp_dev->msix_entries[info->msix_vector].vector,
+				 vq);
+		vp_del_vq(vq);
+	}
+	vp_dev->per_vq_vectors = false;
+
+	vp_free_vectors(vdev);
+}
+
+static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+			      struct virtqueue *vqs[],
+			      vq_callback_t *callbacks[],
+			      const char *names[],
+			      bool use_msix,
+			      bool per_vq_vectors)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	u16 msix_vec;
+	int i, err, nvectors, allocated_vectors;
+
+	if (!use_msix) {
+		/* Old style: one normal interrupt for change and all vqs. */
+		err = vp_request_intx(vdev);
+		if (err)
+			goto error_request;
+	} else {
+		if (per_vq_vectors) {
+			/* Best option: one for change interrupt, one per vq. */
+			nvectors = 1;
+			for (i = 0; i < nvqs; ++i)
+				if (callbacks[i])
+					++nvectors;
+		} else {
+			/* Second best: one for change, shared for all vqs. */
+			nvectors = 2;
+		}
+
+		err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
+		if (err)
+			goto error_request;
+	}
+
+	vp_dev->per_vq_vectors = per_vq_vectors;
+	allocated_vectors = vp_dev->msix_used_vectors;
+	for (i = 0; i < nvqs; ++i) {
+		if (!callbacks[i] || !vp_dev->msix_enabled)
+			msix_vec = VIRTIO_MSI_NO_VECTOR;
+		else if (vp_dev->per_vq_vectors)
+			msix_vec = allocated_vectors++;
+		else
+			msix_vec = VP_MSIX_VQ_VECTOR;
+		vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
+		if (IS_ERR(vqs[i])) {
+			err = PTR_ERR(vqs[i]);
+			goto error_find;
+		}
+
+		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
+			continue;
+
+		/* allocate per-vq irq if available and necessary */
+		snprintf(vp_dev->msix_names[msix_vec],
+			 sizeof *vp_dev->msix_names,
+			 "%s-%s",
+			 dev_name(&vp_dev->vdev.dev), names[i]);
+		err = request_irq(vp_dev->msix_entries[msix_vec].vector,
+				  vring_interrupt, 0,
+				  vp_dev->msix_names[msix_vec],
+				  vqs[i]);
+		if (err) {
+			vp_del_vq(vqs[i]);
+			goto error_find;
+		}
+	}
+	return 0;
+
+error_find:
+	vp_del_vqs(vdev);
+
+error_request:
+	return err;
+}
+
+/* the config->find_vqs() implementation */
+static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+		       struct virtqueue *vqs[],
+		       vq_callback_t *callbacks[],
+		       const char *names[])
+{
+	int err;
+
+	/* Try MSI-X with one vector per queue. */
+	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
+	if (!err)
+		return 0;
+	/* Fallback: MSI-X with one vector for config, one shared for queues. */
+	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
+				 true, false);
+	if (!err)
+		return 0;
+	/* Finally fall back to regular interrupts. */
+	return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
+				  false, false);
+}
+
+static struct virtio_config_ops virtio_pci_config_ops = {
+	.get		= vp_get,
+	.set		= vp_set,
+	.get_status	= vp_get_status,
+	.set_status	= vp_set_status,
+	.reset		= vp_reset,
+	.find_vqs	= vp_find_vqs,
+	.del_vqs	= vp_del_vqs,
+	.get_features	= vp_get_features,
+	.finalize_features = vp_finalize_features,
+};
+
+static void virtio_pci_release_dev(struct device *_d)
+{
+	/*
+	 * No need for a release method as we allocate/free
+	 * all devices together with the pci devices.
+	 * Provide an empty one to avoid getting a warning from core.
+	 */
+}
+
+static void __iomem *map_capability(struct pci_dev *dev, int off, size_t expect)
+{
+	u8 bar;
+	u32 offset, length;
+	void __iomem *p;
+
+	pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap, bar),
+			     &bar);
+	pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
+			     &offset);
+	pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
+			     &length);
+
+	if (length < expect) {
+		/* FIXME: I assume we want to report errors as PCI device? */
+		dev_err(&dev->dev,
+			"virtio_pci: small capability len %u (%u expected)\n",
+			length, expect);
+		return NULL;
+	}		
+
+	p = pci_iomap_range(dev, bar, offset, length, PAGE_SIZE);
+	if (!p)
+		/* FIXME: I assume we want to report errors as PCI device? */
+		dev_err(&dev->dev,
+			"virtio_pci: unable to map virtio %u@%u on bar %i\n",
+			length, offset, bar);
+	return p;
+}
+
+
+/* the PCI probing function */
+static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
+				      const struct pci_device_id *id)
+{
+	struct virtio_pci_device *vp_dev;
+	int err, common, isr, notify, device;
+
+	/* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
+	if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
+		return -ENODEV;
+
+	if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
+		printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
+		       VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
+		return -ENODEV;
+	}
+
+	/* allocate our structure and fill it out */
+	vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
+	if (vp_dev == NULL)
+		return -ENOMEM;
+
+	vp_dev->vdev.dev.parent = &pci_dev->dev;
+	vp_dev->vdev.dev.release = virtio_pci_release_dev;
+	vp_dev->vdev.config = &virtio_pci_config_ops;
+	vp_dev->pci_dev = pci_dev;
+	INIT_LIST_HEAD(&vp_dev->virtqueues);
+	spin_lock_init(&vp_dev->lock);
+
+	/* Disable MSI/MSIX to bring device to a known good state. */
+	pci_msi_off(pci_dev);
+
+	/* enable the device */
+	err = pci_enable_device(pci_dev);
+	if (err)
+		goto out;
+
+	/* check for a legacy bar0 device. */
+	common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG);
+	if (!common) {
+		dev_info(&pci_dev->dev,
+			 "virtio_pci: leaving for legacy driver\n");
+		err = -ENODEV;
+		goto out_enable_device;
+	}
+	isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG);
+	notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG);
+	device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG);
+	if (!isr || !notify || !device) {
+		dev_err(&pci_dev->dev,
+			"virtio_pci: missing capabilities %i/%i/%i/%i\n",
+			common, isr, notify, device);
+		goto out_enable_device;
+	}
+
+	err = pci_request_regions(pci_dev, "virtio-pci");
+	if (err)
+		goto out_enable_device;
+
+	vp_dev->common = map_capability(pci_dev, common,
+					sizeof(struct virtio_pci_common_cfg));
+	if (!vp_dev->common)
+		goto out_req_regions;
+	vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8));
+	if (!vp_dev->isr)
+		goto out_map_common;
+	vp_dev->notify = map_capability(pci_dev, notify, sizeof(u16));
+	if (!vp_dev->notify)
+		goto out_map_isr;
+	vp_dev->device = map_capability(pci_dev, device, 0);
+	if (!vp_dev->device)
+		goto out_map_notify;
+
+	pci_set_drvdata(pci_dev, vp_dev);
+	pci_set_master(pci_dev);
+
+	/* we use the subsystem vendor/device id as the virtio vendor/device
+	 * id.  this allows us to use the same PCI vendor/device id for all
+	 * virtio devices and to identify the particular virtio driver by
+	 * the subsystem ids */
+	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
+	vp_dev->vdev.id.device = pci_dev->subsystem_device;
+
+	/* finally register the virtio device */
+	err = register_virtio_device(&vp_dev->vdev);
+	if (err)
+		goto out_set_drvdata;
+
+	return 0;
+
+out_set_drvdata:
+	pci_set_drvdata(pci_dev, NULL);
+	pci_iounmap(pci_dev, vp_dev->device);
+out_map_notify:
+	pci_iounmap(pci_dev, vp_dev->notify);
+out_map_isr:
+	pci_iounmap(pci_dev, vp_dev->isr);
+out_map_common:
+	pci_iounmap(pci_dev, vp_dev->common);
+out_req_regions:
+	pci_release_regions(pci_dev);
+out_enable_device:
+	pci_disable_device(pci_dev);
+out:
+	kfree(vp_dev);
+	return err;
+}
+
+static void __devexit virtio_pci_remove(struct pci_dev *pci_dev)
+{
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+
+	unregister_virtio_device(&vp_dev->vdev);
+
+	vp_del_vqs(&vp_dev->vdev);
+	pci_set_drvdata(pci_dev, NULL);
+	pci_iounmap(pci_dev, vp_dev->device);
+	pci_iounmap(pci_dev, vp_dev->notify);
+	pci_iounmap(pci_dev, vp_dev->isr);
+	pci_iounmap(pci_dev, vp_dev->common);
+	pci_release_regions(pci_dev);
+	pci_disable_device(pci_dev);
+	kfree(vp_dev);
+}
+
+#ifdef CONFIG_PM
+static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state)
+{
+	pci_save_state(pci_dev);
+	pci_set_power_state(pci_dev, PCI_D3hot);
+	return 0;
+}
+
+static int virtio_pci_resume(struct pci_dev *pci_dev)
+{
+	pci_restore_state(pci_dev);
+	pci_set_power_state(pci_dev, PCI_D0);
+	return 0;
+}
+#endif
+
+static struct pci_driver virtio_pci_driver = {
+	.name		= "virtio-pci",
+	.id_table	= virtio_pci_id_table,
+	.probe		= virtio_pci_probe,
+	.remove		= __devexit_p(virtio_pci_remove),
+#ifdef CONFIG_PM
+	.suspend	= virtio_pci_suspend,
+	.resume		= virtio_pci_resume,
+#endif
+};
+
+static int __init virtio_pci_init(void)
+{
+	return pci_register_driver(&virtio_pci_driver);
+}
+
+module_init(virtio_pci_init);
+
+static void __exit virtio_pci_exit(void)
+{
+	pci_unregister_driver(&virtio_pci_driver);
+}
+
+module_exit(virtio_pci_exit);

^ permalink raw reply

* [RFC 8/11] virtio_pci: share structure between legacy and modern.
From: Rusty Russell @ 2011-12-08 10:40 UTC (permalink / raw)
  To: virtualization; +Cc: Sasha Levin, Michael S. Tsirkin
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

They're almost identical: we add a "legacy" ioregion (what was
"ioaddr" in the legacy driver), and move it out to virtio_pci-common.h.
---
 drivers/virtio/virtio_pci-common.h |   72 ++++++++++++++++++++++
 drivers/virtio/virtio_pci.c        |   64 -------------------
 drivers/virtio/virtio_pci_legacy.c |  120 +++++++++----------------------------
 3 files changed, 105 insertions(+), 151 deletions(-)

diff --git a/drivers/virtio/virtio_pci-common.h b/drivers/virtio/virtio_pci-common.h
new file mode 100644
--- /dev/null
+++ b/drivers/virtio/virtio_pci-common.h
@@ -0,0 +1,72 @@
+#include <linux/pci.h>
+#include <linux/virtio_pci.h>
+
+/* Our device structure: shared by virtio_pci and virtio_pci_legacy. */
+struct virtio_pci_device
+{
+	struct virtio_device vdev;
+	struct pci_dev *pci_dev;
+
+	/* The IO mapping for the PCI config space (non-legacy mode) */
+	struct virtio_pci_common_cfg __iomem *common;
+	/* Device-specific data (non-legacy mode). */
+	void __iomem *device;
+
+	/* In legacy mode, these two point to within ->legacy. */
+	/* Where to read and clear interrupt */ 
+	u8 __iomem *isr;
+	/* Write the virtqueue index here to notify device of activity. */
+	__le16 __iomem *notify;
+
+#ifdef CONFIG_VIRTIO_PCI_LEGACY
+	/* Instead of common, notify and device, legacy uses this: */
+	void __iomem *legacy;
+#endif
+
+	/* a list of queues so we can dispatch IRQs */
+	spinlock_t lock;
+	struct list_head virtqueues;
+
+	/* MSI-X support */
+	int msix_enabled;
+	int intx_enabled;
+	struct msix_entry *msix_entries;
+	/* Name strings for interrupts. This size should be enough,
+	 * and I'm too lazy to allocate each name separately. */
+	char (*msix_names)[256];
+	/* Number of available vectors */
+	unsigned msix_vectors;
+	/* Vectors allocated, excluding per-vq vectors if any */
+	unsigned msix_used_vectors;
+	/* Whether we have vector per vq */
+	bool per_vq_vectors;
+};
+
+/* Constants for MSI-X */
+/* Use first vector for configuration changes, second and the rest for
+ * virtqueues Thus, we need at least 2 vectors for MSI. */
+enum {
+	VP_MSIX_CONFIG_VECTOR = 0,
+	VP_MSIX_VQ_VECTOR = 1,
+};
+
+struct virtio_pci_vq_info
+{
+	/* the actual virtqueue */
+	struct virtqueue *vq;
+
+	/* the number of entries in the queue */
+	int num;
+
+	/* the index of the queue */
+	int queue_index;
+
+	/* the virtual address of the ring queue */
+	void *queue;
+
+	/* the list node for the virtqueues list */
+	struct list_head node;
+
+	/* MSI-X vector (or none) */
+	unsigned msix_vector;
+};
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -22,6 +22,7 @@
 #include <linux/virtio_pci.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
+#include "virtio_pci-common.h"
 
 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
 MODULE_DESCRIPTION("virtio-pci");
@@ -31,69 +32,6 @@ MODULE_VERSION("2");
 /* Use cacheline size as a good guess at a nice alignment. */
 #define VIRTIO_PCI_ALIGN	SMP_CACHE_BYTES
 
-/* Our device structure */
-struct virtio_pci_device
-{
-	struct virtio_device vdev;
-	struct pci_dev *pci_dev;
-
-	/* The IO mapping for the PCI config space */
-	struct virtio_pci_common_cfg __iomem *common;
-	/* Where to read and clear interrupt */ 
-	u8 __iomem *isr;
-	/* Write the virtqueue index here to notify device of activity. */
-	__le16 __iomem *notify;
-	/* Device-specific data. */
-	void __iomem *device;
-
-	/* a list of queues so we can dispatch IRQs */
-	spinlock_t lock;
-	struct list_head virtqueues;
-
-	/* MSI-X support */
-	int msix_enabled;
-	int intx_enabled;
-	struct msix_entry *msix_entries;
-	/* Name strings for interrupts. This size should be enough,
-	 * and I'm too lazy to allocate each name separately. */
-	char (*msix_names)[256];
-	/* Number of available vectors */
-	unsigned msix_vectors;
-	/* Vectors allocated, excluding per-vq vectors if any */
-	unsigned msix_used_vectors;
-	/* Whether we have vector per vq */
-	bool per_vq_vectors;
-};
-
-/* Constants for MSI-X */
-/* Use first vector for configuration changes, second and the rest for
- * virtqueues Thus, we need at least 2 vectors for MSI. */
-enum {
-	VP_MSIX_CONFIG_VECTOR = 0,
-	VP_MSIX_VQ_VECTOR = 1,
-};
-
-struct virtio_pci_vq_info
-{
-	/* the actual virtqueue */
-	struct virtqueue *vq;
-
-	/* the number of entries in the queue */
-	int num;
-
-	/* the index of the queue */
-	int queue_index;
-
-	/* the virtual address of the ring queue */
-	void *queue;
-
-	/* the list node for the virtqueues list */
-	struct list_head node;
-
-	/* MSI-X vector (or none) */
-	unsigned msix_vector;
-};
-
 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
 static struct pci_device_id virtio_pci_id_table[] = {
 	{ 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -25,6 +25,7 @@
 #include <linux/virtio_pci.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
+#include "virtio_pci-common.h"
 
 static bool force_nonlegacy;
 module_param(force_nonlegacy, bool, 0644);
@@ -35,63 +36,6 @@ MODULE_DESCRIPTION("virtio-pci-legacy");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("1");
 
-/* Our device structure */
-struct virtio_pci_device
-{
-	struct virtio_device vdev;
-	struct pci_dev *pci_dev;
-
-	/* the IO mapping for the PCI config space */
-	void __iomem *ioaddr;
-
-	/* a list of queues so we can dispatch IRQs */
-	spinlock_t lock;
-	struct list_head virtqueues;
-
-	/* MSI-X support */
-	int msix_enabled;
-	int intx_enabled;
-	struct msix_entry *msix_entries;
-	/* Name strings for interrupts. This size should be enough,
-	 * and I'm too lazy to allocate each name separately. */
-	char (*msix_names)[256];
-	/* Number of available vectors */
-	unsigned msix_vectors;
-	/* Vectors allocated, excluding per-vq vectors if any */
-	unsigned msix_used_vectors;
-	/* Whether we have vector per vq */
-	bool per_vq_vectors;
-};
-
-/* Constants for MSI-X */
-/* Use first vector for configuration changes, second and the rest for
- * virtqueues Thus, we need at least 2 vectors for MSI. */
-enum {
-	VP_MSIX_CONFIG_VECTOR = 0,
-	VP_MSIX_VQ_VECTOR = 1,
-};
-
-struct virtio_pci_vq_info
-{
-	/* the actual virtqueue */
-	struct virtqueue *vq;
-
-	/* the number of entries in the queue */
-	int num;
-
-	/* the index of the queue */
-	int queue_index;
-
-	/* the virtual address of the ring queue */
-	void *queue;
-
-	/* the list node for the virtqueues list */
-	struct list_head node;
-
-	/* MSI-X vector (or none) */
-	unsigned msix_vector;
-};
-
 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
 static struct pci_device_id virtio_pci_id_table[] = {
 	{ 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
@@ -112,7 +56,7 @@ static u64 vp_get_features(struct virtio
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
 	/* We only support 32 feature bits. */
-	return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
+	return ioread32(vp_dev->legacy + VIRTIO_PCI_HOST_FEATURES);
 }
 
 /* virtio config->finalize_features() implementation */
@@ -124,7 +68,7 @@ static void vp_finalize_features(struct 
 	vring_transport_features(vdev);
 
 	/* We only support 32 feature bits. */
-	iowrite32(vdev->features, vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
+	iowrite32(vdev->features, vp_dev->legacy+VIRTIO_PCI_GUEST_FEATURES);
 }
 
 /* virtio config->get() implementation */
@@ -132,13 +76,13 @@ static void vp_get(struct virtio_device 
 		   void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr +
+	void __iomem *legacy = vp_dev->legacy +
 				VIRTIO_PCI_CONFIG(vp_dev) + offset;
 	u8 *ptr = buf;
 	int i;
 
 	for (i = 0; i < len; i++)
-		ptr[i] = ioread8(ioaddr + i);
+		ptr[i] = ioread8(legacy + i);
 }
 
 /* the config->set() implementation.  it's symmetric to the config->get()
@@ -147,20 +91,20 @@ static void vp_set(struct virtio_device 
 		   const void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr +
+	void __iomem *legacy = vp_dev->legacy +
 				VIRTIO_PCI_CONFIG(vp_dev) + offset;
 	const u8 *ptr = buf;
 	int i;
 
 	for (i = 0; i < len; i++)
-		iowrite8(ptr[i], ioaddr + i);
+		iowrite8(ptr[i], legacy + i);
 }
 
 /* config->{get,set}_status() implementations */
 static u8 vp_get_status(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	return ioread8(vp_dev->legacy + VIRTIO_PCI_STATUS);
 }
 
 static void vp_set_status(struct virtio_device *vdev, u8 status)
@@ -168,7 +112,7 @@ static void vp_set_status(struct virtio_
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	/* We should never be setting status to 0. */
 	BUG_ON(status == 0);
-	iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	iowrite8(status, vp_dev->legacy + VIRTIO_PCI_STATUS);
 }
 
 /* wait for pending irq handlers */
@@ -188,10 +132,10 @@ static void vp_reset(struct virtio_devic
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	/* 0 status means a reset. */
-	iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	iowrite8(0, vp_dev->legacy + VIRTIO_PCI_STATUS);
 	/* Flush out the status write, and flush in device writes,
 	 * including MSi-X interrupts, if any. */
-	ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	ioread8(vp_dev->legacy + VIRTIO_PCI_STATUS);
 	/* Flush pending VQ/configuration callbacks. */
 	vp_synchronize_vectors(vdev);
 }
@@ -204,7 +148,7 @@ static void vp_notify(struct virtqueue *
 
 	/* we write the queue's selector into the notification register to
 	 * signal the other end */
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+	iowrite16(info->queue_index, vp_dev->legacy + VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
 /* Handle a configuration change: Tell driver if it wants to know. */
@@ -251,7 +195,7 @@ static irqreturn_t vp_interrupt(int irq,
 
 	/* reading the ISR has the effect of also clearing it so it's very
 	 * important to save off the value. */
-	isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
+	isr = ioread8(vp_dev->legacy + VIRTIO_PCI_ISR);
 
 	/* It's definitely not us if the ISR was not high */
 	if (!isr)
@@ -280,9 +224,9 @@ static void vp_free_vectors(struct virti
 	if (vp_dev->msix_enabled) {
 		/* Disable the vector used for configuration */
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
-			  vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+			  vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR);
 		/* Flush the write out to device */
-		ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+		ioread16(vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR);
 
 		pci_disable_msix(vp_dev->pci_dev);
 		vp_dev->msix_enabled = 0;
@@ -336,9 +280,9 @@ static int vp_request_msix_vectors(struc
 		goto error;
 	++vp_dev->msix_used_vectors;
 
-	iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+	iowrite16(v, vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR);
 	/* Verify we had enough resources to assign the vector */
-	v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+	v = ioread16(vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR);
 	if (v == VIRTIO_MSI_NO_VECTOR) {
 		err = -EBUSY;
 		goto error;
@@ -387,11 +331,11 @@ static struct virtqueue *setup_vq(struct
 	int err;
 
 	/* Select the queue we're interested in */
-	iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	iowrite16(index, vp_dev->legacy + VIRTIO_PCI_QUEUE_SEL);
 
 	/* Check if queue is either not available or already active. */
-	num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
-	if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
+	num = ioread16(vp_dev->legacy + VIRTIO_PCI_QUEUE_NUM);
+	if (!num || ioread32(vp_dev->legacy + VIRTIO_PCI_QUEUE_PFN))
 		return ERR_PTR(-ENOENT);
 
 	/* allocate and fill out our structure the represents an active
@@ -413,7 +357,7 @@ static struct virtqueue *setup_vq(struct
 
 	/* activate the queue */
 	iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
-		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
+		  vp_dev->legacy + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
 	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
@@ -427,8 +371,8 @@ static struct virtqueue *setup_vq(struct
 	info->vq = vq;
 
 	if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
-		iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
-		msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		iowrite16(msix_vec, vp_dev->legacy + VIRTIO_MSI_QUEUE_VECTOR);
+		msix_vec = ioread16(vp_dev->legacy + VIRTIO_MSI_QUEUE_VECTOR);
 		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
 			err = -EBUSY;
 			goto out_assign;
@@ -448,7 +392,7 @@ static struct virtqueue *setup_vq(struct
 out_assign:
 	vring_del_virtqueue(vq);
 out_activate_queue:
-	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
+	iowrite32(0, vp_dev->legacy + VIRTIO_PCI_QUEUE_PFN);
 	free_pages_exact(info->queue, size);
 out_info:
 	kfree(info);
@@ -465,19 +409,19 @@ static void vp_del_vq(struct virtqueue *
 	list_del(&info->node);
 	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	iowrite16(info->queue_index, vp_dev->legacy + VIRTIO_PCI_QUEUE_SEL);
 
 	if (vp_dev->msix_enabled) {
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
-			  vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+			  vp_dev->legacy + VIRTIO_MSI_QUEUE_VECTOR);
 		/* Flush the write out to device */
-		ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
+		ioread8(vp_dev->legacy + VIRTIO_PCI_ISR);
 	}
 
 	vring_del_virtqueue(vq);
 
 	/* Select and deactivate the queue */
-	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
+	iowrite32(0, vp_dev->legacy + VIRTIO_PCI_QUEUE_PFN);
 
 	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
 	free_pages_exact(info->queue, size);
@@ -676,8 +620,8 @@ static int __devinit virtio_pci_probe(st
 	if (err)
 		goto out_enable_device;
 
-	vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
-	if (vp_dev->ioaddr == NULL)
+	vp_dev->legacy = pci_iomap(pci_dev, 0, 0);
+	if (vp_dev->legacy == NULL)
 		goto out_req_regions;
 
 	pci_set_drvdata(pci_dev, vp_dev);
@@ -699,7 +643,7 @@ static int __devinit virtio_pci_probe(st
 
 out_set_drvdata:
 	pci_set_drvdata(pci_dev, NULL);
-	pci_iounmap(pci_dev, vp_dev->ioaddr);
+	pci_iounmap(pci_dev, vp_dev->legacy);
 out_req_regions:
 	pci_release_regions(pci_dev);
 out_enable_device:
@@ -717,7 +661,7 @@ static void __devexit virtio_pci_remove(
 
 	vp_del_vqs(&vp_dev->vdev);
 	pci_set_drvdata(pci_dev, NULL);
-	pci_iounmap(pci_dev, vp_dev->ioaddr);
+	pci_iounmap(pci_dev, vp_dev->legacy);
 	pci_release_regions(pci_dev);
 	pci_disable_device(pci_dev);
 	kfree(vp_dev);

^ permalink raw reply

* [RFC 9/11] virtio_pci: share interrupt/notify handlers between legacy and modern.
From: Rusty Russell @ 2011-12-08 10:41 UTC (permalink / raw)
  To: virtualization; +Cc: Sasha Levin, Michael S. Tsirkin
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

If we make the legacy driver set up the ->notify and ->isr pointers in
the struct virtio_pci_device structure, we can use them in common code
(the positions have changed, but the semantics haven't).
---
 drivers/virtio/Makefile            |    4 -
 drivers/virtio/virtio_pci-common.c |   81 ++++++++++++++++++++++++++++++++
 drivers/virtio/virtio_pci-common.h |   34 +++++++++++++
 drivers/virtio/virtio_pci.c        |   87 ++---------------------------------
 drivers/virtio/virtio_pci_legacy.c |   91 ++++---------------------------------
 5 files changed, 135 insertions(+), 162 deletions(-)

diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,6 +1,6 @@
 obj-$(CONFIG_VIRTIO) += virtio.o
 obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
-obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
-obj-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
+obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o virtio_pci-common.o
+obj-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o virtio_pci-common.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
diff --git a/drivers/virtio/virtio_pci-common.c b/drivers/virtio/virtio_pci-common.c
new file mode 100644
--- /dev/null
+++ b/drivers/virtio/virtio_pci-common.c
@@ -0,0 +1,81 @@
+/*
+ * Virtio PCI driver - common code for legacy and non-legacy.
+ *
+ * Copyright 2011, Rusty Russell IBM Corporation, but based on the
+ * older virtio_pci_legacy.c, which was Copyright IBM Corp. 2007.  But
+ * most of the interrupt setup code was written by Michael S. Tsirkin.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#define VIRTIO_PCI_NO_LEGACY
+#include "virtio_pci-common.h"
+#include <linux/virtio_ring.h>
+
+/* the notify function used when creating a virt queue */
+void virtio_pci_notify(struct virtqueue *vq)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+	struct virtio_pci_vq_info *info = vq->priv;
+
+	/* we write the queue's selector into the notification register to
+	 * signal the other end */
+	iowrite16(info->queue_index, vp_dev->notify);
+}
+
+/* Handle a configuration change: Tell driver if it wants to know. */
+irqreturn_t virtio_pci_config_changed(int irq, void *opaque)
+{
+	struct virtio_pci_device *vp_dev = opaque;
+	struct virtio_driver *drv;
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	if (drv->config_changed)
+		drv->config_changed(&vp_dev->vdev);
+	return IRQ_HANDLED;
+}
+
+/* Notify all virtqueues on an interrupt. */
+irqreturn_t virtio_pci_vring_interrupt(int irq, void *opaque)
+{
+	struct virtio_pci_device *vp_dev = opaque;
+	struct virtio_pci_vq_info *info;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vp_dev->lock, flags);
+	list_for_each_entry(info, &vp_dev->virtqueues, node) {
+		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
+			ret = IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&vp_dev->lock, flags);
+
+	return ret;
+}
+
+/* A small wrapper to also acknowledge the interrupt when it's handled.
+ * I really need an EIO hook for the vring so I can ack the interrupt once we
+ * know that we'll be handling the IRQ but before we invoke the callback since
+ * the callback may notify the host which results in the host attempting to
+ * raise an interrupt that we would then mask once we acknowledged the
+ * interrupt. */
+irqreturn_t virtio_pci_interrupt(int irq, void *opaque)
+{
+	struct virtio_pci_device *vp_dev = opaque;
+	u8 isr;
+
+	/* reading the ISR has the effect of also clearing it so it's very
+	 * important to save off the value. */
+	isr = ioread8(vp_dev->isr);
+
+	/* It's definitely not us if the ISR was not high */
+	if (!isr)
+		return IRQ_NONE;
+
+	/* Configuration change?  Tell driver if it wants to know. */
+	if (isr & VIRTIO_PCI_ISR_CONFIG)
+		virtio_pci_config_changed(irq, opaque);
+
+	return virtio_pci_vring_interrupt(irq, opaque);
+}
diff --git a/drivers/virtio/virtio_pci-common.h b/drivers/virtio/virtio_pci-common.h
--- a/drivers/virtio/virtio_pci-common.h
+++ b/drivers/virtio/virtio_pci-common.h
@@ -42,6 +42,12 @@ struct virtio_pci_device
 	bool per_vq_vectors;
 };
 
+/* Convert a generic virtio device to our structure */
+static inline struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct virtio_pci_device, vdev);
+}
+
 /* Constants for MSI-X */
 /* Use first vector for configuration changes, second and the rest for
  * virtqueues Thus, we need at least 2 vectors for MSI. */
@@ -70,3 +76,31 @@ struct virtio_pci_vq_info
 	/* MSI-X vector (or none) */
 	unsigned msix_vector;
 };
+
+/* the notify function used when creating a virt queue */
+void virtio_pci_notify(struct virtqueue *vq);
+/* Handle a configuration change: Tell driver if it wants to know. */
+irqreturn_t virtio_pci_config_changed(int irq, void *opaque);
+/* Notify all virtqueues on an interrupt. */
+irqreturn_t virtio_pci_vring_interrupt(int irq, void *opaque);
+/* Acknowledge, check for config or vq interrupt. */
+irqreturn_t virtio_pci_interrupt(int irq, void *opaque);
+
+/* Core of a config->find_vqs() implementation */
+int virtio_pci_find_vqs(struct virtio_pci_device *vp_dev,
+			__le16 __iomem *msix_config,
+			struct virtqueue *(setup_vq)(struct virtio_pci_device *,
+						     unsigned,
+						     void (*)(struct virtqueue*),
+						     const char *,
+						     u16 msix_vec),
+			void (*del_vq)(struct virtqueue *vq),
+			unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char *names[]);
+
+/* the core of a config->del_vqs() implementation */
+void virtio_pci_del_vqs(struct virtio_pci_device *vp_dev,
+			__le16 __iomem *msix_config,
+			void (*del_vq)(struct virtqueue *vq));
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -40,12 +40,6 @@ static struct pci_device_id virtio_pci_i
 
 MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
 
-/* Convert a generic virtio device to our structure */
-static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
-{
-	return container_of(vdev, struct virtio_pci_device, vdev);
-}
-
 /* There is no iowrite64.  We use two 32-bit ops. */
 static void iowrite64(u64 val, const __le64 *addr)
 {
@@ -151,74 +145,6 @@ static void vp_reset(struct virtio_devic
 	vp_synchronize_vectors(vdev);
 }
 
-/* the notify function used when creating a virt queue */
-static void vp_notify(struct virtqueue *vq)
-{
-	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
-	struct virtio_pci_vq_info *info = vq->priv;
-
-	/* we write the queue's selector into the notification register to
-	 * signal the other end */
-	iowrite16(info->queue_index, vp_dev->notify);
-}
-
-/* Handle a configuration change: Tell driver if it wants to know. */
-static irqreturn_t vp_config_changed(int irq, void *opaque)
-{
-	struct virtio_pci_device *vp_dev = opaque;
-	struct virtio_driver *drv;
-	drv = container_of(vp_dev->vdev.dev.driver,
-			   struct virtio_driver, driver);
-
-	if (drv->config_changed)
-		drv->config_changed(&vp_dev->vdev);
-	return IRQ_HANDLED;
-}
-
-/* Notify all virtqueues on an interrupt. */
-static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
-{
-	struct virtio_pci_device *vp_dev = opaque;
-	struct virtio_pci_vq_info *info;
-	irqreturn_t ret = IRQ_NONE;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vp_dev->lock, flags);
-	list_for_each_entry(info, &vp_dev->virtqueues, node) {
-		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
-			ret = IRQ_HANDLED;
-	}
-	spin_unlock_irqrestore(&vp_dev->lock, flags);
-
-	return ret;
-}
-
-/* A small wrapper to also acknowledge the interrupt when it's handled.
- * I really need an EIO hook for the vring so I can ack the interrupt once we
- * know that we'll be handling the IRQ but before we invoke the callback since
- * the callback may notify the host which results in the host attempting to
- * raise an interrupt that we would then mask once we acknowledged the
- * interrupt. */
-static irqreturn_t vp_interrupt(int irq, void *opaque)
-{
-	struct virtio_pci_device *vp_dev = opaque;
-	u8 isr;
-
-	/* reading the ISR has the effect of also clearing it so it's very
-	 * important to save off the value. */
-	isr = ioread8(vp_dev->isr);
-
-	/* It's definitely not us if the ISR was not high */
-	if (!isr)
-		return IRQ_NONE;
-
-	/* Configuration change?  Tell driver if it wants to know. */
-	if (isr & VIRTIO_PCI_ISR_CONFIG)
-		vp_config_changed(irq, opaque);
-
-	return vp_vring_interrupt(irq, opaque);
-}
-
 static void vp_free_vectors(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -284,7 +210,7 @@ static int vp_request_msix_vectors(struc
 	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
 		 "%s-config", name);
 	err = request_irq(vp_dev->msix_entries[v].vector,
-			  vp_config_changed, 0, vp_dev->msix_names[v],
+			  virtio_pci_config_changed, 0, vp_dev->msix_names[v],
 			  vp_dev);
 	if (err)
 		goto error;
@@ -304,8 +230,8 @@ static int vp_request_msix_vectors(struc
 		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
 			 "%s-virtqueues", name);
 		err = request_irq(vp_dev->msix_entries[v].vector,
-				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
-				  vp_dev);
+				  virtio_pci_vring_interrupt, 0,
+				  vp_dev->msix_names[v], vp_dev);
 		if (err)
 			goto error;
 		++vp_dev->msix_used_vectors;
@@ -321,7 +247,7 @@ static int vp_request_intx(struct virtio
 	int err;
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
-	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
+	err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt,
 			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
 	if (!err)
 		vp_dev->intx_enabled = 1;
@@ -396,7 +322,8 @@ static struct virtqueue *setup_vq(struct
 
 	/* create the vring */
 	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_ALIGN,
-				 vdev, info->queue, vp_notify, callback, name);
+				 vdev, info->queue, virtio_pci_notify,
+				 callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_alloc_pages;
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -44,12 +44,6 @@ static struct pci_device_id virtio_pci_i
 
 MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
 
-/* Convert a generic virtio device to our structure */
-static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
-{
-	return container_of(vdev, struct virtio_pci_device, vdev);
-}
-
 /* virtio config->get_features() implementation */
 static u64 vp_get_features(struct virtio_device *vdev)
 {
@@ -140,74 +134,6 @@ static void vp_reset(struct virtio_devic
 	vp_synchronize_vectors(vdev);
 }
 
-/* the notify function used when creating a virt queue */
-static void vp_notify(struct virtqueue *vq)
-{
-	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
-	struct virtio_pci_vq_info *info = vq->priv;
-
-	/* we write the queue's selector into the notification register to
-	 * signal the other end */
-	iowrite16(info->queue_index, vp_dev->legacy + VIRTIO_PCI_QUEUE_NOTIFY);
-}
-
-/* Handle a configuration change: Tell driver if it wants to know. */
-static irqreturn_t vp_config_changed(int irq, void *opaque)
-{
-	struct virtio_pci_device *vp_dev = opaque;
-	struct virtio_driver *drv;
-	drv = container_of(vp_dev->vdev.dev.driver,
-			   struct virtio_driver, driver);
-
-	if (drv && drv->config_changed)
-		drv->config_changed(&vp_dev->vdev);
-	return IRQ_HANDLED;
-}
-
-/* Notify all virtqueues on an interrupt. */
-static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
-{
-	struct virtio_pci_device *vp_dev = opaque;
-	struct virtio_pci_vq_info *info;
-	irqreturn_t ret = IRQ_NONE;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vp_dev->lock, flags);
-	list_for_each_entry(info, &vp_dev->virtqueues, node) {
-		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
-			ret = IRQ_HANDLED;
-	}
-	spin_unlock_irqrestore(&vp_dev->lock, flags);
-
-	return ret;
-}
-
-/* A small wrapper to also acknowledge the interrupt when it's handled.
- * I really need an EIO hook for the vring so I can ack the interrupt once we
- * know that we'll be handling the IRQ but before we invoke the callback since
- * the callback may notify the host which results in the host attempting to
- * raise an interrupt that we would then mask once we acknowledged the
- * interrupt. */
-static irqreturn_t vp_interrupt(int irq, void *opaque)
-{
-	struct virtio_pci_device *vp_dev = opaque;
-	u8 isr;
-
-	/* reading the ISR has the effect of also clearing it so it's very
-	 * important to save off the value. */
-	isr = ioread8(vp_dev->legacy + VIRTIO_PCI_ISR);
-
-	/* It's definitely not us if the ISR was not high */
-	if (!isr)
-		return IRQ_NONE;
-
-	/* Configuration change?  Tell driver if it wants to know. */
-	if (isr & VIRTIO_PCI_ISR_CONFIG)
-		vp_config_changed(irq, opaque);
-
-	return vp_vring_interrupt(irq, opaque);
-}
-
 static void vp_free_vectors(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -274,7 +200,7 @@ static int vp_request_msix_vectors(struc
 	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
 		 "%s-config", name);
 	err = request_irq(vp_dev->msix_entries[v].vector,
-			  vp_config_changed, 0, vp_dev->msix_names[v],
+			  virtio_pci_config_changed, 0, vp_dev->msix_names[v],
 			  vp_dev);
 	if (err)
 		goto error;
@@ -294,8 +220,8 @@ static int vp_request_msix_vectors(struc
 		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
 			 "%s-virtqueues", name);
 		err = request_irq(vp_dev->msix_entries[v].vector,
-				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
-				  vp_dev);
+				  virtio_pci_vring_interrupt, 0,
+				  vp_dev->msix_names[v], vp_dev);
 		if (err)
 			goto error;
 		++vp_dev->msix_used_vectors;
@@ -311,7 +237,7 @@ static int vp_request_intx(struct virtio
 	int err;
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
-	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
+	err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt,
 			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
 	if (!err)
 		vp_dev->intx_enabled = 1;
@@ -361,7 +287,8 @@ static struct virtqueue *setup_vq(struct
 
 	/* create the vring */
 	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-				 vdev, info->queue, vp_notify, callback, name);
+				 vdev, info->queue, virtio_pci_notify,
+				 callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
@@ -624,6 +551,10 @@ static int __devinit virtio_pci_probe(st
 	if (vp_dev->legacy == NULL)
 		goto out_req_regions;
 
+	/* Setting this lets us share interrupt handlers with virtio_pci */
+	vp_dev->isr = vp_dev->legacy + VIRTIO_PCI_LEGACY_ISR;
+	vp_dev->notify = vp_dev->legacy + VIRTIO_PCI_LEGACY_QUEUE_NOTIFY;
+
 	pci_set_drvdata(pci_dev, vp_dev);
 	pci_set_master(pci_dev);

^ permalink raw reply

* [RFC 10/11] virtio_pci: share virtqueue setup/teardown between modern and legacy driver.
From: Rusty Russell @ 2011-12-08 10:42 UTC (permalink / raw)
  To: virtualization; +Cc: Sasha Levin, Michael S. Tsirkin
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

There's a great deal of work in setting up and disabling interrupts,
particularly with MSI-X, which is generic.  So we move most of the
work out to helpers which take the location of the msix_config
register, and setup_vq and del_vq functions.
---
 drivers/virtio/virtio_pci-common.c |  249 +++++++++++++++++++++++++++++++++++++
 drivers/virtio/virtio_pci-common.h |   19 ++
 drivers/virtio/virtio_pci.c        |  224 +--------------------------------
 drivers/virtio/virtio_pci_legacy.c |  229 ++--------------------------------
 4 files changed, 291 insertions(+), 430 deletions(-)

diff --git a/drivers/virtio/virtio_pci-common.c b/drivers/virtio/virtio_pci-common.c
--- a/drivers/virtio/virtio_pci-common.c
+++ b/drivers/virtio/virtio_pci-common.c
@@ -11,6 +11,7 @@
 #define VIRTIO_PCI_NO_LEGACY
 #include "virtio_pci-common.h"
 #include <linux/virtio_ring.h>
+#include <linux/interrupt.h>
 
 /* the notify function used when creating a virt queue */
 void virtio_pci_notify(struct virtqueue *vq)
@@ -79,3 +80,251 @@ irqreturn_t virtio_pci_interrupt(int irq
 
 	return virtio_pci_vring_interrupt(irq, opaque);
 }
+
+static void vp_free_vectors(struct virtio_device *vdev,
+			    __le16 __iomem *msix_config)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	int i;
+
+	if (vp_dev->intx_enabled) {
+		free_irq(vp_dev->pci_dev->irq, vp_dev);
+		vp_dev->intx_enabled = 0;
+	}
+
+	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
+		free_irq(vp_dev->msix_entries[i].vector, vp_dev);
+
+	if (vp_dev->msix_enabled) {
+		/* Disable the vector used for configuration */
+		iowrite16(VIRTIO_MSI_NO_VECTOR, msix_config);
+		/* Flush the write out to device */
+		ioread16(msix_config);
+
+		pci_disable_msix(vp_dev->pci_dev);
+		vp_dev->msix_enabled = 0;
+		vp_dev->msix_vectors = 0;
+	}
+
+	vp_dev->msix_used_vectors = 0;
+	kfree(vp_dev->msix_names);
+	vp_dev->msix_names = NULL;
+	kfree(vp_dev->msix_entries);
+	vp_dev->msix_entries = NULL;
+}
+
+static int vp_request_msix_vectors(struct virtio_device *vdev,
+				   __le16 __iomem *msix_config,
+				   int nvectors, bool per_vq_vectors)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	const char *name = dev_name(&vp_dev->vdev.dev);
+	unsigned i, v;
+	int err = -ENOMEM;
+
+	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
+				       GFP_KERNEL);
+	if (!vp_dev->msix_entries)
+		goto error;
+	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
+				     GFP_KERNEL);
+	if (!vp_dev->msix_names)
+		goto error;
+
+	for (i = 0; i < nvectors; ++i)
+		vp_dev->msix_entries[i].entry = i;
+
+	/* pci_enable_msix returns positive if we can't get this many. */
+	err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors);
+	if (err > 0)
+		err = -ENOSPC;
+	if (err)
+		goto error;
+	vp_dev->msix_vectors = nvectors;
+	vp_dev->msix_enabled = 1;
+
+	/* Set the vector used for configuration */
+	v = vp_dev->msix_used_vectors;
+	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+		 "%s-config", name);
+	err = request_irq(vp_dev->msix_entries[v].vector,
+			  virtio_pci_config_changed, 0, vp_dev->msix_names[v],
+			  vp_dev);
+	if (err)
+		goto error;
+	++vp_dev->msix_used_vectors;
+
+	iowrite16(v, msix_config);
+	/* Verify we had enough resources to assign the vector */
+	v = ioread16(msix_config);
+	if (v == VIRTIO_MSI_NO_VECTOR) {
+		err = -EBUSY;
+		goto error;
+	}
+
+	if (!per_vq_vectors) {
+		/* Shared vector for all VQs */
+		v = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+			 "%s-virtqueues", name);
+		err = request_irq(vp_dev->msix_entries[v].vector,
+				  virtio_pci_vring_interrupt, 0,
+				  vp_dev->msix_names[v], vp_dev);
+		if (err)
+			goto error;
+		++vp_dev->msix_used_vectors;
+	}
+	return 0;
+error:
+	vp_free_vectors(vdev, msix_config);
+	return err;
+}
+
+static int vp_request_intx(struct virtio_device *vdev)
+{
+	int err;
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt,
+			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
+	if (!err)
+		vp_dev->intx_enabled = 1;
+	return err;
+}
+
+static int vp_try_to_find_vqs(struct virtio_device *vdev,
+			      __le16 __iomem *msix_config,
+			      struct virtqueue *(setup_vq)(struct virtio_device*,
+							   unsigned,
+							   void (*)(struct
+								    virtqueue *),
+							   const char *,
+							   u16 msix_vec),
+			      void (*del_vq)(struct virtqueue *vq),
+			      unsigned nvqs,
+			      struct virtqueue *vqs[],
+			      vq_callback_t *callbacks[],
+			      const char *names[],
+			      bool use_msix,
+			      bool per_vq_vectors)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	u16 msix_vec;
+	int i, err, nvectors, allocated_vectors;
+
+	if (!use_msix) {
+		/* Old style: one normal interrupt for change and all vqs. */
+		err = vp_request_intx(vdev);
+		if (err)
+			goto error_request;
+	} else {
+		if (per_vq_vectors) {
+			/* Best option: one for change interrupt, one per vq. */
+			nvectors = 1;
+			for (i = 0; i < nvqs; ++i)
+				if (callbacks[i])
+					++nvectors;
+		} else {
+			/* Second best: one for change, shared for all vqs. */
+			nvectors = 2;
+		}
+
+		err = vp_request_msix_vectors(vdev, msix_config,
+					      nvectors, per_vq_vectors);
+		if (err)
+			goto error_request;
+	}
+
+	vp_dev->per_vq_vectors = per_vq_vectors;
+	allocated_vectors = vp_dev->msix_used_vectors;
+	for (i = 0; i < nvqs; ++i) {
+		if (!callbacks[i] || !vp_dev->msix_enabled)
+			msix_vec = VIRTIO_MSI_NO_VECTOR;
+		else if (vp_dev->per_vq_vectors)
+			msix_vec = allocated_vectors++;
+		else
+			msix_vec = VP_MSIX_VQ_VECTOR;
+		vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
+		if (IS_ERR(vqs[i])) {
+			err = PTR_ERR(vqs[i]);
+			goto error_find;
+		}
+
+		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
+			continue;
+
+		/* allocate per-vq irq if available and necessary */
+		snprintf(vp_dev->msix_names[msix_vec],
+			 sizeof *vp_dev->msix_names,
+			 "%s-%s",
+			 dev_name(&vp_dev->vdev.dev), names[i]);
+		err = request_irq(vp_dev->msix_entries[msix_vec].vector,
+				  vring_interrupt, 0,
+				  vp_dev->msix_names[msix_vec],
+				  vqs[i]);
+		if (err) {
+			del_vq(vqs[i]);
+			goto error_find;
+		}
+	}
+	return 0;
+
+error_find:
+	virtio_pci_del_vqs(vdev, msix_config, del_vq);
+
+error_request:
+	return err;
+}
+
+/* the config->find_vqs() implementation */
+int virtio_pci_find_vqs(struct virtio_device *vdev,
+			__le16 __iomem *msix_config,
+			struct virtqueue *(setup_vq)(struct virtio_device *,
+						     unsigned,
+						     void (*)(struct virtqueue*),
+						     const char *,
+						     u16 msix_vec),
+			void (*del_vq)(struct virtqueue *vq),
+			unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char *names[])
+{
+	int err;
+
+	/* Try MSI-X with one vector per queue. */
+	err = vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq,
+				 nvqs, vqs, callbacks, names, true, true);
+	if (!err)
+		return 0;
+	/* Fallback: MSI-X with one vector for config, one shared for queues. */
+	err = vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq,
+				 nvqs, vqs, callbacks, names, true, false);
+	if (!err)
+		return 0;
+	/* Finally fall back to regular interrupts. */
+	return vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq,
+				  nvqs, vqs, callbacks, names, false, false);
+}
+
+/* the core of a config->del_vqs() implementation */
+void virtio_pci_del_vqs(struct virtio_device *vdev,
+			__le16 __iomem *msix_config,
+			void (*del_vq)(struct virtqueue *vq))
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtqueue *vq, *n;
+	struct virtio_pci_vq_info *info;
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
+		info = vq->priv;
+		if (vp_dev->per_vq_vectors &&
+			info->msix_vector != VIRTIO_MSI_NO_VECTOR)
+			free_irq(vp_dev->msix_entries[info->msix_vector].vector,
+				 vq);
+		del_vq(vq);
+	}
+	vp_dev->per_vq_vectors = false;
+
+	vp_free_vectors(vdev, msix_config);
+}
diff --git a/drivers/virtio/virtio_pci-common.h b/drivers/virtio/virtio_pci-common.h
--- a/drivers/virtio/virtio_pci-common.h
+++ b/drivers/virtio/virtio_pci-common.h
@@ -87,6 +87,25 @@ irqreturn_t virtio_pci_vring_interrupt(i
 irqreturn_t virtio_pci_interrupt(int irq, void *opaque);
 
 /* Core of a config->find_vqs() implementation */
+int virtio_pci_find_vqs(struct virtio_device *vdev,
+			__le16 __iomem *msix_config,
+			struct virtqueue *(setup_vq)(struct virtio_device *,
+						     unsigned,
+						     void (*)(struct virtqueue*),
+						     const char *,
+						     u16 msix_vec),
+			void (*del_vq)(struct virtqueue *vq),
+			unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char *names[]);
+
+/* the core of a config->del_vqs() implementation */
+void virtio_pci_del_vqs(struct virtio_device *vdev,
+			__le16 __iomem *msix_config,
+			void (*del_vq)(struct virtqueue *vq));
+
+/* Core of a config->find_vqs() implementation */
 int virtio_pci_find_vqs(struct virtio_pci_device *vp_dev,
 			__le16 __iomem *msix_config,
 			struct virtqueue *(setup_vq)(struct virtio_pci_device *,
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -145,115 +145,6 @@ static void vp_reset(struct virtio_devic
 	vp_synchronize_vectors(vdev);
 }
 
-static void vp_free_vectors(struct virtio_device *vdev)
-{
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	int i;
-
-	if (vp_dev->intx_enabled) {
-		free_irq(vp_dev->pci_dev->irq, vp_dev);
-		vp_dev->intx_enabled = 0;
-	}
-
-	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
-		free_irq(vp_dev->msix_entries[i].vector, vp_dev);
-
-	if (vp_dev->msix_enabled) {
-		/* Disable the vector used for configuration */
-		iowrite16(VIRTIO_MSI_NO_VECTOR, &vp_dev->common->msix_config);
-		/* Flush the write out to device */
-		ioread16(&vp_dev->common->msix_config);
-
-		pci_disable_msix(vp_dev->pci_dev);
-		vp_dev->msix_enabled = 0;
-		vp_dev->msix_vectors = 0;
-	}
-
-	vp_dev->msix_used_vectors = 0;
-	kfree(vp_dev->msix_names);
-	vp_dev->msix_names = NULL;
-	kfree(vp_dev->msix_entries);
-	vp_dev->msix_entries = NULL;
-}
-
-static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
-				   bool per_vq_vectors)
-{
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	const char *name = dev_name(&vp_dev->vdev.dev);
-	unsigned i, v;
-	int err = -ENOMEM;
-
-	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
-				       GFP_KERNEL);
-	if (!vp_dev->msix_entries)
-		goto error;
-	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
-				     GFP_KERNEL);
-	if (!vp_dev->msix_names)
-		goto error;
-
-	for (i = 0; i < nvectors; ++i)
-		vp_dev->msix_entries[i].entry = i;
-
-	/* pci_enable_msix returns positive if we can't get this many. */
-	err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors);
-	if (err > 0)
-		err = -ENOSPC;
-	if (err)
-		goto error;
-	vp_dev->msix_vectors = nvectors;
-	vp_dev->msix_enabled = 1;
-
-	/* Set the vector used for configuration */
-	v = vp_dev->msix_used_vectors;
-	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
-		 "%s-config", name);
-	err = request_irq(vp_dev->msix_entries[v].vector,
-			  virtio_pci_config_changed, 0, vp_dev->msix_names[v],
-			  vp_dev);
-	if (err)
-		goto error;
-	++vp_dev->msix_used_vectors;
-
-	iowrite16(v, &vp_dev->common->msix_config);
-	/* Verify we had enough resources to assign the vector */
-	v = ioread16(&vp_dev->common->msix_config);
-	if (v == VIRTIO_MSI_NO_VECTOR) {
-		err = -EBUSY;
-		goto error;
-	}
-
-	if (!per_vq_vectors) {
-		/* Shared vector for all VQs */
-		v = vp_dev->msix_used_vectors;
-		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
-			 "%s-virtqueues", name);
-		err = request_irq(vp_dev->msix_entries[v].vector,
-				  virtio_pci_vring_interrupt, 0,
-				  vp_dev->msix_names[v], vp_dev);
-		if (err)
-			goto error;
-		++vp_dev->msix_used_vectors;
-	}
-	return 0;
-error:
-	vp_free_vectors(vdev);
-	return err;
-}
-
-static int vp_request_intx(struct virtio_device *vdev)
-{
-	int err;
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-
-	err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt,
-			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
-	if (!err)
-		vp_dev->intx_enabled = 1;
-	return err;
-}
-
 static void *alloc_virtqueue_pages(u16 *num)
 {
 	void *pages;
@@ -403,116 +294,19 @@ static void vp_del_vq(struct virtqueue *
 static void vp_del_vqs(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	struct virtqueue *vq, *n;
-	struct virtio_pci_vq_info *info;
-
-	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
-		info = vq->priv;
-		if (vp_dev->per_vq_vectors &&
-			info->msix_vector != VIRTIO_MSI_NO_VECTOR)
-			free_irq(vp_dev->msix_entries[info->msix_vector].vector,
-				 vq);
-		vp_del_vq(vq);
-	}
-	vp_dev->per_vq_vectors = false;
-
-	vp_free_vectors(vdev);
+	virtio_pci_del_vqs(vdev, &vp_dev->common->msix_config, vp_del_vq);
 }
 
-static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-			      struct virtqueue *vqs[],
-			      vq_callback_t *callbacks[],
-			      const char *names[],
-			      bool use_msix,
-			      bool per_vq_vectors)
+static int vp_find_vqs(struct virtio_device *vdev,
+			unsigned nvqs,
+			struct virtqueue *vqs[],
+			vq_callback_t *callbacks[],
+			const char *names[])
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	u16 msix_vec;
-	int i, err, nvectors, allocated_vectors;
-
-	if (!use_msix) {
-		/* Old style: one normal interrupt for change and all vqs. */
-		err = vp_request_intx(vdev);
-		if (err)
-			goto error_request;
-	} else {
-		if (per_vq_vectors) {
-			/* Best option: one for change interrupt, one per vq. */
-			nvectors = 1;
-			for (i = 0; i < nvqs; ++i)
-				if (callbacks[i])
-					++nvectors;
-		} else {
-			/* Second best: one for change, shared for all vqs. */
-			nvectors = 2;
-		}
-
-		err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
-		if (err)
-			goto error_request;
-	}
-
-	vp_dev->per_vq_vectors = per_vq_vectors;
-	allocated_vectors = vp_dev->msix_used_vectors;
-	for (i = 0; i < nvqs; ++i) {
-		if (!callbacks[i] || !vp_dev->msix_enabled)
-			msix_vec = VIRTIO_MSI_NO_VECTOR;
-		else if (vp_dev->per_vq_vectors)
-			msix_vec = allocated_vectors++;
-		else
-			msix_vec = VP_MSIX_VQ_VECTOR;
-		vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
-		if (IS_ERR(vqs[i])) {
-			err = PTR_ERR(vqs[i]);
-			goto error_find;
-		}
-
-		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
-			continue;
-
-		/* allocate per-vq irq if available and necessary */
-		snprintf(vp_dev->msix_names[msix_vec],
-			 sizeof *vp_dev->msix_names,
-			 "%s-%s",
-			 dev_name(&vp_dev->vdev.dev), names[i]);
-		err = request_irq(vp_dev->msix_entries[msix_vec].vector,
-				  vring_interrupt, 0,
-				  vp_dev->msix_names[msix_vec],
-				  vqs[i]);
-		if (err) {
-			vp_del_vq(vqs[i]);
-			goto error_find;
-		}
-	}
-	return 0;
-
-error_find:
-	vp_del_vqs(vdev);
-
-error_request:
-	return err;
-}
-
-/* the config->find_vqs() implementation */
-static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-		       struct virtqueue *vqs[],
-		       vq_callback_t *callbacks[],
-		       const char *names[])
-{
-	int err;
-
-	/* Try MSI-X with one vector per queue. */
-	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
-	if (!err)
-		return 0;
-	/* Fallback: MSI-X with one vector for config, one shared for queues. */
-	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-				 true, false);
-	if (!err)
-		return 0;
-	/* Finally fall back to regular interrupts. */
-	return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-				  false, false);
+	return virtio_pci_find_vqs(vdev, &vp_dev->common->msix_config,
+				   setup_vq, vp_del_vq,
+				   nvqs, vqs, callbacks, names);
 }
 
 static struct virtio_config_ops virtio_pci_config_ops = {
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -134,120 +134,11 @@ static void vp_reset(struct virtio_devic
 	vp_synchronize_vectors(vdev);
 }
 
-static void vp_free_vectors(struct virtio_device *vdev)
-{
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	int i;
-
-	if (vp_dev->intx_enabled) {
-		free_irq(vp_dev->pci_dev->irq, vp_dev);
-		vp_dev->intx_enabled = 0;
-	}
-
-	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
-		free_irq(vp_dev->msix_entries[i].vector, vp_dev);
-
-	if (vp_dev->msix_enabled) {
-		/* Disable the vector used for configuration */
-		iowrite16(VIRTIO_MSI_NO_VECTOR,
-			  vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR);
-		/* Flush the write out to device */
-		ioread16(vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR);
-
-		pci_disable_msix(vp_dev->pci_dev);
-		vp_dev->msix_enabled = 0;
-		vp_dev->msix_vectors = 0;
-	}
-
-	vp_dev->msix_used_vectors = 0;
-	kfree(vp_dev->msix_names);
-	vp_dev->msix_names = NULL;
-	kfree(vp_dev->msix_entries);
-	vp_dev->msix_entries = NULL;
-}
-
-static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
-				   bool per_vq_vectors)
-{
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	const char *name = dev_name(&vp_dev->vdev.dev);
-	unsigned i, v;
-	int err = -ENOMEM;
-
-	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
-				       GFP_KERNEL);
-	if (!vp_dev->msix_entries)
-		goto error;
-	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
-				     GFP_KERNEL);
-	if (!vp_dev->msix_names)
-		goto error;
-
-	for (i = 0; i < nvectors; ++i)
-		vp_dev->msix_entries[i].entry = i;
-
-	/* pci_enable_msix returns positive if we can't get this many. */
-	err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors);
-	if (err > 0)
-		err = -ENOSPC;
-	if (err)
-		goto error;
-	vp_dev->msix_vectors = nvectors;
-	vp_dev->msix_enabled = 1;
-
-	/* Set the vector used for configuration */
-	v = vp_dev->msix_used_vectors;
-	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
-		 "%s-config", name);
-	err = request_irq(vp_dev->msix_entries[v].vector,
-			  virtio_pci_config_changed, 0, vp_dev->msix_names[v],
-			  vp_dev);
-	if (err)
-		goto error;
-	++vp_dev->msix_used_vectors;
-
-	iowrite16(v, vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR);
-	/* Verify we had enough resources to assign the vector */
-	v = ioread16(vp_dev->legacy + VIRTIO_MSI_CONFIG_VECTOR);
-	if (v == VIRTIO_MSI_NO_VECTOR) {
-		err = -EBUSY;
-		goto error;
-	}
-
-	if (!per_vq_vectors) {
-		/* Shared vector for all VQs */
-		v = vp_dev->msix_used_vectors;
-		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
-			 "%s-virtqueues", name);
-		err = request_irq(vp_dev->msix_entries[v].vector,
-				  virtio_pci_vring_interrupt, 0,
-				  vp_dev->msix_names[v], vp_dev);
-		if (err)
-			goto error;
-		++vp_dev->msix_used_vectors;
-	}
-	return 0;
-error:
-	vp_free_vectors(vdev);
-	return err;
-}
-
-static int vp_request_intx(struct virtio_device *vdev)
-{
-	int err;
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-
-	err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt,
-			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
-	if (!err)
-		vp_dev->intx_enabled = 1;
-	return err;
-}
-
-static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
-				  void (*callback)(struct virtqueue *vq),
-				  const char *name,
-				  u16 msix_vec)
+static struct virtqueue *setup_legacy_vq(struct virtio_device *vdev,
+					 unsigned index,
+					 void (*callback)(struct virtqueue *vq),
+					 const char *name,
+					 u16 msix_vec)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtio_pci_vq_info *info;
@@ -326,7 +217,7 @@ out_info:
 	return ERR_PTR(err);
 }
 
-static void vp_del_vq(struct virtqueue *vq)
+static void del_legacy_vq(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 	struct virtio_pci_vq_info *info = vq->priv;
@@ -359,94 +250,10 @@ static void vp_del_vq(struct virtqueue *
 static void vp_del_vqs(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	struct virtqueue *vq, *n;
-	struct virtio_pci_vq_info *info;
 
-	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
-		info = vq->priv;
-		if (vp_dev->per_vq_vectors &&
-			info->msix_vector != VIRTIO_MSI_NO_VECTOR)
-			free_irq(vp_dev->msix_entries[info->msix_vector].vector,
-				 vq);
-		vp_del_vq(vq);
-	}
-	vp_dev->per_vq_vectors = false;
-
-	vp_free_vectors(vdev);
-}
-
-static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
-			      struct virtqueue *vqs[],
-			      vq_callback_t *callbacks[],
-			      const char *names[],
-			      bool use_msix,
-			      bool per_vq_vectors)
-{
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	u16 msix_vec;
-	int i, err, nvectors, allocated_vectors;
-
-	if (!use_msix) {
-		/* Old style: one normal interrupt for change and all vqs. */
-		err = vp_request_intx(vdev);
-		if (err)
-			goto error_request;
-	} else {
-		if (per_vq_vectors) {
-			/* Best option: one for change interrupt, one per vq. */
-			nvectors = 1;
-			for (i = 0; i < nvqs; ++i)
-				if (callbacks[i])
-					++nvectors;
-		} else {
-			/* Second best: one for change, shared for all vqs. */
-			nvectors = 2;
-		}
-
-		err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
-		if (err)
-			goto error_request;
-	}
-
-	vp_dev->per_vq_vectors = per_vq_vectors;
-	allocated_vectors = vp_dev->msix_used_vectors;
-	for (i = 0; i < nvqs; ++i) {
-		if (!callbacks[i] || !vp_dev->msix_enabled)
-			msix_vec = VIRTIO_MSI_NO_VECTOR;
-		else if (vp_dev->per_vq_vectors)
-			msix_vec = allocated_vectors++;
-		else
-			msix_vec = VP_MSIX_VQ_VECTOR;
-		vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
-		if (IS_ERR(vqs[i])) {
-			err = PTR_ERR(vqs[i]);
-			goto error_find;
-		}
-
-		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
-			continue;
-
-		/* allocate per-vq irq if available and necessary */
-		snprintf(vp_dev->msix_names[msix_vec],
-			 sizeof *vp_dev->msix_names,
-			 "%s-%s",
-			 dev_name(&vp_dev->vdev.dev), names[i]);
-		err = request_irq(vp_dev->msix_entries[msix_vec].vector,
-				  vring_interrupt, 0,
-				  vp_dev->msix_names[msix_vec],
-				  vqs[i]);
-		if (err) {
-			vp_del_vq(vqs[i]);
-			goto error_find;
-		}
-	}
-	return 0;
-
-error_find:
-	vp_del_vqs(vdev);
-
-error_request:
-	return err;
+	return virtio_pci_del_vqs(vdev, vp_dev->legacy +
+				  VIRTIO_MSI_LEGACY_CONFIG_VECTOR,
+				  del_legacy_vq);
 }
 
 /* the config->find_vqs() implementation */
@@ -455,20 +262,12 @@ static int vp_find_vqs(struct virtio_dev
 		       vq_callback_t *callbacks[],
 		       const char *names[])
 {
-	int err;
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
-	/* Try MSI-X with one vector per queue. */
-	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
-	if (!err)
-		return 0;
-	/* Fallback: MSI-X with one vector for config, one shared for queues. */
-	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-				 true, false);
-	if (!err)
-		return 0;
-	/* Finally fall back to regular interrupts. */
-	return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
-				  false, false);
+	return virtio_pci_find_vqs(vdev, vp_dev->legacy +
+				   VIRTIO_MSI_LEGACY_CONFIG_VECTOR,
+				   setup_legacy_vq, del_legacy_vq,
+				   nvqs, vqs, callbacks, names);
 }
 
 static struct virtio_config_ops virtio_pci_config_ops = {

^ permalink raw reply

* [RFC 11/11] virtio_pci: simplify common helpers.
From: Rusty Russell @ 2011-12-08 10:44 UTC (permalink / raw)
  To: virtualization; +Cc: Sasha Levin, Michael S. Tsirkin
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

Our helpers can take a virtio_pci_device, rather than converting from
a virtio_device all the time.  They couldn't do this when they were
called from the common virtio code, but now we wrap them anyway, it
simplifies things.
---
 drivers/virtio/virtio_pci-common.c |   54 ++++++++++++++++---------------------
 drivers/virtio/virtio_pci-common.h |    6 ++--
 drivers/virtio/virtio_pci.c        |   10 +++---
 drivers/virtio/virtio_pci_legacy.c |    9 ++----
 4 files changed, 36 insertions(+), 43 deletions(-)

diff --git a/drivers/virtio/virtio_pci-common.c b/drivers/virtio/virtio_pci-common.c
--- a/drivers/virtio/virtio_pci-common.c
+++ b/drivers/virtio/virtio_pci-common.c
@@ -81,10 +81,9 @@ irqreturn_t virtio_pci_interrupt(int irq
 	return virtio_pci_vring_interrupt(irq, opaque);
 }
 
-static void vp_free_vectors(struct virtio_device *vdev,
+static void vp_free_vectors(struct virtio_pci_device *vp_dev,
 			    __le16 __iomem *msix_config)
 {
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	int i;
 
 	if (vp_dev->intx_enabled) {
@@ -113,11 +112,10 @@ static void vp_free_vectors(struct virti
 	vp_dev->msix_entries = NULL;
 }
 
-static int vp_request_msix_vectors(struct virtio_device *vdev,
+static int vp_request_msix_vectors(struct virtio_pci_device *vp_dev,
 				   __le16 __iomem *msix_config,
 				   int nvectors, bool per_vq_vectors)
 {
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	const char *name = dev_name(&vp_dev->vdev.dev);
 	unsigned i, v;
 	int err = -ENOMEM;
@@ -176,30 +174,28 @@ static int vp_request_msix_vectors(struc
 	}
 	return 0;
 error:
-	vp_free_vectors(vdev, msix_config);
+	vp_free_vectors(vp_dev, msix_config);
 	return err;
 }
 
-static int vp_request_intx(struct virtio_device *vdev)
+static int vp_request_intx(struct virtio_pci_device *vp_dev)
 {
 	int err;
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-
 	err = request_irq(vp_dev->pci_dev->irq, virtio_pci_interrupt,
-			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
+			  IRQF_SHARED, dev_name(&vp_dev->vdev.dev), vp_dev);
 	if (!err)
 		vp_dev->intx_enabled = 1;
 	return err;
 }
 
-static int vp_try_to_find_vqs(struct virtio_device *vdev,
+static int vp_try_to_find_vqs(struct virtio_pci_device *vp_dev,
 			      __le16 __iomem *msix_config,
-			      struct virtqueue *(setup_vq)(struct virtio_device*,
-							   unsigned,
-							   void (*)(struct
-								    virtqueue *),
-							   const char *,
-							   u16 msix_vec),
+			      struct virtqueue *(setup_vq)
+			      (struct virtio_pci_device *,
+			       unsigned,
+			       void (*)(struct virtqueue *),
+			       const char *,
+			       u16 msix_vec),
 			      void (*del_vq)(struct virtqueue *vq),
 			      unsigned nvqs,
 			      struct virtqueue *vqs[],
@@ -208,13 +204,12 @@ static int vp_try_to_find_vqs(struct vir
 			      bool use_msix,
 			      bool per_vq_vectors)
 {
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	u16 msix_vec;
 	int i, err, nvectors, allocated_vectors;
 
 	if (!use_msix) {
 		/* Old style: one normal interrupt for change and all vqs. */
-		err = vp_request_intx(vdev);
+		err = vp_request_intx(vp_dev);
 		if (err)
 			goto error_request;
 	} else {
@@ -229,7 +224,7 @@ static int vp_try_to_find_vqs(struct vir
 			nvectors = 2;
 		}
 
-		err = vp_request_msix_vectors(vdev, msix_config,
+		err = vp_request_msix_vectors(vp_dev, msix_config,
 					      nvectors, per_vq_vectors);
 		if (err)
 			goto error_request;
@@ -244,7 +239,7 @@ static int vp_try_to_find_vqs(struct vir
 			msix_vec = allocated_vectors++;
 		else
 			msix_vec = VP_MSIX_VQ_VECTOR;
-		vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
+		vqs[i] = setup_vq(vp_dev, i, callbacks[i], names[i], msix_vec);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
 			goto error_find;
@@ -270,16 +265,16 @@ static int vp_try_to_find_vqs(struct vir
 	return 0;
 
 error_find:
-	virtio_pci_del_vqs(vdev, msix_config, del_vq);
+	virtio_pci_del_vqs(vp_dev, msix_config, del_vq);
 
 error_request:
 	return err;
 }
 
 /* the config->find_vqs() implementation */
-int virtio_pci_find_vqs(struct virtio_device *vdev,
+int virtio_pci_find_vqs(struct virtio_pci_device *vp_dev,
 			__le16 __iomem *msix_config,
-			struct virtqueue *(setup_vq)(struct virtio_device *,
+			struct virtqueue *(setup_vq)(struct virtio_pci_device *,
 						     unsigned,
 						     void (*)(struct virtqueue*),
 						     const char *,
@@ -293,30 +288,29 @@ int virtio_pci_find_vqs(struct virtio_de
 	int err;
 
 	/* Try MSI-X with one vector per queue. */
-	err = vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq,
+	err = vp_try_to_find_vqs(vp_dev, msix_config, setup_vq, del_vq,
 				 nvqs, vqs, callbacks, names, true, true);
 	if (!err)
 		return 0;
 	/* Fallback: MSI-X with one vector for config, one shared for queues. */
-	err = vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq,
+	err = vp_try_to_find_vqs(vp_dev, msix_config, setup_vq, del_vq,
 				 nvqs, vqs, callbacks, names, true, false);
 	if (!err)
 		return 0;
 	/* Finally fall back to regular interrupts. */
-	return vp_try_to_find_vqs(vdev, msix_config, setup_vq, del_vq,
+	return vp_try_to_find_vqs(vp_dev, msix_config, setup_vq, del_vq,
 				  nvqs, vqs, callbacks, names, false, false);
 }
 
 /* the core of a config->del_vqs() implementation */
-void virtio_pci_del_vqs(struct virtio_device *vdev,
+void virtio_pci_del_vqs(struct virtio_pci_device *vp_dev,
 			__le16 __iomem *msix_config,
 			void (*del_vq)(struct virtqueue *vq))
 {
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtqueue *vq, *n;
 	struct virtio_pci_vq_info *info;
 
-	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
+	list_for_each_entry_safe(vq, n, &vp_dev->vdev.vqs, list) {
 		info = vq->priv;
 		if (vp_dev->per_vq_vectors &&
 			info->msix_vector != VIRTIO_MSI_NO_VECTOR)
@@ -326,5 +320,5 @@ void virtio_pci_del_vqs(struct virtio_de
 	}
 	vp_dev->per_vq_vectors = false;
 
-	vp_free_vectors(vdev, msix_config);
+	vp_free_vectors(vp_dev, msix_config);
 }
diff --git a/drivers/virtio/virtio_pci-common.h b/drivers/virtio/virtio_pci-common.h
--- a/drivers/virtio/virtio_pci-common.h
+++ b/drivers/virtio/virtio_pci-common.h
@@ -87,9 +87,9 @@ irqreturn_t virtio_pci_vring_interrupt(i
 irqreturn_t virtio_pci_interrupt(int irq, void *opaque);
 
 /* Core of a config->find_vqs() implementation */
-int virtio_pci_find_vqs(struct virtio_device *vdev,
+int virtio_pci_find_vqs(struct virtio_pci_device *vp_dev,
 			__le16 __iomem *msix_config,
-			struct virtqueue *(setup_vq)(struct virtio_device *,
+			struct virtqueue *(setup_vq)(struct virtio_pci_device *,
 						     unsigned,
 						     void (*)(struct virtqueue*),
 						     const char *,
@@ -101,7 +101,7 @@ int virtio_pci_find_vqs(struct virtio_de
 			const char *names[]);
 
 /* the core of a config->del_vqs() implementation */
-void virtio_pci_del_vqs(struct virtio_device *vdev,
+void virtio_pci_del_vqs(struct virtio_pci_device *vp_dev,
 			__le16 __iomem *msix_config,
 			void (*del_vq)(struct virtqueue *vq));
 
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -163,12 +163,12 @@ static void *alloc_virtqueue_pages(u16 *
 	return NULL;
 }
 
-static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
+static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
+				  unsigned index,
 				  void (*callback)(struct virtqueue *vq),
 				  const char *name,
 				  u16 msix_vec)
 {
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtio_pci_vq_info *info;
 	struct virtqueue *vq;
 	u16 num;
@@ -213,7 +213,7 @@ static struct virtqueue *setup_vq(struct
 
 	/* create the vring */
 	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_ALIGN,
-				 vdev, info->queue, virtio_pci_notify,
+				 &vp_dev->vdev, info->queue, virtio_pci_notify,
 				 callback, name);
 	if (!vq) {
 		err = -ENOMEM;
@@ -294,7 +294,7 @@ static void vp_del_vq(struct virtqueue *
 static void vp_del_vqs(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	virtio_pci_del_vqs(vdev, &vp_dev->common->msix_config, vp_del_vq);
+	virtio_pci_del_vqs(vp_dev, &vp_dev->common->msix_config, vp_del_vq);
 }
 
 static int vp_find_vqs(struct virtio_device *vdev,
@@ -304,7 +304,7 @@ static int vp_find_vqs(struct virtio_dev
 			const char *names[])
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	return virtio_pci_find_vqs(vdev, &vp_dev->common->msix_config,
+	return virtio_pci_find_vqs(vp_dev, &vp_dev->common->msix_config,
 				   setup_vq, vp_del_vq,
 				   nvqs, vqs, callbacks, names);
 }
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -134,13 +134,12 @@ static void vp_reset(struct virtio_devic
 	vp_synchronize_vectors(vdev);
 }
 
-static struct virtqueue *setup_legacy_vq(struct virtio_device *vdev,
+static struct virtqueue *setup_legacy_vq(struct virtio_pci_device *vp_dev,
 					 unsigned index,
 					 void (*callback)(struct virtqueue *vq),
 					 const char *name,
 					 u16 msix_vec)
 {
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtio_pci_vq_info *info;
 	struct virtqueue *vq;
 	unsigned long flags, size;
@@ -178,7 +177,7 @@ static struct virtqueue *setup_legacy_vq
 
 	/* create the vring */
 	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-				 vdev, info->queue, virtio_pci_notify,
+				 &vp_dev->vdev, info->queue, virtio_pci_notify,
 				 callback, name);
 	if (!vq) {
 		err = -ENOMEM;
@@ -251,7 +250,7 @@ static void vp_del_vqs(struct virtio_dev
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
-	return virtio_pci_del_vqs(vdev, vp_dev->legacy +
+	return virtio_pci_del_vqs(vp_dev, vp_dev->legacy +
 				  VIRTIO_MSI_LEGACY_CONFIG_VECTOR,
 				  del_legacy_vq);
 }
@@ -264,7 +263,7 @@ static int vp_find_vqs(struct virtio_dev
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 
-	return virtio_pci_find_vqs(vdev, vp_dev->legacy +
+	return virtio_pci_find_vqs(vp_dev, vp_dev->legacy +
 				   VIRTIO_MSI_LEGACY_CONFIG_VECTOR,
 				   setup_legacy_vq, del_legacy_vq,
 				   nvqs, vqs, callbacks, names);

^ permalink raw reply

* Re: [PATCH v4 00/12] virtio: s4 support
From: Amit Shah @ 2011-12-08 11:56 UTC (permalink / raw)
  To: Rusty Russell
  Cc: linux-kernel, Michael S. Tsirkin, levinsasha928,
	Virtualization List
In-Reply-To: <87liqoirgm.fsf@rustcorp.com.au>

On (Wed) 07 Dec 2011 [21:22:41], Rusty Russell wrote:
> On Wed, 7 Dec 2011 13:14:56 +0530, Amit Shah <amit.shah@redhat.com> wrote:
> > On (Wed) 07 Dec 2011 [17:54:29], Rusty Russell wrote:
> > > I figure there's a reason, but it seems a bit weird :)
> > 
> > Well, there is one reason right now: migrating storage along with
> > VMs.  The guest needs to sync all data to the disk before the target
> > host accesses the image file.
> 
> I don't see why.  Sure, if qemu (or whatever) is doing buffering, it
> needs to flush that.  It doesn't, last I looked, it just maps to
> read/write.
> 
> What am I missing?

I'm sorry, I got confused.  The external file migration Gleb
mentioned, though, is something that libvirt uses right now and causes
guest time craziness so S4 is one (and natural) way to solve it.

		Amit

^ permalink raw reply

* Re: [PATCH v3 2/3] hvc_init(): Enforce one-time initialization.
From: Amit Shah @ 2011-12-08 12:08 UTC (permalink / raw)
  To: Miche Baker-Harvey
  Cc: Stephen Rothwell, xen-devel, Konrad Rzeszutek Wilk,
	Benjamin Herrenschmidt, linux-kernel, virtualization,
	Anton Blanchard, Mike Waychison, ppc-dev, Greg Kroah-Hartman,
	Eric Northrup
In-Reply-To: <CAB8Rdar=qOokxZkjQVRmo=zN_f75jPwzU+6=WBRo0g+P7w+pAw@mail.gmail.com>

On (Tue) 06 Dec 2011 [09:05:38], Miche Baker-Harvey wrote:
> Amit,
> 
> Ah, indeed.  I am not using MSI-X, so virtio_pci::vp_try_to_find_vqs()
> calls vp_request_intx() and sets up an interrupt callback.  From
> there, when an interrupt occurs, the stack looks something like this:
> 
> virtio_pci::vp_interrupt()
>   virtio_pci::vp_vring_interrupt()
>     virtio_ring::vring_interrupt()
>       vq->vq.callback()  <-- in this case, that's virtio_console::control_intr()
>         workqueue::schedule_work()
>           workqueue::queue_work()
>             queue_work_on(get_cpu())  <-- queues the work on the current CPU.
> 
> I'm not doing anything to keep multiple control message from being
> sent concurrently to the guest, and we will take those interrupts on
> any CPU. I've confirmed that the two instances of
> handle_control_message() are occurring on different CPUs.

So let's have a new helper, port_lock() that takes the port-specific
spinlock.  There has to be a new helper, since the port lock should
depend on the portdev lock being taken too.  For the port addition
case, just the portdev lock should be taken.  For any other
operations, the port lock should be taken.

My assumption was that we would be able to serialise the work items,
but that will be too restrictive.  Taking port locks sounds like a
better idea.

We'd definitely need the port lock in the control work handler.  We
might need it in a few more places (like module removal), but we'll
worry about that later.

Does this sound fine?

		Amit

^ permalink raw reply

* Re: [PATCH 0/11] RFC: PCI using capabilitities
From: Sasha Levin @ 2011-12-08 15:37 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Michael S. Tsirkin, Avi Kivity, kvm, virtualization
In-Reply-To: <87pqfzgy6p.fsf@rustcorp.com.au>

On Thu, 2011-12-08 at 20:52 +1030, Rusty Russell wrote:
> Here's the patch series I ended up with.  I haven't coded up the QEMU
> side yet, so no idea if the new driver works.
> 
> Questions:
> (1) Do we win from separating ISR, NOTIFY and COMMON?
> (2) I used a "u8 bar"; should I use a bir and pack it instead?  BIR
>     seems a little obscure (noone else in the kernel source seems to
>     refer to it).

I started implementing it for KVM tools, when I noticed a strange thing:
my vq creating was failing because the driver was reading a value other
than 0 from the address field of a new vq, and failing.

I've added simple prints in the usermode code, and saw the following
ordering:

1. queue select vq 0
2. queue read address (returns 0 - new vq)
3. queue write address (good address of vq)
4. queue read address (returns !=0, fails)
4. queue select vq 1

From that I understood that the ordering is wrong, the driver was trying
to read address before selecting the correct vq.

At that point, I've added simple prints to the driver. Initially it
looked as follows:

	iowrite16(index, &vp_dev->common->queue_select);

	switch (ioread64(&vp_dev->common->queue_address)) {
		[...]
	};

So I added prints before the iowrite16() and after the ioread64(), and
saw that while the driver prints were ordered, the device ones weren't:

	[    1.264052] before iowrite index=1
	kvmtool: net returning pfn (vq=0): 310706176
	kvmtool: queue selected: 1
	[    1.264890] after ioread index=1

Suspecting that something was wrong with ordering, I've added a print
between the iowrite and the ioread, and it finally started working well.

Which leads me to the question: Are MMIO vs MMIO reads/writes not
ordered?

-- 

Sasha.

^ permalink raw reply

* Re: [PATCH RFC V3 2/4] kvm hypervisor : Add a hypercall to KVM hypervisor to support pv-ticketlocks
From: Raghavendra K T @ 2011-12-08 17:35 UTC (permalink / raw)
  To: Raghavendra K T
  Cc: Jeremy Fitzhardinge, x86, Peter Zijlstra, Virtualization,
	H. Peter Anvin, Stefano Stabellini, Xen, Dave Jiang, KVM,
	Raghavendra K T, Ingo Molnar, Avi Kivity, Rik van Riel,
	Konrad Rzeszutek Wilk, Srivatsa Vaddagiri, Jeremy Fitzhardinge,
	Sasha Levin, Sedat Dilek, Thomas Gleixner, Yinghai Lu,
	Greg Kroah-Hartman, LKML, Dave Hansen
In-Reply-To: <4ED7DA85.5080504@linux.vnet.ibm.com>

On 12/02/2011 01:20 AM, Raghavendra K T wrote:
>>>
>>> +/*
>>> + * kvm_pv_kick_cpu_op: Kick a vcpu.
>>> + *
>>> + * @cpu - vcpu to be kicked.
>>> + */
>>> +static void kvm_pv_kick_cpu_op(struct kvm *kvm, int cpu)
>>> +{
>>> + struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, cpu);
>>
>> There is no guarantee that guest cpu numbers match host vcpu numbers.
>> Use APIC IDs, and kvm_apic_match_dest().
>
> Okay. I have to dig more on this.
>
Sorry for late reply on this, was experimenting with the changes needed.

Wanted to confirm if it is according to your expectation.

  Host side change should look like this to get vcpu,

	int i;
	struct kvm_vcpu *vcpu = NULL;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_apic_present(vcpu))
		continue;

		if (kvm_apic_match_dest(vcpu, 0, 0, cpu, 0)) {
		break;
		}
	}


In guest side, convert the cpu to apicid using,

apicid = apic->cpu_present_to_apicid(cpu);
OR
apicid =  per_cpu(x86_cpu_to_apicid, cpu);

But I have a question, as you know, we are storing the waiting cpus in
cpumask, to track the cpu to be kicked.

You want to change the logic to store the apicid directly instead of
cpu during contention or is it OK to convert before kick hypercall?.

Probably it would be more good if I can get to know the scenario,
where guest cpu numbers does not match host vcpu numbers. It may answer 
the whole question and help me in testing/validating the code.

Thanks
- Raghu

^ permalink raw reply

* Re: [net-next RFC PATCH 5/5] virtio-net: flow director support
From: Sridhar Samudrala @ 2011-12-09  2:00 UTC (permalink / raw)
  To: Jason Wang
  Cc: krkumar2, kvm, mst, virtualization, levinsasha928, netdev,
	bhutchings
In-Reply-To: <4EDF47D9.1090501@redhat.com>

On 12/7/2011 3:02 AM, Jason Wang wrote:
> On 12/06/2011 11:42 PM, Sridhar Samudrala wrote:
>> On 12/6/2011 5:15 AM, Stefan Hajnoczi wrote:
>>> On Tue, Dec 6, 2011 at 10:21 AM, Jason Wang<jasowang@redhat.com>  
>>> wrote:
>>>> On 12/06/2011 05:18 PM, Stefan Hajnoczi wrote:
>>>>> On Tue, Dec 6, 2011 at 6:33 AM, Jason Wang<jasowang@redhat.com>    
>>>>> wrote:
>>>>>> On 12/05/2011 06:55 PM, Stefan Hajnoczi wrote:
>>>>>>> On Mon, Dec 5, 2011 at 8:59 AM, Jason Wang<jasowang@redhat.com>
>>>>>>>   wrote:
>>>>> The vcpus are just threads and may not be bound to physical CPUs, so
>>>>> what is the big picture here?  Is the guest even in the position to
>>>>> set the best queue mappings today?
>>>>
>>>> Not sure it could publish the best mapping but the idea is to make 
>>>> sure the
>>>> packets of a flow were handled by the same guest vcpu and may be 
>>>> the same
>>>> vhost thread in order to eliminate the packet reordering and lock
>>>> contention. But this assumption does not take the bouncing of vhost 
>>>> or vcpu
>>>> threads which would also affect the result.
>>> Okay, this is why I'd like to know what the big picture here is.  What
>>> solution are you proposing?  How are we going to have everything from
>>> guest application, guest kernel, host threads, and host NIC driver
>>> play along so we get the right steering up the entire stack.  I think
>>> there needs to be an answer to that before changing virtio-net to add
>>> any steering mechanism.
>>>
>>>
>> Yes. Also the current model of  a vhost thread per VM's interface 
>> doesn't help with packet steering
>> all the way from the guest to the host physical NIC.
>>
>> I think we need to have vhost thread(s) per-CPU that can handle 
>> packets to/from physical NIC's
>> TX/RX queues. Currently we have a single vhost thread for a VM's i/f 
>> that handles all the packets from
>> various flows coming from a multi-queue physical NIC.
>
> Even if we have per-cpu workthread, only one socket is used to queue 
> the packet then, so a multiple queue(sockets) tap/macvtap is still 
> needed.
I think so.  We need per-cpu tap/macvtap sockets along with per-cpu 
vhost threads.
This will parallelize all the way from physical NIC to vhost.

Thanks
Sridhar

^ permalink raw reply

* Re: [net-next RFC PATCH 0/5] Series short description
From: Rusty Russell @ 2011-12-09  5:31 UTC (permalink / raw)
  To: Ben Hutchings, Jason Wang
  Cc: krkumar2, kvm, mst, netdev, virtualization, levinsasha928
In-Reply-To: <1323277324.2728.34.camel@bwh-desktop>

On Wed, 7 Dec 2011 17:02:04 +0000, Ben Hutchings <bhutchings@solarflare.com> wrote:
> Solarflare controllers (sfc driver) have 8192 perfect filters for
> TCP/IPv4 and UDP/IPv4 which can be used for flow steering.  (The filters
> are organised as a hash table, but matched based on 5-tuples.)  I
> implemented the 'accelerated RFS' interface in this driver.
> 
> I believe the Intel 82599 controllers (ixgbe driver) have both
> hash-based and perfect filter modes and the driver can be configured to
> use one or the other.  The driver has its own independent mechanism for
> steering RX and TX flows which predates RFS; I don't know whether it
> uses hash-based or perfect filters.

Thanks for this summary (and Jason, too).  I've fallen a long way behind
NIC state-of-the-art.
 
> Most multi-queue controllers could support a kind of hash-based
> filtering for TCP/IP by adjusting the RSS indirection table.  However,
> this table is usually quite small (64-256 entries).  This means that
> hash collisions will be quite common and this can result in reordering.
> The same applies to the small table Jason has proposed for virtio-net.

But this happens on real hardware today.  Better that real hardware is
nice, but is it overkill?

And can't you reorder even with perfect matching, since prior packets
will be on the old queue and more recent ones on the new queue?  Does it
discard or requeue old ones?  Or am I missing a trick?

Thanks,
Rusty.

^ permalink raw reply

* Re: [PATCH] virtio-ring: Use threshold for switching to indirect descriptors
From: Rusty Russell @ 2011-12-09  5:33 UTC (permalink / raw)
  To: Sasha Levin
  Cc: markmc, kvm, Michael S. Tsirkin, linux-kernel, virtualization,
	Avi Kivity
In-Reply-To: <1323340668.3904.21.camel@lappy>

On Thu, 08 Dec 2011 12:37:48 +0200, Sasha Levin <levinsasha928@gmail.com> wrote:
> On Thu, 2011-12-08 at 20:14 +1030, Rusty Russell wrote:
> > On Wed, 7 Dec 2011 17:48:17 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > On Wed, Dec 07, 2011 at 04:02:45PM +0200, Sasha Levin wrote:
> > > > On Sun, 2011-12-04 at 20:23 +0200, Sasha Levin wrote:
> > > > 
> > > > [snip]
> > > > 
> > > > Rusty, Michael, does the below looks a reasonable optimization for you?
> > > 
> > > OK overall but a bit hard to say for sure as it looks pretty incomplete ...
> > 
> > A static threshold is very hackish; we need to either initialize it to
> > a proven-good value (since noone will ever change it) or be cleverer.
> 
> I'll better wait to see how the threshold issue is resolved, and
> possibly do it as a dynamic value which depends on the threshold.
> 
> I doubt theres one magic value which would work for all.

Sure, but if it's generally better than the current value, I'll apply it.

Cheers,
Rusty.

^ permalink raw reply

* Re: [PATCH 0/11] RFC: PCI using capabilitities
From: Rusty Russell @ 2011-12-09  6:17 UTC (permalink / raw)
  To: Sasha Levin; +Cc: Michael S. Tsirkin, Avi Kivity, kvm, virtualization
In-Reply-To: <1323358657.32487.9.camel@lappy>

On Thu, 08 Dec 2011 17:37:37 +0200, Sasha Levin <levinsasha928@gmail.com> wrote:
> Which leads me to the question: Are MMIO vs MMIO reads/writes not
> ordered?

That seems really odd, especially being repeatable.

BTW, that's an address, not a pfn now.

Cheers,
Rusty.

^ permalink raw reply

* Re: [PATCH RFC V3 2/4] kvm hypervisor : Add a hypercall to KVM hypervisor to support pv-ticketlocks
From: Raghavendra K T @ 2011-12-09 10:59 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Jeremy Fitzhardinge, x86, Peter Zijlstra, Virtualization,
	H. Peter Anvin, Stefano Stabellini, Xen, Dave Jiang, KVM,
	Raghavendra K T, Ingo Molnar, Rik van Riel, Konrad Rzeszutek Wilk,
	Srivatsa Vaddagiri, Jeremy Fitzhardinge, Sasha Levin, Sedat Dilek,
	Thomas Gleixner, Yinghai Lu, Greg Kroah-Hartman, LKML,
	Dave Hansen, Suzuki
In-Reply-To: <4EE08606.2060508@redhat.com>

On 12/08/2011 03:10 PM, Avi Kivity wrote:
> On 12/07/2011 06:46 PM, Raghavendra K T wrote:
>> On 12/07/2011 08:22 PM, Avi Kivity wrote:
>>> On 12/07/2011 03:39 PM, Marcelo Tosatti wrote:
>>>>> Also I think we can keep the kicked flag in vcpu->requests, no need
>>>>> for
>>>>> new storage.
>>>>
>>>> Was going to suggest it but it violates the currently organized
>>>> processing of entries at the beginning of vcpu_enter_guest.
>>>>
>>>> That is, this "kicked" flag is different enough from vcpu->requests
>>>> processing that a separate variable seems worthwhile (even more
>>>> different with convertion to MP_STATE at KVM_GET_MP_STATE).
>>>
>>> IMO, it's similar to KVM_REQ_EVENT (which can also cause mpstate to
>>> change due to apic re-evaluation).
>>>
>>
>> Ok, So what I understand is we have to either :
>> 1. retain current kick flag AS-IS but would have to make it migration
>> friendly. [I still have to get more familiar with migration side]
>> or
>> 2. introduce notion similar to KVM_REQ_PVLOCK_KICK(??) to be part of
>> vcpu->requests.
>>
>> So what would be better? Please let me know.
>>
>
> IMO, KVM_REQ.
>

Ok, 'll continue in this direction. Hmm I think now the race condition 
should be kept in mind, pointed by Marcello.

^ permalink raw reply

* [PATCH v3 REPOST] xen-netfront: delay gARP until backend switches to Connected
From: Laszlo Ersek @ 2011-12-09 11:38 UTC (permalink / raw)
  To: ian.campbell, konrad.wilk, jeremy, xen-devel, virtualization,
	netdev, linux-kernel

After a guest is live migrated, the xen-netfront driver emits a gratuitous
ARP message, so that networking hardware on the target host's subnet can
take notice, and public routing to the guest is re-established. However,
if the packet appears on the backend interface before the backend is added
to the target host's bridge, the packet is lost, and the migrated guest's
peers become unable to talk to the guest.

A sufficient two-parts condition to prevent the above is:

(1) ensure that the backend only moves to Connected xenbus state after its
hotplug scripts completed, ie. the netback interface got added to the
bridge; and

(2) ensure the frontend only queues the gARP when it sees the backend move
to Connected.

These two together provide complete ordering. Sub-condition (1) is
satisfied by pvops commit 43223efd9bfd.

In general, the full condition is sufficient, not necessary, because,
according to [1], live migration has been working for a long time without
satisfying sub-condition (2). However, after 43223efd9bfd was backported
to the RHEL-5 host to ensure (1), (2) still proved necessary in the RHEL-6
guest. This patch intends to provide (2) for upstream.

The Reviewed-by line comes from [2].

[1] http://old-list-archives.xen.org/xen-devel/2011-06/msg01969.html
[2] http://old-list-archives.xen.org/xen-devel/2011-07/msg00484.html

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Ian Campbell <ian.campbell@citrix.com>
---
 drivers/net/xen-netfront.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index d29365a..f033656 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1646,7 +1646,6 @@ static void netback_changed(struct xenbus_device *dev,
 	case XenbusStateInitialised:
 	case XenbusStateReconfiguring:
 	case XenbusStateReconfigured:
-	case XenbusStateConnected:
 	case XenbusStateUnknown:
 	case XenbusStateClosed:
 		break;
@@ -1657,6 +1656,9 @@ static void netback_changed(struct xenbus_device *dev,
 		if (xennet_connect(netdev) != 0)
 			break;
 		xenbus_switch_state(dev, XenbusStateConnected);
+		break;
+
+	case XenbusStateConnected:
 		netif_notify_peers(netdev);
 		break;

-- 
1.7.4.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox