From: Liu Ping Fan <kernelfans@gmail.com>
To: kvm@vger.kernel.org, netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, qemu-devel@nongnu.org,
Avi Kivity <avi@redhat.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>,
Rusty Russell <rusty@rustcorp.com.au>,
Anthony Liguori <anthony@codemonkey.ws>,
Ryan Harper <ryanh@us.ibm.com>, Shirley Ma <xma@us.ibm.com>,
Krishna Kumar <krkumar2@in.ibm.com>,
Tom Lendacky <toml@us.ibm.com>
Subject: [PATCH 1/2] [kvm/virtio]: make virtio support NUMA attr
Date: Thu, 17 May 2012 17:20:55 +0800 [thread overview]
Message-ID: <1337246456-30909-4-git-send-email-kernelfans@gmail.com> (raw)
In-Reply-To: <1337246456-30909-1-git-send-email-kernelfans@gmail.com>
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
For each NUMA node reported by vhost, we allocate a pair of I/O vqs,
assign each of them an MSI-X IRQ, and set the IRQ affinity to the set of
vcpus in the same node.
We also allocate the vqs PAGE_SIZE-aligned, so that when the page faults
happen the host can back the vqs of different nodes with memory on the
proper node.
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
drivers/virtio/virtio.c | 2 +-
drivers/virtio/virtio_pci.c | 35 +++++++++++++++++++++++++++++++++--
drivers/virtio/virtio_ring.c | 9 ++++++---
include/linux/virtio.h | 9 +++++++++
include/linux/virtio_config.h | 1 +
include/linux/virtio_pci.h | 9 +++++++++
6 files changed, 59 insertions(+), 6 deletions(-)
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 984c501..79e873f 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -136,7 +136,7 @@ static int virtio_dev_probe(struct device *_d)
set_bit(i, dev->features);
dev->config->finalize_features(dev);
-
+ dev->config->get_numa_map(dev);
err = drv->probe(dev);
if (err)
add_status(dev, VIRTIO_CONFIG_S_FAILED);
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 2e03d41..5bb8a97 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -129,6 +129,24 @@ static void vp_finalize_features(struct virtio_device *vdev)
iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
}
+/*
+ * virtio config->get_numa_map() implementation: read the 32-bit mask of
+ * allowed NUMA nodes from the device and count the nodes it contains.
+ */
+static void vp_get_numa_map(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	u32 map;
+
+	/* We only support 32 numa bits. */
+	map = ioread32(vp_dev->ioaddr + VIRTIO_PCI_NUMA_MAP);
+	vdev->allow_map = map;
+
+	/* Each set bit is one allowed node, so the node count is simply the
+	 * population count of the mask. */
+	vdev->node_cnt = hweight32(map);
+}
+
/* virtio config->get() implementation */
static void vp_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
@@ -516,6 +534,8 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
u16 msix_vec;
int i, err, nvectors, allocated_vectors;
+ int irq, next, prev = 0;
+ struct cpumask *mask;
if (!use_msix) {
/* Old style: one normal interrupt for change and all vqs. */
@@ -562,14 +582,24 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
sizeof *vp_dev->msix_names,
"%s-%s",
dev_name(&vp_dev->vdev.dev), names[i]);
- err = request_irq(vp_dev->msix_entries[msix_vec].vector,
- vring_interrupt, 0,
+ irq = vp_dev->msix_entries[msix_vec].vector;
+ err = request_irq(irq, vring_interrupt, 0,
vp_dev->msix_names[msix_vec],
vqs[i]);
if (err) {
vp_del_vq(vqs[i]);
goto error_find;
}
+ if (i == vdev->node_cnt)
+ prev = 0;
+ /* fix me the @size */
+ next = find_next_bit(vdev->allow_map, 64, prev);
+ prev = next;
+ if (next < 64) {
+ mask = vnode_to_vcpumask(next);
+ mask = cpumask_and(mask, cpu_online_mask, mask);
+ irq_set_affinity(irq, mask);
+ }
}
return 0;
@@ -619,6 +649,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
.del_vqs = vp_del_vqs,
.get_features = vp_get_features,
.finalize_features = vp_finalize_features,
+ .get_numa_map = vp_get_numa_map,
.bus_name = vp_bus_name,
};
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5aa43c3..5baa949 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -626,15 +626,18 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
const char *name)
{
struct vring_virtqueue *vq;
- unsigned int i;
+ unsigned int i, size, max;
/* We assume num is a power of 2. */
if (num & (num - 1)) {
dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
return NULL;
}
-
- vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
+ size = PAGE_ALIGN (sizeof(*vq) + sizeof(void *)*num);
+ /* Allocate on PAGE boundary, so host can locate them at proper
+ * node
+ */
+ vq = kmalloc(size, GFP_KERNEL);
if (!vq)
return NULL;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 8efd28a..ec992c9 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -9,6 +9,12 @@
#include <linux/mod_devicetable.h>
#include <linux/gfp.h>
+struct virtio_node {	/* NOTE(review): looks like per-NUMA-node virtqueue state — confirm */
+ int node_id;	/* presumably the NUMA node number — TODO confirm */
+ struct virtqueue *rvq;	/* presumably the receive vq of this node's pair — verify against users */
+ struct virtqueue *svq;	/* presumably the send vq of this node's pair — verify against users */
+};
+
/**
* virtqueue - a queue to register buffers for sending or receiving.
* @list: the chain of virtqueues for this device
@@ -22,6 +28,7 @@ struct virtqueue {
void (*callback)(struct virtqueue *vq);
const char *name;
struct virtio_device *vdev;
+ struct virtio_node *node;
void *priv;
};
@@ -66,6 +73,8 @@ struct virtio_device {
struct virtio_device_id id;
struct virtio_config_ops *config;
struct list_head vqs;
+ int node_cnt;
+ unsigned long allow_map;
/* Note that this is a Linux set_bit-style bitmap. */
unsigned long features[1];
void *priv;
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 7323a33..5e2fd77 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -124,6 +124,7 @@ struct virtio_config_ops {
void (*del_vqs)(struct virtio_device *);
u32 (*get_features)(struct virtio_device *vdev);
void (*finalize_features)(struct virtio_device *vdev);
+ void (*get_numa_map)(struct virtio_device *vdev);
const char *(*bus_name)(struct virtio_device *vdev);
};
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index ea66f3f..1426717 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -78,9 +78,18 @@
/* Vector value used to disable MSI for queue */
#define VIRTIO_MSI_NO_VECTOR 0xffff
+#ifdef VIRTIO_NUMA
+/* 32bits to show allowed numa */
+#define VIRTIO_PCI_NUMA_MAP 24
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG(dev) 28
+#else
/* The remaining space is defined by each driver as the per-driver
* configuration space */
#define VIRTIO_PCI_CONFIG(dev) ((dev)->msix_enabled ? 24 : 20)
+#endif
/* Virtio ABI version, this must match exactly */
#define VIRTIO_PCI_ABI_VERSION 0
--
1.7.4.4
WARNING: multiple messages have this Message-ID (diff)
From: Liu Ping Fan <kernelfans@gmail.com>
To: kvm@vger.kernel.org, netdev@vger.kernel.org
Cc: Krishna Kumar <krkumar2@in.ibm.com>, Shirley Ma <xma@us.ibm.com>,
Tom Lendacky <toml@us.ibm.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
qemu-devel@nongnu.org, Rusty Russell <rusty@rustcorp.com.au>,
Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>,
linux-kernel@vger.kernel.org, Ryan Harper <ryanh@us.ibm.com>,
Avi Kivity <avi@redhat.com>,
Anthony Liguori <anthony@codemonkey.ws>
Subject: [Qemu-devel] [PATCH 1/2] [kvm/virtio]: make virtio support NUMA attr
Date: Thu, 17 May 2012 17:20:55 +0800 [thread overview]
Message-ID: <1337246456-30909-4-git-send-email-kernelfans@gmail.com> (raw)
In-Reply-To: <1337246456-30909-1-git-send-email-kernelfans@gmail.com>
From: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
For each NUMA node reported by vhost, we allocate a pair of I/O vqs,
assign each of them an MSI-X IRQ, and set the IRQ affinity to the set of
vcpus in the same node.
We also allocate the vqs PAGE_SIZE-aligned, so that when the page faults
happen the host can back the vqs of different nodes with memory on the
proper node.
Signed-off-by: Liu Ping Fan <pingfank@linux.vnet.ibm.com>
---
drivers/virtio/virtio.c | 2 +-
drivers/virtio/virtio_pci.c | 35 +++++++++++++++++++++++++++++++++--
drivers/virtio/virtio_ring.c | 9 ++++++---
include/linux/virtio.h | 9 +++++++++
include/linux/virtio_config.h | 1 +
include/linux/virtio_pci.h | 9 +++++++++
6 files changed, 59 insertions(+), 6 deletions(-)
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 984c501..79e873f 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -136,7 +136,7 @@ static int virtio_dev_probe(struct device *_d)
set_bit(i, dev->features);
dev->config->finalize_features(dev);
-
+ dev->config->get_numa_map(dev);
err = drv->probe(dev);
if (err)
add_status(dev, VIRTIO_CONFIG_S_FAILED);
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 2e03d41..5bb8a97 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -129,6 +129,24 @@ static void vp_finalize_features(struct virtio_device *vdev)
iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
}
+/*
+ * virtio config->get_numa_map() implementation: read the 32-bit mask of
+ * allowed NUMA nodes from the device and count the nodes it contains.
+ */
+static void vp_get_numa_map(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	u32 map;
+
+	/* We only support 32 numa bits. */
+	map = ioread32(vp_dev->ioaddr + VIRTIO_PCI_NUMA_MAP);
+	vdev->allow_map = map;
+
+	/* Each set bit is one allowed node, so the node count is simply the
+	 * population count of the mask. */
+	vdev->node_cnt = hweight32(map);
+}
+
/* virtio config->get() implementation */
static void vp_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
@@ -516,6 +534,8 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
u16 msix_vec;
int i, err, nvectors, allocated_vectors;
+ int irq, next, prev = 0;
+ struct cpumask *mask;
if (!use_msix) {
/* Old style: one normal interrupt for change and all vqs. */
@@ -562,14 +582,24 @@ static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
sizeof *vp_dev->msix_names,
"%s-%s",
dev_name(&vp_dev->vdev.dev), names[i]);
- err = request_irq(vp_dev->msix_entries[msix_vec].vector,
- vring_interrupt, 0,
+ irq = vp_dev->msix_entries[msix_vec].vector;
+ err = request_irq(irq, vring_interrupt, 0,
vp_dev->msix_names[msix_vec],
vqs[i]);
if (err) {
vp_del_vq(vqs[i]);
goto error_find;
}
+ if (i == vdev->node_cnt)
+ prev = 0;
+ /* fix me the @size */
+ next = find_next_bit(vdev->allow_map, 64, prev);
+ prev = next;
+ if (next < 64) {
+ mask = vnode_to_vcpumask(next);
+ mask = cpumask_and(mask, cpu_online_mask, mask);
+ irq_set_affinity(irq, mask);
+ }
}
return 0;
@@ -619,6 +649,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
.del_vqs = vp_del_vqs,
.get_features = vp_get_features,
.finalize_features = vp_finalize_features,
+ .get_numa_map = vp_get_numa_map,
.bus_name = vp_bus_name,
};
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5aa43c3..5baa949 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -626,15 +626,18 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
const char *name)
{
struct vring_virtqueue *vq;
- unsigned int i;
+ unsigned int i, size, max;
/* We assume num is a power of 2. */
if (num & (num - 1)) {
dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
return NULL;
}
-
- vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
+ size = PAGE_ALIGN (sizeof(*vq) + sizeof(void *)*num);
+ /* Allocate on PAGE boundary, so host can locate them at proper
+ * node
+ */
+ vq = kmalloc(size, GFP_KERNEL);
if (!vq)
return NULL;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 8efd28a..ec992c9 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -9,6 +9,12 @@
#include <linux/mod_devicetable.h>
#include <linux/gfp.h>
+struct virtio_node {	/* NOTE(review): looks like per-NUMA-node virtqueue state — confirm */
+ int node_id;	/* presumably the NUMA node number — TODO confirm */
+ struct virtqueue *rvq;	/* presumably the receive vq of this node's pair — verify against users */
+ struct virtqueue *svq;	/* presumably the send vq of this node's pair — verify against users */
+};
+
/**
* virtqueue - a queue to register buffers for sending or receiving.
* @list: the chain of virtqueues for this device
@@ -22,6 +28,7 @@ struct virtqueue {
void (*callback)(struct virtqueue *vq);
const char *name;
struct virtio_device *vdev;
+ struct virtio_node *node;
void *priv;
};
@@ -66,6 +73,8 @@ struct virtio_device {
struct virtio_device_id id;
struct virtio_config_ops *config;
struct list_head vqs;
+ int node_cnt;
+ unsigned long allow_map;
/* Note that this is a Linux set_bit-style bitmap. */
unsigned long features[1];
void *priv;
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 7323a33..5e2fd77 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -124,6 +124,7 @@ struct virtio_config_ops {
void (*del_vqs)(struct virtio_device *);
u32 (*get_features)(struct virtio_device *vdev);
void (*finalize_features)(struct virtio_device *vdev);
+ void (*get_numa_map)(struct virtio_device *vdev);
const char *(*bus_name)(struct virtio_device *vdev);
};
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index ea66f3f..1426717 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -78,9 +78,18 @@
/* Vector value used to disable MSI for queue */
#define VIRTIO_MSI_NO_VECTOR 0xffff
+#ifdef VIRTIO_NUMA
+/* 32bits to show allowed numa */
+#define VIRTIO_PCI_NUMA_MAP 24
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG(dev) 28
+#else
/* The remaining space is defined by each driver as the per-driver
* configuration space */
#define VIRTIO_PCI_CONFIG(dev) ((dev)->msix_enabled ? 24 : 20)
+#endif
/* Virtio ABI version, this must match exactly */
#define VIRTIO_PCI_ABI_VERSION 0
--
1.7.4.4
next prev parent reply other threads:[~2012-05-17 9:20 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-05-17 9:20 [RFC:kvm] export host NUMA info to guest & make emulated device NUMA attr Liu Ping Fan
2012-05-17 9:20 ` [Qemu-devel] " Liu Ping Fan
2012-05-17 9:20 ` Liu Ping Fan
2012-05-17 9:20 ` [PATCH 1/2] [kvm/vhost]: make vhost support NUMA model Liu Ping Fan
2012-05-17 9:20 ` [Qemu-devel] " Liu Ping Fan
2012-05-17 9:20 ` [PATCH 2/2] [kvm/vhost-net]: make vhost net own NUMA attribute Liu Ping Fan
2012-05-17 9:20 ` [Qemu-devel] " Liu Ping Fan
2012-05-17 9:20 ` Liu Ping Fan
2012-05-17 9:20 ` Liu Ping Fan [this message]
2012-05-17 9:20 ` [Qemu-devel] [PATCH 1/2] [kvm/virtio]: make virtio support NUMA attr Liu Ping Fan
2012-05-17 9:20 ` [PATCH 2/2] [net/virtio_net]: make virtio_net support NUMA info Liu Ping Fan
2012-05-17 9:20 ` [Qemu-devel] " Liu Ping Fan
2012-05-18 16:14 ` [RFC:kvm] export host NUMA info to guest & make emulated device NUMA attr Shirley Ma
2012-05-18 16:14 ` [Qemu-devel] " Shirley Ma
2012-05-22 9:28 ` Liu ping fan
2012-05-22 9:28 ` [Qemu-devel] " Liu ping fan
2012-05-23 14:52 ` Andrew Theurer
2012-05-23 14:52 ` [Qemu-devel] " Andrew Theurer
2012-05-23 15:16 ` Michael S. Tsirkin
2012-05-23 15:16 ` [Qemu-devel] " Michael S. Tsirkin
2012-05-23 15:16 ` Michael S. Tsirkin
2012-05-25 3:29 ` Liu ping fan
2012-05-25 3:29 ` [Qemu-devel] " Liu ping fan
2012-05-25 3:29 ` Liu ping fan
2012-05-25 4:05 ` Liu ping fan
2012-05-25 4:05 ` [Qemu-devel] " Liu ping fan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1337246456-30909-4-git-send-email-kernelfans@gmail.com \
--to=kernelfans@gmail.com \
--cc=anthony@codemonkey.ws \
--cc=avi@redhat.com \
--cc=krkumar2@in.ibm.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=qemu-devel@nongnu.org \
--cc=rusty@rustcorp.com.au \
--cc=ryanh@us.ibm.com \
--cc=toml@us.ibm.com \
--cc=vatsa@linux.vnet.ibm.com \
--cc=xma@us.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.