* [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-11 13:45 [PATCH RFC v1 0/2] vhost: ring format independence Michael S. Tsirkin
@ 2019-10-11 13:45 ` Michael S. Tsirkin
2019-10-12 7:28 ` Jason Wang
2019-10-12 7:28 ` Jason Wang
2019-10-11 13:45 ` Michael S. Tsirkin
` (6 subsequent siblings)
7 siblings, 2 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-11 13:45 UTC (permalink / raw)
To: linux-kernel; +Cc: Jason Wang, kvm, virtualization, netdev
The idea is to support multiple ring formats by converting
to a format-independent array of descriptors.
This costs extra cycles, but we gain in ability
to fetch a batch of descriptors in one go, which
is good for code cache locality.
To simplify benchmarking, I kept the old code
around so one can switch back and forth by
writing into a module parameter.
This will go away in the final submission.
This patch causes a minor performance degradation,
it's been kept as simple as possible for ease of review.
Next patch gets us back the performance by adding batching.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/test.c | 17 ++-
drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
drivers/vhost/vhost.h | 16 +++
3 files changed, 327 insertions(+), 5 deletions(-)
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 056308008288..39a018a7af2d 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -18,6 +18,9 @@
#include "test.h"
#include "vhost.h"
+static int newcode = 0;
+module_param(newcode, int, 0644);
+
/* Max number of bytes transferred before requeueing the job.
* Using this limit prevents one virtqueue from starving others. */
#define VHOST_TEST_WEIGHT 0x80000
@@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
vhost_disable_notify(&n->dev, vq);
for (;;) {
- head = vhost_get_vq_desc(vq, vq->iov,
- ARRAY_SIZE(vq->iov),
- &out, &in,
- NULL, NULL);
+ if (newcode)
+ head = vhost_get_vq_desc_batch(vq, vq->iov,
+ ARRAY_SIZE(vq->iov),
+ &out, &in,
+ NULL, NULL);
+ else
+ head = vhost_get_vq_desc(vq, vq->iov,
+ ARRAY_SIZE(vq->iov),
+ &out, &in,
+ NULL, NULL);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 36ca2cf419bf..36661d6cb51f 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
struct vhost_virtqueue *vq)
{
vq->num = 1;
+ vq->ndescs = 0;
vq->desc = NULL;
vq->avail = NULL;
vq->used = NULL;
@@ -369,6 +370,9 @@ static int vhost_worker(void *data)
static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
{
+ kfree(vq->descs);
+ vq->descs = NULL;
+ vq->max_descs = 0;
kfree(vq->indirect);
vq->indirect = NULL;
kfree(vq->log);
@@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
+ vq->max_descs = dev->iov_limit;
+ vq->descs = kmalloc_array(vq->max_descs,
+ sizeof(*vq->descs),
+ GFP_KERNEL);
vq->indirect = kmalloc_array(UIO_MAXIOV,
sizeof(*vq->indirect),
GFP_KERNEL);
@@ -392,7 +400,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
GFP_KERNEL);
vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
GFP_KERNEL);
- if (!vq->indirect || !vq->log || !vq->heads)
+ if (!vq->indirect || !vq->log || !vq->heads || !vq->descs)
goto err_nomem;
}
return 0;
@@ -2346,6 +2354,295 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
}
EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
+static struct vhost_desc *peek_split_desc(struct vhost_virtqueue *vq)
+{
+ BUG_ON(!vq->ndescs);
+ return &vq->descs[vq->ndescs - 1];
+}
+
+static void pop_split_desc(struct vhost_virtqueue *vq)
+{
+ BUG_ON(!vq->ndescs);
+ --vq->ndescs;
+}
+
+static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, u16 id)
+{
+ struct vhost_desc *h;
+
+ if (unlikely(vq->ndescs >= vq->max_descs))
+ return -EINVAL;
+ h = &vq->descs[vq->ndescs++];
+ h->addr = vhost64_to_cpu(vq, desc->addr);
+ h->len = vhost32_to_cpu(vq, desc->len);
+ h->flags = vhost16_to_cpu(vq, desc->flags);
+ h->id = id;
+
+ return 0;
+}
+
+static int fetch_indirect_descs(struct vhost_virtqueue *vq,
+ struct vhost_desc *indirect,
+ u16 head)
+{
+ struct vring_desc desc;
+ unsigned int i = 0, count, found = 0;
+ u32 len = indirect->len;
+ struct iov_iter from;
+ int ret;
+
+ /* Sanity check */
+ if (unlikely(len % sizeof desc)) {
+ vq_err(vq, "Invalid length in indirect descriptor: "
+ "len 0x%llx not multiple of 0x%zx\n",
+ (unsigned long long)len,
+ sizeof desc);
+ return -EINVAL;
+ }
+
+ ret = translate_desc(vq, indirect->addr, len, vq->indirect,
+ UIO_MAXIOV, VHOST_ACCESS_RO);
+ if (unlikely(ret < 0)) {
+ if (ret != -EAGAIN)
+ vq_err(vq, "Translation failure %d in indirect.\n", ret);
+ return ret;
+ }
+ iov_iter_init(&from, READ, vq->indirect, ret, len);
+
+ /* We will use the result as an address to read from, so most
+ * architectures only need a compiler barrier here. */
+ read_barrier_depends();
+
+ count = len / sizeof desc;
+ /* Buffers are chained via a 16 bit next field, so
+ * we can have at most 2^16 of these. */
+ if (unlikely(count > USHRT_MAX + 1)) {
+ vq_err(vq, "Indirect buffer length too big: %d\n",
+ indirect->len);
+ return -E2BIG;
+ }
+ if (unlikely(vq->ndescs + count > vq->max_descs)) {
+ vq_err(vq, "Too many indirect + direct descs: %d + %d\n",
+ vq->ndescs, indirect->len);
+ return -E2BIG;
+ }
+
+ do {
+ if (unlikely(++found > count)) {
+ vq_err(vq, "Loop detected: last one at %u "
+ "indirect size %u\n",
+ i, count);
+ return -EINVAL;
+ }
+ if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) {
+ vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
+ i, (size_t)indirect->addr + i * sizeof desc);
+ return -EINVAL;
+ }
+ if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) {
+ vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n",
+ i, (size_t)indirect->addr + i * sizeof desc);
+ return -EINVAL;
+ }
+
+ push_split_desc(vq, &desc, head);
+ } while ((i = next_desc(vq, &desc)) != -1);
+ return 0;
+}
+
+static int fetch_descs(struct vhost_virtqueue *vq)
+{
+ struct vring_desc desc;
+ unsigned int i, head, found = 0;
+ u16 last_avail_idx;
+ __virtio16 avail_idx;
+ __virtio16 ring_head;
+ int ret;
+
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ last_avail_idx = vq->last_avail_idx;
+
+ if (vq->avail_idx == vq->last_avail_idx) {
+ if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
+ vq_err(vq, "Failed to access avail idx at %p\n",
+ &vq->avail->idx);
+ return -EFAULT;
+ }
+ vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+
+ if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
+ vq_err(vq, "Guest moved used index from %u to %u",
+ last_avail_idx, vq->avail_idx);
+ return -EFAULT;
+ }
+
+ /* If there's nothing new since last we looked, return
+ * invalid.
+ */
+ if (vq->avail_idx == last_avail_idx)
+ return vq->num;
+
+ /* Only get avail ring entries after they have been
+ * exposed by guest.
+ */
+ smp_rmb();
+ }
+
+ /* Grab the next descriptor number they're advertising */
+ if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
+ vq_err(vq, "Failed to read head: idx %d address %p\n",
+ last_avail_idx,
+ &vq->avail->ring[last_avail_idx % vq->num]);
+ return -EFAULT;
+ }
+
+ head = vhost16_to_cpu(vq, ring_head);
+
+ /* If their number is silly, that's an error. */
+ if (unlikely(head >= vq->num)) {
+ vq_err(vq, "Guest says index %u > %u is available",
+ head, vq->num);
+ return -EINVAL;
+ }
+
+ i = head;
+ do {
+ if (unlikely(i >= vq->num)) {
+ vq_err(vq, "Desc index is %u > %u, head = %u",
+ i, vq->num, head);
+ return -EINVAL;
+ }
+ if (unlikely(++found > vq->num)) {
+ vq_err(vq, "Loop detected: last one at %u "
+ "vq size %u head %u\n",
+ i, vq->num, head);
+ return -EINVAL;
+ }
+ ret = vhost_get_desc(vq, &desc, i);
+ if (unlikely(ret)) {
+ vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
+ i, vq->desc + i);
+ return -EFAULT;
+ }
+ ret = push_split_desc(vq, &desc, head);
+ if (unlikely(ret)) {
+ vq_err(vq, "Failed to save descriptor: idx %d\n", i);
+ return -EINVAL;
+ }
+ } while ((i = next_desc(vq, &desc)) != -1);
+
+ /* On success, increment avail index. */
+ vq->last_avail_idx++;
+
+ /* Assume notifications from guest are disabled at this point,
+ * if they aren't we would need to update avail_event index. */
+ BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
+
+ return 0;
+}
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access. Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which is
+ * never a valid descriptor number) if none was found. A negative code is
+ * returned on error. */
+int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num)
+{
+ int ret = fetch_descs(vq);
+ struct vhost_desc *last;
+ u16 id;
+ int i;
+
+ if (ret)
+ return ret;
+
+ last = peek_split_desc(vq);
+ id = last->id;
+
+ if (last->flags & VRING_DESC_F_INDIRECT) {
+ int r;
+
+ pop_split_desc(vq);
+ r = fetch_indirect_descs(vq, last, id);
+ if (unlikely(r < 0)) {
+ if (r != -EAGAIN)
+ vq_err(vq, "Failure detected "
+ "in indirect descriptor at idx %d\n", id);
+ return ret;
+ }
+ }
+
+ /* Now convert to IOV */
+ /* When we start there are none of either input nor output. */
+ *out_num = *in_num = 0;
+ if (unlikely(log))
+ *log_num = 0;
+
+ for (i = 0; i < vq->ndescs; ++i) {
+ unsigned iov_count = *in_num + *out_num;
+ struct vhost_desc *desc = &vq->descs[i];
+ int access;
+
+ if (desc->flags & ~(VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE)) {
+ vq_err(vq, "Unexpected flags: 0x%x at descriptor id 0x%x\n",
+ desc->flags, desc->id);
+ ret = -EINVAL;
+ goto err;
+ }
+ if (desc->flags & VRING_DESC_F_WRITE)
+ access = VHOST_ACCESS_WO;
+ else
+ access = VHOST_ACCESS_RO;
+ ret = translate_desc(vq, desc->addr,
+ desc->len, iov + iov_count,
+ iov_size - iov_count, access);
+ if (unlikely(ret < 0)) {
+ if (ret != -EAGAIN)
+ vq_err(vq, "Translation failure %d descriptor idx %d\n",
+ ret, i);
+ goto err;
+ }
+ if (access == VHOST_ACCESS_WO) {
+ /* If this is an input descriptor,
+ * increment that count. */
+ *in_num += ret;
+ if (unlikely(log && ret)) {
+ log[*log_num].addr = desc->addr;
+ log[*log_num].len = desc->len;
+ ++*log_num;
+ }
+ } else {
+ /* If it's an output descriptor, they're all supposed
+ * to come before any input descriptors. */
+ if (unlikely(*in_num)) {
+ vq_err(vq, "Descriptor has out after in: "
+ "idx %d\n", i);
+ ret = -EINVAL;
+ goto err;
+ }
+ *out_num += ret;
+ }
+ }
+
+ ret = id;
+ vq->ndescs = 0;
+
+ return ret;
+
+err:
+ vhost_discard_vq_desc(vq, 1);
+ vq->ndescs = 0;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vhost_get_vq_desc_batch);
+
/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
{
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index e9ed2722b633..1724f61b6c2d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -80,6 +80,13 @@ enum vhost_uaddr_type {
VHOST_NUM_ADDRS = 3,
};
+struct vhost_desc {
+ u64 addr;
+ u32 len;
+ u16 flags; /* VRING_DESC_F_WRITE, VRING_DESC_F_NEXT */
+ u16 id;
+};
+
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
@@ -90,6 +97,11 @@ struct vhost_virtqueue {
struct vring_desc __user *desc;
struct vring_avail __user *avail;
struct vring_used __user *used;
+
+ struct vhost_desc *descs;
+ int ndescs;
+ int max_descs;
+
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
struct eventfd_ctx *call_ctx;
@@ -190,6 +202,10 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
bool vhost_log_access_ok(struct vhost_dev *);
+int vhost_get_vq_desc_batch(struct vhost_virtqueue *,
+ struct iovec iov[], unsigned int iov_count,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num);
int vhost_get_vq_desc(struct vhost_virtqueue *,
struct iovec iov[], unsigned int iov_count,
unsigned int *out_num, unsigned int *in_num,
--
MST
^ permalink raw reply related [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-11 13:45 ` [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct Michael S. Tsirkin
@ 2019-10-12 7:28 ` Jason Wang
2019-10-12 20:27 ` Michael S. Tsirkin
2019-10-12 20:27 ` Michael S. Tsirkin
2019-10-12 7:28 ` Jason Wang
1 sibling, 2 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-12 7:28 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel; +Cc: kvm, virtualization, netdev
On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> The idea is to support multiple ring formats by converting
> to a format-independent array of descriptors.
>
> This costs extra cycles, but we gain in ability
> to fetch a batch of descriptors in one go, which
> is good for code cache locality.
>
> To simplify benchmarking, I kept the old code
> around so one can switch back and forth by
> writing into a module parameter.
> This will go away in the final submission.
>
> This patch causes a minor performance degradation,
> it's been kept as simple as possible for ease of review.
> Next patch gets us back the performance by adding batching.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> drivers/vhost/test.c | 17 ++-
> drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
> drivers/vhost/vhost.h | 16 +++
> 3 files changed, 327 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> index 056308008288..39a018a7af2d 100644
> --- a/drivers/vhost/test.c
> +++ b/drivers/vhost/test.c
> @@ -18,6 +18,9 @@
> #include "test.h"
> #include "vhost.h"
>
> +static int newcode = 0;
> +module_param(newcode, int, 0644);
> +
> /* Max number of bytes transferred before requeueing the job.
> * Using this limit prevents one virtqueue from starving others. */
> #define VHOST_TEST_WEIGHT 0x80000
> @@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
> vhost_disable_notify(&n->dev, vq);
>
> for (;;) {
> - head = vhost_get_vq_desc(vq, vq->iov,
> - ARRAY_SIZE(vq->iov),
> - &out, &in,
> - NULL, NULL);
> + if (newcode)
> + head = vhost_get_vq_desc_batch(vq, vq->iov,
> + ARRAY_SIZE(vq->iov),
> + &out, &in,
> + NULL, NULL);
> + else
> + head = vhost_get_vq_desc(vq, vq->iov,
> + ARRAY_SIZE(vq->iov),
> + &out, &in,
> + NULL, NULL);
> /* On error, stop handling until the next kick. */
> if (unlikely(head < 0))
> break;
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 36ca2cf419bf..36661d6cb51f 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> struct vhost_virtqueue *vq)
> {
> vq->num = 1;
> + vq->ndescs = 0;
> vq->desc = NULL;
> vq->avail = NULL;
> vq->used = NULL;
> @@ -369,6 +370,9 @@ static int vhost_worker(void *data)
>
> static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
> {
> + kfree(vq->descs);
> + vq->descs = NULL;
> + vq->max_descs = 0;
> kfree(vq->indirect);
> vq->indirect = NULL;
> kfree(vq->log);
> @@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>
> for (i = 0; i < dev->nvqs; ++i) {
> vq = dev->vqs[i];
> + vq->max_descs = dev->iov_limit;
> + vq->descs = kmalloc_array(vq->max_descs,
> + sizeof(*vq->descs),
> + GFP_KERNEL);
Is iov_limit too much here? It can obviously increase the footprint. I
guess the batching can only be done for descriptor without indirect or
next set. Then we may batch 16 or 64.
Thanks
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-12 7:28 ` Jason Wang
@ 2019-10-12 20:27 ` Michael S. Tsirkin
2019-10-12 20:27 ` Michael S. Tsirkin
1 sibling, 0 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-12 20:27 UTC (permalink / raw)
To: Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization
On Sat, Oct 12, 2019 at 03:28:49PM +0800, Jason Wang wrote:
>
> On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> > The idea is to support multiple ring formats by converting
> > to a format-independent array of descriptors.
> >
> > This costs extra cycles, but we gain in ability
> > to fetch a batch of descriptors in one go, which
> > is good for code cache locality.
> >
> > To simplify benchmarking, I kept the old code
> > around so one can switch back and forth by
> > writing into a module parameter.
> > This will go away in the final submission.
> >
> > This patch causes a minor performance degradation,
> > it's been kept as simple as possible for ease of review.
> > Next patch gets us back the performance by adding batching.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > drivers/vhost/test.c | 17 ++-
> > drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
> > drivers/vhost/vhost.h | 16 +++
> > 3 files changed, 327 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> > index 056308008288..39a018a7af2d 100644
> > --- a/drivers/vhost/test.c
> > +++ b/drivers/vhost/test.c
> > @@ -18,6 +18,9 @@
> > #include "test.h"
> > #include "vhost.h"
> > +static int newcode = 0;
> > +module_param(newcode, int, 0644);
> > +
> > /* Max number of bytes transferred before requeueing the job.
> > * Using this limit prevents one virtqueue from starving others. */
> > #define VHOST_TEST_WEIGHT 0x80000
> > @@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
> > vhost_disable_notify(&n->dev, vq);
> > for (;;) {
> > - head = vhost_get_vq_desc(vq, vq->iov,
> > - ARRAY_SIZE(vq->iov),
> > - &out, &in,
> > - NULL, NULL);
> > + if (newcode)
> > + head = vhost_get_vq_desc_batch(vq, vq->iov,
> > + ARRAY_SIZE(vq->iov),
> > + &out, &in,
> > + NULL, NULL);
> > + else
> > + head = vhost_get_vq_desc(vq, vq->iov,
> > + ARRAY_SIZE(vq->iov),
> > + &out, &in,
> > + NULL, NULL);
> > /* On error, stop handling until the next kick. */
> > if (unlikely(head < 0))
> > break;
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index 36ca2cf419bf..36661d6cb51f 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> > struct vhost_virtqueue *vq)
> > {
> > vq->num = 1;
> > + vq->ndescs = 0;
> > vq->desc = NULL;
> > vq->avail = NULL;
> > vq->used = NULL;
> > @@ -369,6 +370,9 @@ static int vhost_worker(void *data)
> > static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
> > {
> > + kfree(vq->descs);
> > + vq->descs = NULL;
> > + vq->max_descs = 0;
> > kfree(vq->indirect);
> > vq->indirect = NULL;
> > kfree(vq->log);
> > @@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> > for (i = 0; i < dev->nvqs; ++i) {
> > vq = dev->vqs[i];
> > + vq->max_descs = dev->iov_limit;
> > + vq->descs = kmalloc_array(vq->max_descs,
> > + sizeof(*vq->descs),
> > + GFP_KERNEL);
>
>
> Is iov_limit too much here? It can obviously increase the footprint. I guess
> the batching can only be done for descriptor without indirect or next set.
> Then we may batch 16 or 64.
>
> Thanks
Yes, next patch only batches up to 64. But we do need iov_limit because
guest can pass a long chain of scatter/gather.
We already have iovecs in a huge array so this does not look like
a big deal. If we ever teach the code to avoid the huge
iov arrays by handling huge s/g lists piece by piece,
we can make the desc array smaller at the same point.
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-12 7:28 ` Jason Wang
2019-10-12 20:27 ` Michael S. Tsirkin
@ 2019-10-12 20:27 ` Michael S. Tsirkin
2019-10-14 1:43 ` Jason Wang
2019-10-14 1:43 ` Jason Wang
1 sibling, 2 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-12 20:27 UTC (permalink / raw)
To: Jason Wang; +Cc: linux-kernel, kvm, virtualization, netdev
On Sat, Oct 12, 2019 at 03:28:49PM +0800, Jason Wang wrote:
>
> On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> > The idea is to support multiple ring formats by converting
> > to a format-independent array of descriptors.
> >
> > This costs extra cycles, but we gain in ability
> > to fetch a batch of descriptors in one go, which
> > is good for code cache locality.
> >
> > To simplify benchmarking, I kept the old code
> > around so one can switch back and forth by
> > writing into a module parameter.
> > This will go away in the final submission.
> >
> > This patch causes a minor performance degradation,
> > it's been kept as simple as possible for ease of review.
> > Next patch gets us back the performance by adding batching.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > drivers/vhost/test.c | 17 ++-
> > drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
> > drivers/vhost/vhost.h | 16 +++
> > 3 files changed, 327 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> > index 056308008288..39a018a7af2d 100644
> > --- a/drivers/vhost/test.c
> > +++ b/drivers/vhost/test.c
> > @@ -18,6 +18,9 @@
> > #include "test.h"
> > #include "vhost.h"
> > +static int newcode = 0;
> > +module_param(newcode, int, 0644);
> > +
> > /* Max number of bytes transferred before requeueing the job.
> > * Using this limit prevents one virtqueue from starving others. */
> > #define VHOST_TEST_WEIGHT 0x80000
> > @@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
> > vhost_disable_notify(&n->dev, vq);
> > for (;;) {
> > - head = vhost_get_vq_desc(vq, vq->iov,
> > - ARRAY_SIZE(vq->iov),
> > - &out, &in,
> > - NULL, NULL);
> > + if (newcode)
> > + head = vhost_get_vq_desc_batch(vq, vq->iov,
> > + ARRAY_SIZE(vq->iov),
> > + &out, &in,
> > + NULL, NULL);
> > + else
> > + head = vhost_get_vq_desc(vq, vq->iov,
> > + ARRAY_SIZE(vq->iov),
> > + &out, &in,
> > + NULL, NULL);
> > /* On error, stop handling until the next kick. */
> > if (unlikely(head < 0))
> > break;
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index 36ca2cf419bf..36661d6cb51f 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> > struct vhost_virtqueue *vq)
> > {
> > vq->num = 1;
> > + vq->ndescs = 0;
> > vq->desc = NULL;
> > vq->avail = NULL;
> > vq->used = NULL;
> > @@ -369,6 +370,9 @@ static int vhost_worker(void *data)
> > static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
> > {
> > + kfree(vq->descs);
> > + vq->descs = NULL;
> > + vq->max_descs = 0;
> > kfree(vq->indirect);
> > vq->indirect = NULL;
> > kfree(vq->log);
> > @@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> > for (i = 0; i < dev->nvqs; ++i) {
> > vq = dev->vqs[i];
> > + vq->max_descs = dev->iov_limit;
> > + vq->descs = kmalloc_array(vq->max_descs,
> > + sizeof(*vq->descs),
> > + GFP_KERNEL);
>
>
> Is iov_limit too much here? It can obviously increase the footprint. I guess
> the batching can only be done for descriptor without indirect or next set.
> Then we may batch 16 or 64.
>
> Thanks
Yes, next patch only batches up to 64. But we do need iov_limit because
guest can pass a long chain of scatter/gather.
We already have iovecs in a huge array so this does not look like
a big deal. If we ever teach the code to avoid the huge
iov arrays by handling huge s/g lists piece by piece,
we can make the desc array smaller at the same point.
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-12 20:27 ` Michael S. Tsirkin
@ 2019-10-14 1:43 ` Jason Wang
2019-10-14 1:43 ` Jason Wang
1 sibling, 0 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-14 1:43 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
On 2019/10/13 上午4:27, Michael S. Tsirkin wrote:
> On Sat, Oct 12, 2019 at 03:28:49PM +0800, Jason Wang wrote:
>> On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
>>> The idea is to support multiple ring formats by converting
>>> to a format-independent array of descriptors.
>>>
>>> This costs extra cycles, but we gain in ability
>>> to fetch a batch of descriptors in one go, which
>>> is good for code cache locality.
>>>
>>> To simplify benchmarking, I kept the old code
>>> around so one can switch back and forth by
>>> writing into a module parameter.
>>> This will go away in the final submission.
>>>
>>> This patch causes a minor performance degradation,
>>> it's been kept as simple as possible for ease of review.
>>> Next patch gets us back the performance by adding batching.
>>>
>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>> ---
>>> drivers/vhost/test.c | 17 ++-
>>> drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
>>> drivers/vhost/vhost.h | 16 +++
>>> 3 files changed, 327 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
>>> index 056308008288..39a018a7af2d 100644
>>> --- a/drivers/vhost/test.c
>>> +++ b/drivers/vhost/test.c
>>> @@ -18,6 +18,9 @@
>>> #include "test.h"
>>> #include "vhost.h"
>>> +static int newcode = 0;
>>> +module_param(newcode, int, 0644);
>>> +
>>> /* Max number of bytes transferred before requeueing the job.
>>> * Using this limit prevents one virtqueue from starving others. */
>>> #define VHOST_TEST_WEIGHT 0x80000
>>> @@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
>>> vhost_disable_notify(&n->dev, vq);
>>> for (;;) {
>>> - head = vhost_get_vq_desc(vq, vq->iov,
>>> - ARRAY_SIZE(vq->iov),
>>> - &out, &in,
>>> - NULL, NULL);
>>> + if (newcode)
>>> + head = vhost_get_vq_desc_batch(vq, vq->iov,
>>> + ARRAY_SIZE(vq->iov),
>>> + &out, &in,
>>> + NULL, NULL);
>>> + else
>>> + head = vhost_get_vq_desc(vq, vq->iov,
>>> + ARRAY_SIZE(vq->iov),
>>> + &out, &in,
>>> + NULL, NULL);
>>> /* On error, stop handling until the next kick. */
>>> if (unlikely(head < 0))
>>> break;
>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>>> index 36ca2cf419bf..36661d6cb51f 100644
>>> --- a/drivers/vhost/vhost.c
>>> +++ b/drivers/vhost/vhost.c
>>> @@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>>> struct vhost_virtqueue *vq)
>>> {
>>> vq->num = 1;
>>> + vq->ndescs = 0;
>>> vq->desc = NULL;
>>> vq->avail = NULL;
>>> vq->used = NULL;
>>> @@ -369,6 +370,9 @@ static int vhost_worker(void *data)
>>> static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
>>> {
>>> + kfree(vq->descs);
>>> + vq->descs = NULL;
>>> + vq->max_descs = 0;
>>> kfree(vq->indirect);
>>> vq->indirect = NULL;
>>> kfree(vq->log);
>>> @@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>>> for (i = 0; i < dev->nvqs; ++i) {
>>> vq = dev->vqs[i];
>>> + vq->max_descs = dev->iov_limit;
>>> + vq->descs = kmalloc_array(vq->max_descs,
>>> + sizeof(*vq->descs),
>>> + GFP_KERNEL);
>>
>> Is iov_limit too much here? It can obviously increase the footprint. I guess
>> the batching can only be done for descriptor without indirect or next set.
>> Then we may batch 16 or 64.
>>
>> Thanks
> Yes, next patch only batches up to 64. But we do need iov_limit because
> guest can pass a long chain of scatter/gather.
> We already have iovecs in a huge array so this does not look like
> a big deal. If we ever teach the code to avoid the huge
> iov arrays by handling huge s/g lists piece by piece,
> we can make the desc array smaller at the same point.
>
Another possible issue, if we try to batch descriptor chain when we've
already batched some descriptors, we may reach the limit then some of
the descriptors might need re-read.
Or we may need circular index (head, tail) in this case?
Thanks
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-12 20:27 ` Michael S. Tsirkin
2019-10-14 1:43 ` Jason Wang
@ 2019-10-14 1:43 ` Jason Wang
2019-10-15 20:20 ` Michael S. Tsirkin
2019-10-15 20:20 ` Michael S. Tsirkin
1 sibling, 2 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-14 1:43 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: linux-kernel, kvm, virtualization, netdev
On 2019/10/13 上午4:27, Michael S. Tsirkin wrote:
> On Sat, Oct 12, 2019 at 03:28:49PM +0800, Jason Wang wrote:
>> On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
>>> The idea is to support multiple ring formats by converting
>>> to a format-independent array of descriptors.
>>>
>>> This costs extra cycles, but we gain in ability
>>> to fetch a batch of descriptors in one go, which
>>> is good for code cache locality.
>>>
>>> To simplify benchmarking, I kept the old code
>>> around so one can switch back and forth by
>>> writing into a module parameter.
>>> This will go away in the final submission.
>>>
>>> This patch causes a minor performance degradation,
>>> it's been kept as simple as possible for ease of review.
>>> Next patch gets us back the performance by adding batching.
>>>
>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>> ---
>>> drivers/vhost/test.c | 17 ++-
>>> drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
>>> drivers/vhost/vhost.h | 16 +++
>>> 3 files changed, 327 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
>>> index 056308008288..39a018a7af2d 100644
>>> --- a/drivers/vhost/test.c
>>> +++ b/drivers/vhost/test.c
>>> @@ -18,6 +18,9 @@
>>> #include "test.h"
>>> #include "vhost.h"
>>> +static int newcode = 0;
>>> +module_param(newcode, int, 0644);
>>> +
>>> /* Max number of bytes transferred before requeueing the job.
>>> * Using this limit prevents one virtqueue from starving others. */
>>> #define VHOST_TEST_WEIGHT 0x80000
>>> @@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
>>> vhost_disable_notify(&n->dev, vq);
>>> for (;;) {
>>> - head = vhost_get_vq_desc(vq, vq->iov,
>>> - ARRAY_SIZE(vq->iov),
>>> - &out, &in,
>>> - NULL, NULL);
>>> + if (newcode)
>>> + head = vhost_get_vq_desc_batch(vq, vq->iov,
>>> + ARRAY_SIZE(vq->iov),
>>> + &out, &in,
>>> + NULL, NULL);
>>> + else
>>> + head = vhost_get_vq_desc(vq, vq->iov,
>>> + ARRAY_SIZE(vq->iov),
>>> + &out, &in,
>>> + NULL, NULL);
>>> /* On error, stop handling until the next kick. */
>>> if (unlikely(head < 0))
>>> break;
>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>>> index 36ca2cf419bf..36661d6cb51f 100644
>>> --- a/drivers/vhost/vhost.c
>>> +++ b/drivers/vhost/vhost.c
>>> @@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>>> struct vhost_virtqueue *vq)
>>> {
>>> vq->num = 1;
>>> + vq->ndescs = 0;
>>> vq->desc = NULL;
>>> vq->avail = NULL;
>>> vq->used = NULL;
>>> @@ -369,6 +370,9 @@ static int vhost_worker(void *data)
>>> static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
>>> {
>>> + kfree(vq->descs);
>>> + vq->descs = NULL;
>>> + vq->max_descs = 0;
>>> kfree(vq->indirect);
>>> vq->indirect = NULL;
>>> kfree(vq->log);
>>> @@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>>> for (i = 0; i < dev->nvqs; ++i) {
>>> vq = dev->vqs[i];
>>> + vq->max_descs = dev->iov_limit;
>>> + vq->descs = kmalloc_array(vq->max_descs,
>>> + sizeof(*vq->descs),
>>> + GFP_KERNEL);
>>
>> Is iov_limit too much here? It can obviously increase the footprint. I guess
>> the batching can only be done for descriptor without indirect or next set.
>> Then we may batch 16 or 64.
>>
>> Thanks
> Yes, next patch only batches up to 64. But we do need iov_limit because
> guest can pass a long chain of scatter/gather.
> We already have iovecs in a huge array so this does not look like
> a big deal. If we ever teach the code to avoid the huge
> iov arrays by handling huge s/g lists piece by piece,
> we can make the desc array smaller at the same point.
>
Another possible issue, if we try to batch descriptor chain when we've
already batched some descriptors, we may reach the limit then some of
the descriptors might need re-read.
Or we may need circular index (head, tail) in this case?
Thanks
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-14 1:43 ` Jason Wang
@ 2019-10-15 20:20 ` Michael S. Tsirkin
2019-10-16 4:38 ` Jason Wang
2019-10-16 4:38 ` Jason Wang
2019-10-15 20:20 ` Michael S. Tsirkin
1 sibling, 2 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-15 20:20 UTC (permalink / raw)
To: Jason Wang; +Cc: linux-kernel, kvm, virtualization, netdev
On Mon, Oct 14, 2019 at 09:43:25AM +0800, Jason Wang wrote:
>
> On 2019/10/13 上午4:27, Michael S. Tsirkin wrote:
> > On Sat, Oct 12, 2019 at 03:28:49PM +0800, Jason Wang wrote:
> > > On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> > > > The idea is to support multiple ring formats by converting
> > > > to a format-independent array of descriptors.
> > > >
> > > > This costs extra cycles, but we gain in ability
> > > > to fetch a batch of descriptors in one go, which
> > > > is good for code cache locality.
> > > >
> > > > To simplify benchmarking, I kept the old code
> > > > around so one can switch back and forth by
> > > > writing into a module parameter.
> > > > This will go away in the final submission.
> > > >
> > > > This patch causes a minor performance degradation,
> > > > it's been kept as simple as possible for ease of review.
> > > > Next patch gets us back the performance by adding batching.
> > > >
> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > ---
> > > > drivers/vhost/test.c | 17 ++-
> > > > drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
> > > > drivers/vhost/vhost.h | 16 +++
> > > > 3 files changed, 327 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> > > > index 056308008288..39a018a7af2d 100644
> > > > --- a/drivers/vhost/test.c
> > > > +++ b/drivers/vhost/test.c
> > > > @@ -18,6 +18,9 @@
> > > > #include "test.h"
> > > > #include "vhost.h"
> > > > +static int newcode = 0;
> > > > +module_param(newcode, int, 0644);
> > > > +
> > > > /* Max number of bytes transferred before requeueing the job.
> > > > * Using this limit prevents one virtqueue from starving others. */
> > > > #define VHOST_TEST_WEIGHT 0x80000
> > > > @@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
> > > > vhost_disable_notify(&n->dev, vq);
> > > > for (;;) {
> > > > - head = vhost_get_vq_desc(vq, vq->iov,
> > > > - ARRAY_SIZE(vq->iov),
> > > > - &out, &in,
> > > > - NULL, NULL);
> > > > + if (newcode)
> > > > + head = vhost_get_vq_desc_batch(vq, vq->iov,
> > > > + ARRAY_SIZE(vq->iov),
> > > > + &out, &in,
> > > > + NULL, NULL);
> > > > + else
> > > > + head = vhost_get_vq_desc(vq, vq->iov,
> > > > + ARRAY_SIZE(vq->iov),
> > > > + &out, &in,
> > > > + NULL, NULL);
> > > > /* On error, stop handling until the next kick. */
> > > > if (unlikely(head < 0))
> > > > break;
> > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > > > index 36ca2cf419bf..36661d6cb51f 100644
> > > > --- a/drivers/vhost/vhost.c
> > > > +++ b/drivers/vhost/vhost.c
> > > > @@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> > > > struct vhost_virtqueue *vq)
> > > > {
> > > > vq->num = 1;
> > > > + vq->ndescs = 0;
> > > > vq->desc = NULL;
> > > > vq->avail = NULL;
> > > > vq->used = NULL;
> > > > @@ -369,6 +370,9 @@ static int vhost_worker(void *data)
> > > > static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
> > > > {
> > > > + kfree(vq->descs);
> > > > + vq->descs = NULL;
> > > > + vq->max_descs = 0;
> > > > kfree(vq->indirect);
> > > > vq->indirect = NULL;
> > > > kfree(vq->log);
> > > > @@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> > > > for (i = 0; i < dev->nvqs; ++i) {
> > > > vq = dev->vqs[i];
> > > > + vq->max_descs = dev->iov_limit;
> > > > + vq->descs = kmalloc_array(vq->max_descs,
> > > > + sizeof(*vq->descs),
> > > > + GFP_KERNEL);
> > >
> > > Is iov_limit too much here? It can obviously increase the footprint. I guess
> > > the batching can only be done for descriptor without indirect or next set.
> > > Then we may batch 16 or 64.
> > >
> > > Thanks
> > Yes, next patch only batches up to 64. But we do need iov_limit because
> > guest can pass a long chain of scatter/gather.
> > We already have iovecs in a huge array so this does not look like
> > a big deal. If we ever teach the code to avoid the huge
> > iov arrays by handling huge s/g lists piece by piece,
> > we can make the desc array smaller at the same point.
> >
>
> Another possible issue, if we try to batch descriptor chain when we've
> already batched some descriptors, we may reach the limit then some of the
> descriptors might need re-read.
>
> Or we may need circular index (head, tail) in this case?
>
> Thanks
We never supported more than IOV_MAX descriptors.
And we don't batch more than iov_limit - IOV_MAX.
so buffer never overflows.
--
MST
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-15 20:20 ` Michael S. Tsirkin
@ 2019-10-16 4:38 ` Jason Wang
2019-10-16 4:38 ` Jason Wang
1 sibling, 0 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-16 4:38 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
On 2019/10/16 上午4:20, Michael S. Tsirkin wrote:
> On Mon, Oct 14, 2019 at 09:43:25AM +0800, Jason Wang wrote:
>> On 2019/10/13 上午4:27, Michael S. Tsirkin wrote:
>>> On Sat, Oct 12, 2019 at 03:28:49PM +0800, Jason Wang wrote:
>>>> On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
>>>>> The idea is to support multiple ring formats by converting
>>>>> to a format-independent array of descriptors.
>>>>>
>>>>> This costs extra cycles, but we gain in ability
>>>>> to fetch a batch of descriptors in one go, which
>>>>> is good for code cache locality.
>>>>>
>>>>> To simplify benchmarking, I kept the old code
>>>>> around so one can switch back and forth by
>>>>> writing into a module parameter.
>>>>> This will go away in the final submission.
>>>>>
>>>>> This patch causes a minor performance degradation,
>>>>> it's been kept as simple as possible for ease of review.
>>>>> Next patch gets us back the performance by adding batching.
>>>>>
>>>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>>>> ---
>>>>> drivers/vhost/test.c | 17 ++-
>>>>> drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
>>>>> drivers/vhost/vhost.h | 16 +++
>>>>> 3 files changed, 327 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
>>>>> index 056308008288..39a018a7af2d 100644
>>>>> --- a/drivers/vhost/test.c
>>>>> +++ b/drivers/vhost/test.c
>>>>> @@ -18,6 +18,9 @@
>>>>> #include "test.h"
>>>>> #include "vhost.h"
>>>>> +static int newcode = 0;
>>>>> +module_param(newcode, int, 0644);
>>>>> +
>>>>> /* Max number of bytes transferred before requeueing the job.
>>>>> * Using this limit prevents one virtqueue from starving others. */
>>>>> #define VHOST_TEST_WEIGHT 0x80000
>>>>> @@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
>>>>> vhost_disable_notify(&n->dev, vq);
>>>>> for (;;) {
>>>>> - head = vhost_get_vq_desc(vq, vq->iov,
>>>>> - ARRAY_SIZE(vq->iov),
>>>>> - &out, &in,
>>>>> - NULL, NULL);
>>>>> + if (newcode)
>>>>> + head = vhost_get_vq_desc_batch(vq, vq->iov,
>>>>> + ARRAY_SIZE(vq->iov),
>>>>> + &out, &in,
>>>>> + NULL, NULL);
>>>>> + else
>>>>> + head = vhost_get_vq_desc(vq, vq->iov,
>>>>> + ARRAY_SIZE(vq->iov),
>>>>> + &out, &in,
>>>>> + NULL, NULL);
>>>>> /* On error, stop handling until the next kick. */
>>>>> if (unlikely(head < 0))
>>>>> break;
>>>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>>>>> index 36ca2cf419bf..36661d6cb51f 100644
>>>>> --- a/drivers/vhost/vhost.c
>>>>> +++ b/drivers/vhost/vhost.c
>>>>> @@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>>>>> struct vhost_virtqueue *vq)
>>>>> {
>>>>> vq->num = 1;
>>>>> + vq->ndescs = 0;
>>>>> vq->desc = NULL;
>>>>> vq->avail = NULL;
>>>>> vq->used = NULL;
>>>>> @@ -369,6 +370,9 @@ static int vhost_worker(void *data)
>>>>> static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
>>>>> {
>>>>> + kfree(vq->descs);
>>>>> + vq->descs = NULL;
>>>>> + vq->max_descs = 0;
>>>>> kfree(vq->indirect);
>>>>> vq->indirect = NULL;
>>>>> kfree(vq->log);
>>>>> @@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>>>>> for (i = 0; i < dev->nvqs; ++i) {
>>>>> vq = dev->vqs[i];
>>>>> + vq->max_descs = dev->iov_limit;
>>>>> + vq->descs = kmalloc_array(vq->max_descs,
>>>>> + sizeof(*vq->descs),
>>>>> + GFP_KERNEL);
>>>> Is iov_limit too much here? It can obviously increase the footprint. I guess
>>>> the batching can only be done for descriptor without indirect or next set.
>>>> Then we may batch 16 or 64.
>>>>
>>>> Thanks
>>> Yes, next patch only batches up to 64. But we do need iov_limit because
>>> guest can pass a long chain of scatter/gather.
>>> We already have iovecs in a huge array so this does not look like
>>> a big deal. If we ever teach the code to avoid the huge
>>> iov arrays by handling huge s/g lists piece by piece,
>>> we can make the desc array smaller at the same point.
>>>
>> Another possible issue, if we try to batch descriptor chain when we've
>> already batched some descriptors, we may reach the limit then some of the
>> descriptors might need re-read.
>>
>> Or we may need circular index (head, tail) in this case?
>>
>> Thanks
> We never supported more than IOV_MAX descriptors.
> And we don't batch more than iov_limit - IOV_MAX.
Ok, but what happens when we've already batched 63 descriptors then come
a 3 descriptor chain?
And it looks to me we need forget the cached descriptor during
set_vring_base()
Thanks
>
> so buffer never overflows.
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-15 20:20 ` Michael S. Tsirkin
2019-10-16 4:38 ` Jason Wang
@ 2019-10-16 4:38 ` Jason Wang
1 sibling, 0 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-16 4:38 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: linux-kernel, kvm, virtualization, netdev
On 2019/10/16 上午4:20, Michael S. Tsirkin wrote:
> On Mon, Oct 14, 2019 at 09:43:25AM +0800, Jason Wang wrote:
>> On 2019/10/13 上午4:27, Michael S. Tsirkin wrote:
>>> On Sat, Oct 12, 2019 at 03:28:49PM +0800, Jason Wang wrote:
>>>> On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
>>>>> The idea is to support multiple ring formats by converting
>>>>> to a format-independent array of descriptors.
>>>>>
>>>>> This costs extra cycles, but we gain in ability
>>>>> to fetch a batch of descriptors in one go, which
>>>>> is good for code cache locality.
>>>>>
>>>>> To simplify benchmarking, I kept the old code
>>>>> around so one can switch back and forth by
>>>>> writing into a module parameter.
>>>>> This will go away in the final submission.
>>>>>
>>>>> This patch causes a minor performance degradation,
>>>>> it's been kept as simple as possible for ease of review.
>>>>> Next patch gets us back the performance by adding batching.
>>>>>
>>>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>>>> ---
>>>>> drivers/vhost/test.c | 17 ++-
>>>>> drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
>>>>> drivers/vhost/vhost.h | 16 +++
>>>>> 3 files changed, 327 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
>>>>> index 056308008288..39a018a7af2d 100644
>>>>> --- a/drivers/vhost/test.c
>>>>> +++ b/drivers/vhost/test.c
>>>>> @@ -18,6 +18,9 @@
>>>>> #include "test.h"
>>>>> #include "vhost.h"
>>>>> +static int newcode = 0;
>>>>> +module_param(newcode, int, 0644);
>>>>> +
>>>>> /* Max number of bytes transferred before requeueing the job.
>>>>> * Using this limit prevents one virtqueue from starving others. */
>>>>> #define VHOST_TEST_WEIGHT 0x80000
>>>>> @@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
>>>>> vhost_disable_notify(&n->dev, vq);
>>>>> for (;;) {
>>>>> - head = vhost_get_vq_desc(vq, vq->iov,
>>>>> - ARRAY_SIZE(vq->iov),
>>>>> - &out, &in,
>>>>> - NULL, NULL);
>>>>> + if (newcode)
>>>>> + head = vhost_get_vq_desc_batch(vq, vq->iov,
>>>>> + ARRAY_SIZE(vq->iov),
>>>>> + &out, &in,
>>>>> + NULL, NULL);
>>>>> + else
>>>>> + head = vhost_get_vq_desc(vq, vq->iov,
>>>>> + ARRAY_SIZE(vq->iov),
>>>>> + &out, &in,
>>>>> + NULL, NULL);
>>>>> /* On error, stop handling until the next kick. */
>>>>> if (unlikely(head < 0))
>>>>> break;
>>>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>>>>> index 36ca2cf419bf..36661d6cb51f 100644
>>>>> --- a/drivers/vhost/vhost.c
>>>>> +++ b/drivers/vhost/vhost.c
>>>>> @@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>>>>> struct vhost_virtqueue *vq)
>>>>> {
>>>>> vq->num = 1;
>>>>> + vq->ndescs = 0;
>>>>> vq->desc = NULL;
>>>>> vq->avail = NULL;
>>>>> vq->used = NULL;
>>>>> @@ -369,6 +370,9 @@ static int vhost_worker(void *data)
>>>>> static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
>>>>> {
>>>>> + kfree(vq->descs);
>>>>> + vq->descs = NULL;
>>>>> + vq->max_descs = 0;
>>>>> kfree(vq->indirect);
>>>>> vq->indirect = NULL;
>>>>> kfree(vq->log);
>>>>> @@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>>>>> for (i = 0; i < dev->nvqs; ++i) {
>>>>> vq = dev->vqs[i];
>>>>> + vq->max_descs = dev->iov_limit;
>>>>> + vq->descs = kmalloc_array(vq->max_descs,
>>>>> + sizeof(*vq->descs),
>>>>> + GFP_KERNEL);
>>>> Is iov_limit too much here? It can obviously increase the footprint. I guess
>>>> the batching can only be done for descriptor without indirect or next set.
>>>> Then we may batch 16 or 64.
>>>>
>>>> Thanks
>>> Yes, next patch only batches up to 64. But we do need iov_limit because
>>> guest can pass a long chain of scatter/gather.
>>> We already have iovecs in a huge array so this does not look like
>>> a big deal. If we ever teach the code to avoid the huge
>>> iov arrays by handling huge s/g lists piece by piece,
>>> we can make the desc array smaller at the same point.
>>>
>> Another possible issue, if we try to batch descriptor chain when we've
>> already batched some descriptors, we may reach the limit then some of the
>> descriptors might need re-read.
>>
>> Or we may need circular index (head, tail) in this case?
>>
>> Thanks
> We never supported more than IOV_MAX descriptors.
> And we don't batch more than iov_limit - IOV_MAX.
Ok, but what happens when we've already batched 63 descriptors then come
a 3 descriptor chain?
And it looks to me we need forget the cached descriptor during
set_vring_base()
Thanks
>
> so buffer never overflows.
>
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-14 1:43 ` Jason Wang
2019-10-15 20:20 ` Michael S. Tsirkin
@ 2019-10-15 20:20 ` Michael S. Tsirkin
1 sibling, 0 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-15 20:20 UTC (permalink / raw)
To: Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization
On Mon, Oct 14, 2019 at 09:43:25AM +0800, Jason Wang wrote:
>
> On 2019/10/13 上午4:27, Michael S. Tsirkin wrote:
> > On Sat, Oct 12, 2019 at 03:28:49PM +0800, Jason Wang wrote:
> > > On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> > > > The idea is to support multiple ring formats by converting
> > > > to a format-independent array of descriptors.
> > > >
> > > > This costs extra cycles, but we gain in ability
> > > > to fetch a batch of descriptors in one go, which
> > > > is good for code cache locality.
> > > >
> > > > To simplify benchmarking, I kept the old code
> > > > around so one can switch back and forth by
> > > > writing into a module parameter.
> > > > This will go away in the final submission.
> > > >
> > > > This patch causes a minor performance degradation,
> > > > it's been kept as simple as possible for ease of review.
> > > > Next patch gets us back the performance by adding batching.
> > > >
> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > ---
> > > > drivers/vhost/test.c | 17 ++-
> > > > drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
> > > > drivers/vhost/vhost.h | 16 +++
> > > > 3 files changed, 327 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> > > > index 056308008288..39a018a7af2d 100644
> > > > --- a/drivers/vhost/test.c
> > > > +++ b/drivers/vhost/test.c
> > > > @@ -18,6 +18,9 @@
> > > > #include "test.h"
> > > > #include "vhost.h"
> > > > +static int newcode = 0;
> > > > +module_param(newcode, int, 0644);
> > > > +
> > > > /* Max number of bytes transferred before requeueing the job.
> > > > * Using this limit prevents one virtqueue from starving others. */
> > > > #define VHOST_TEST_WEIGHT 0x80000
> > > > @@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
> > > > vhost_disable_notify(&n->dev, vq);
> > > > for (;;) {
> > > > - head = vhost_get_vq_desc(vq, vq->iov,
> > > > - ARRAY_SIZE(vq->iov),
> > > > - &out, &in,
> > > > - NULL, NULL);
> > > > + if (newcode)
> > > > + head = vhost_get_vq_desc_batch(vq, vq->iov,
> > > > + ARRAY_SIZE(vq->iov),
> > > > + &out, &in,
> > > > + NULL, NULL);
> > > > + else
> > > > + head = vhost_get_vq_desc(vq, vq->iov,
> > > > + ARRAY_SIZE(vq->iov),
> > > > + &out, &in,
> > > > + NULL, NULL);
> > > > /* On error, stop handling until the next kick. */
> > > > if (unlikely(head < 0))
> > > > break;
> > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > > > index 36ca2cf419bf..36661d6cb51f 100644
> > > > --- a/drivers/vhost/vhost.c
> > > > +++ b/drivers/vhost/vhost.c
> > > > @@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> > > > struct vhost_virtqueue *vq)
> > > > {
> > > > vq->num = 1;
> > > > + vq->ndescs = 0;
> > > > vq->desc = NULL;
> > > > vq->avail = NULL;
> > > > vq->used = NULL;
> > > > @@ -369,6 +370,9 @@ static int vhost_worker(void *data)
> > > > static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
> > > > {
> > > > + kfree(vq->descs);
> > > > + vq->descs = NULL;
> > > > + vq->max_descs = 0;
> > > > kfree(vq->indirect);
> > > > vq->indirect = NULL;
> > > > kfree(vq->log);
> > > > @@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> > > > for (i = 0; i < dev->nvqs; ++i) {
> > > > vq = dev->vqs[i];
> > > > + vq->max_descs = dev->iov_limit;
> > > > + vq->descs = kmalloc_array(vq->max_descs,
> > > > + sizeof(*vq->descs),
> > > > + GFP_KERNEL);
> > >
> > > Is iov_limit too much here? It can obviously increase the footprint. I guess
> > > the batching can only be done for descriptor without indirect or next set.
> > > Then we may batch 16 or 64.
> > >
> > > Thanks
> > Yes, next patch only batches up to 64. But we do need iov_limit because
> > guest can pass a long chain of scatter/gather.
> > We already have iovecs in a huge array so this does not look like
> > a big deal. If we ever teach the code to avoid the huge
> > iov arrays by handling huge s/g lists piece by piece,
> > we can make the desc array smaller at the same point.
> >
>
> Another possible issue, if we try to batch descriptor chain when we've
> already batched some descriptors, we may reach the limit then some of the
> descriptors might need re-read.
>
> Or we may need circular index (head, tail) in this case?
>
> Thanks
We never supported more than IOV_MAX descriptors.
And we don't batch more than iov_limit - IOV_MAX.
so buffer never overflows.
--
MST
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-11 13:45 ` [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct Michael S. Tsirkin
2019-10-12 7:28 ` Jason Wang
@ 2019-10-12 7:28 ` Jason Wang
1 sibling, 0 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-12 7:28 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel; +Cc: netdev, kvm, virtualization
On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> The idea is to support multiple ring formats by converting
> to a format-independent array of descriptors.
>
> This costs extra cycles, but we gain in ability
> to fetch a batch of descriptors in one go, which
> is good for code cache locality.
>
> To simplify benchmarking, I kept the old code
> around so one can switch back and forth by
> writing into a module parameter.
> This will go away in the final submission.
>
> This patch causes a minor performance degradation,
> it's been kept as simple as possible for ease of review.
> Next patch gets us back the performance by adding batching.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> drivers/vhost/test.c | 17 ++-
> drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
> drivers/vhost/vhost.h | 16 +++
> 3 files changed, 327 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> index 056308008288..39a018a7af2d 100644
> --- a/drivers/vhost/test.c
> +++ b/drivers/vhost/test.c
> @@ -18,6 +18,9 @@
> #include "test.h"
> #include "vhost.h"
>
> +static int newcode = 0;
> +module_param(newcode, int, 0644);
> +
> /* Max number of bytes transferred before requeueing the job.
> * Using this limit prevents one virtqueue from starving others. */
> #define VHOST_TEST_WEIGHT 0x80000
> @@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
> vhost_disable_notify(&n->dev, vq);
>
> for (;;) {
> - head = vhost_get_vq_desc(vq, vq->iov,
> - ARRAY_SIZE(vq->iov),
> - &out, &in,
> - NULL, NULL);
> + if (newcode)
> + head = vhost_get_vq_desc_batch(vq, vq->iov,
> + ARRAY_SIZE(vq->iov),
> + &out, &in,
> + NULL, NULL);
> + else
> + head = vhost_get_vq_desc(vq, vq->iov,
> + ARRAY_SIZE(vq->iov),
> + &out, &in,
> + NULL, NULL);
> /* On error, stop handling until the next kick. */
> if (unlikely(head < 0))
> break;
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 36ca2cf419bf..36661d6cb51f 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> struct vhost_virtqueue *vq)
> {
> vq->num = 1;
> + vq->ndescs = 0;
> vq->desc = NULL;
> vq->avail = NULL;
> vq->used = NULL;
> @@ -369,6 +370,9 @@ static int vhost_worker(void *data)
>
> static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
> {
> + kfree(vq->descs);
> + vq->descs = NULL;
> + vq->max_descs = 0;
> kfree(vq->indirect);
> vq->indirect = NULL;
> kfree(vq->log);
> @@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>
> for (i = 0; i < dev->nvqs; ++i) {
> vq = dev->vqs[i];
> + vq->max_descs = dev->iov_limit;
> + vq->descs = kmalloc_array(vq->max_descs,
> + sizeof(*vq->descs),
> + GFP_KERNEL);
Is iov_limit too much here? It can obviously increase the footprint. I
guess the batching can only be done for descriptor without indirect or
next set. Then we may batch 16 or 64.
Thanks
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct
2019-10-11 13:45 [PATCH RFC v1 0/2] vhost: ring format independence Michael S. Tsirkin
2019-10-11 13:45 ` [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct Michael S. Tsirkin
@ 2019-10-11 13:45 ` Michael S. Tsirkin
2019-10-11 13:46 ` [PATCH RFC v1 2/2] vhost: batching fetches Michael S. Tsirkin
` (5 subsequent siblings)
7 siblings, 0 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-11 13:45 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, kvm, virtualization
The idea is to support multiple ring formats by converting
to a format-independent array of descriptors.
This costs extra cycles, but we gain in ability
to fetch a batch of descriptors in one go, which
is good for code cache locality.
To simplify benchmarking, I kept the old code
around so one can switch back and forth by
writing into a module parameter.
This will go away in the final submission.
This patch causes a minor performance degradation,
it's been kept as simple as possible for ease of review.
Next patch gets us back the performance by adding batching.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/test.c | 17 ++-
drivers/vhost/vhost.c | 299 +++++++++++++++++++++++++++++++++++++++++-
drivers/vhost/vhost.h | 16 +++
3 files changed, 327 insertions(+), 5 deletions(-)
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 056308008288..39a018a7af2d 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -18,6 +18,9 @@
#include "test.h"
#include "vhost.h"
+static int newcode = 0;
+module_param(newcode, int, 0644);
+
/* Max number of bytes transferred before requeueing the job.
* Using this limit prevents one virtqueue from starving others. */
#define VHOST_TEST_WEIGHT 0x80000
@@ -58,10 +61,16 @@ static void handle_vq(struct vhost_test *n)
vhost_disable_notify(&n->dev, vq);
for (;;) {
- head = vhost_get_vq_desc(vq, vq->iov,
- ARRAY_SIZE(vq->iov),
- &out, &in,
- NULL, NULL);
+ if (newcode)
+ head = vhost_get_vq_desc_batch(vq, vq->iov,
+ ARRAY_SIZE(vq->iov),
+ &out, &in,
+ NULL, NULL);
+ else
+ head = vhost_get_vq_desc(vq, vq->iov,
+ ARRAY_SIZE(vq->iov),
+ &out, &in,
+ NULL, NULL);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 36ca2cf419bf..36661d6cb51f 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -301,6 +301,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
struct vhost_virtqueue *vq)
{
vq->num = 1;
+ vq->ndescs = 0;
vq->desc = NULL;
vq->avail = NULL;
vq->used = NULL;
@@ -369,6 +370,9 @@ static int vhost_worker(void *data)
static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
{
+ kfree(vq->descs);
+ vq->descs = NULL;
+ vq->max_descs = 0;
kfree(vq->indirect);
vq->indirect = NULL;
kfree(vq->log);
@@ -385,6 +389,10 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
+ vq->max_descs = dev->iov_limit;
+ vq->descs = kmalloc_array(vq->max_descs,
+ sizeof(*vq->descs),
+ GFP_KERNEL);
vq->indirect = kmalloc_array(UIO_MAXIOV,
sizeof(*vq->indirect),
GFP_KERNEL);
@@ -392,7 +400,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
GFP_KERNEL);
vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
GFP_KERNEL);
- if (!vq->indirect || !vq->log || !vq->heads)
+ if (!vq->indirect || !vq->log || !vq->heads || !vq->descs)
goto err_nomem;
}
return 0;
@@ -2346,6 +2354,295 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
}
EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
+static struct vhost_desc *peek_split_desc(struct vhost_virtqueue *vq)
+{
+ BUG_ON(!vq->ndescs);
+ return &vq->descs[vq->ndescs - 1];
+}
+
+static void pop_split_desc(struct vhost_virtqueue *vq)
+{
+ BUG_ON(!vq->ndescs);
+ --vq->ndescs;
+}
+
+static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, u16 id)
+{
+ struct vhost_desc *h;
+
+ if (unlikely(vq->ndescs >= vq->max_descs))
+ return -EINVAL;
+ h = &vq->descs[vq->ndescs++];
+ h->addr = vhost64_to_cpu(vq, desc->addr);
+ h->len = vhost32_to_cpu(vq, desc->len);
+ h->flags = vhost16_to_cpu(vq, desc->flags);
+ h->id = id;
+
+ return 0;
+}
+
+static int fetch_indirect_descs(struct vhost_virtqueue *vq,
+ struct vhost_desc *indirect,
+ u16 head)
+{
+ struct vring_desc desc;
+ unsigned int i = 0, count, found = 0;
+ u32 len = indirect->len;
+ struct iov_iter from;
+ int ret;
+
+ /* Sanity check */
+ if (unlikely(len % sizeof desc)) {
+ vq_err(vq, "Invalid length in indirect descriptor: "
+ "len 0x%llx not multiple of 0x%zx\n",
+ (unsigned long long)len,
+ sizeof desc);
+ return -EINVAL;
+ }
+
+ ret = translate_desc(vq, indirect->addr, len, vq->indirect,
+ UIO_MAXIOV, VHOST_ACCESS_RO);
+ if (unlikely(ret < 0)) {
+ if (ret != -EAGAIN)
+ vq_err(vq, "Translation failure %d in indirect.\n", ret);
+ return ret;
+ }
+ iov_iter_init(&from, READ, vq->indirect, ret, len);
+
+ /* We will use the result as an address to read from, so most
+ * architectures only need a compiler barrier here. */
+ read_barrier_depends();
+
+ count = len / sizeof desc;
+ /* Buffers are chained via a 16 bit next field, so
+ * we can have at most 2^16 of these. */
+ if (unlikely(count > USHRT_MAX + 1)) {
+ vq_err(vq, "Indirect buffer length too big: %d\n",
+ indirect->len);
+ return -E2BIG;
+ }
+ if (unlikely(vq->ndescs + count > vq->max_descs)) {
+ vq_err(vq, "Too many indirect + direct descs: %d + %d\n",
+ vq->ndescs, indirect->len);
+ return -E2BIG;
+ }
+
+ do {
+ if (unlikely(++found > count)) {
+ vq_err(vq, "Loop detected: last one at %u "
+ "indirect size %u\n",
+ i, count);
+ return -EINVAL;
+ }
+ if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) {
+ vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
+ i, (size_t)indirect->addr + i * sizeof desc);
+ return -EINVAL;
+ }
+ if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) {
+ vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n",
+ i, (size_t)indirect->addr + i * sizeof desc);
+ return -EINVAL;
+ }
+
+ push_split_desc(vq, &desc, head);
+ } while ((i = next_desc(vq, &desc)) != -1);
+ return 0;
+}
+
+static int fetch_descs(struct vhost_virtqueue *vq)
+{
+ struct vring_desc desc;
+ unsigned int i, head, found = 0;
+ u16 last_avail_idx;
+ __virtio16 avail_idx;
+ __virtio16 ring_head;
+ int ret;
+
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ last_avail_idx = vq->last_avail_idx;
+
+ if (vq->avail_idx == vq->last_avail_idx) {
+ if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
+ vq_err(vq, "Failed to access avail idx at %p\n",
+ &vq->avail->idx);
+ return -EFAULT;
+ }
+ vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+
+ if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
+ vq_err(vq, "Guest moved used index from %u to %u",
+ last_avail_idx, vq->avail_idx);
+ return -EFAULT;
+ }
+
+ /* If there's nothing new since last we looked, return
+ * invalid.
+ */
+ if (vq->avail_idx == last_avail_idx)
+ return vq->num;
+
+ /* Only get avail ring entries after they have been
+ * exposed by guest.
+ */
+ smp_rmb();
+ }
+
+ /* Grab the next descriptor number they're advertising */
+ if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
+ vq_err(vq, "Failed to read head: idx %d address %p\n",
+ last_avail_idx,
+ &vq->avail->ring[last_avail_idx % vq->num]);
+ return -EFAULT;
+ }
+
+ head = vhost16_to_cpu(vq, ring_head);
+
+ /* If their number is silly, that's an error. */
+ if (unlikely(head >= vq->num)) {
+ vq_err(vq, "Guest says index %u > %u is available",
+ head, vq->num);
+ return -EINVAL;
+ }
+
+ i = head;
+ do {
+ if (unlikely(i >= vq->num)) {
+ vq_err(vq, "Desc index is %u > %u, head = %u",
+ i, vq->num, head);
+ return -EINVAL;
+ }
+ if (unlikely(++found > vq->num)) {
+ vq_err(vq, "Loop detected: last one at %u "
+ "vq size %u head %u\n",
+ i, vq->num, head);
+ return -EINVAL;
+ }
+ ret = vhost_get_desc(vq, &desc, i);
+ if (unlikely(ret)) {
+ vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
+ i, vq->desc + i);
+ return -EFAULT;
+ }
+ ret = push_split_desc(vq, &desc, head);
+ if (unlikely(ret)) {
+ vq_err(vq, "Failed to save descriptor: idx %d\n", i);
+ return -EINVAL;
+ }
+ } while ((i = next_desc(vq, &desc)) != -1);
+
+ /* On success, increment avail index. */
+ vq->last_avail_idx++;
+
+ /* Assume notifications from guest are disabled at this point,
+ * if they aren't we would need to update avail_event index. */
+ BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
+
+ return 0;
+}
+
+/* This looks in the virtqueue and for the first available buffer, and converts
+ * it to an iovec for convenient access. Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function returns the descriptor number found, or vq->num (which is
+ * never a valid descriptor number) if none was found. A negative code is
+ * returned on error. */
+int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num)
+{
+ int ret = fetch_descs(vq);
+ struct vhost_desc *last;
+ u16 id;
+ int i;
+
+ if (ret)
+ return ret;
+
+ last = peek_split_desc(vq);
+ id = last->id;
+
+ if (last->flags & VRING_DESC_F_INDIRECT) {
+ int r;
+
+ pop_split_desc(vq);
+ r = fetch_indirect_descs(vq, last, id);
+ if (unlikely(r < 0)) {
+ if (r != -EAGAIN)
+ vq_err(vq, "Failure detected "
+ "in indirect descriptor at idx %d\n", id);
+ return ret;
+ }
+ }
+
+ /* Now convert to IOV */
+ /* When we start there are none of either input nor output. */
+ *out_num = *in_num = 0;
+ if (unlikely(log))
+ *log_num = 0;
+
+ for (i = 0; i < vq->ndescs; ++i) {
+ unsigned iov_count = *in_num + *out_num;
+ struct vhost_desc *desc = &vq->descs[i];
+ int access;
+
+ if (desc->flags & ~(VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE)) {
+ vq_err(vq, "Unexpected flags: 0x%x at descriptor id 0x%x\n",
+ desc->flags, desc->id);
+ ret = -EINVAL;
+ goto err;
+ }
+ if (desc->flags & VRING_DESC_F_WRITE)
+ access = VHOST_ACCESS_WO;
+ else
+ access = VHOST_ACCESS_RO;
+ ret = translate_desc(vq, desc->addr,
+ desc->len, iov + iov_count,
+ iov_size - iov_count, access);
+ if (unlikely(ret < 0)) {
+ if (ret != -EAGAIN)
+ vq_err(vq, "Translation failure %d descriptor idx %d\n",
+ ret, i);
+ goto err;
+ }
+ if (access == VHOST_ACCESS_WO) {
+ /* If this is an input descriptor,
+ * increment that count. */
+ *in_num += ret;
+ if (unlikely(log && ret)) {
+ log[*log_num].addr = desc->addr;
+ log[*log_num].len = desc->len;
+ ++*log_num;
+ }
+ } else {
+ /* If it's an output descriptor, they're all supposed
+ * to come before any input descriptors. */
+ if (unlikely(*in_num)) {
+ vq_err(vq, "Descriptor has out after in: "
+ "idx %d\n", i);
+ ret = -EINVAL;
+ goto err;
+ }
+ *out_num += ret;
+ }
+ }
+
+ ret = id;
+ vq->ndescs = 0;
+
+ return ret;
+
+err:
+ vhost_discard_vq_desc(vq, 1);
+ vq->ndescs = 0;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vhost_get_vq_desc_batch);
+
/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
{
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index e9ed2722b633..1724f61b6c2d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -80,6 +80,13 @@ enum vhost_uaddr_type {
VHOST_NUM_ADDRS = 3,
};
+struct vhost_desc {
+ u64 addr;
+ u32 len;
+ u16 flags; /* VRING_DESC_F_WRITE, VRING_DESC_F_NEXT */
+ u16 id;
+};
+
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
@@ -90,6 +97,11 @@ struct vhost_virtqueue {
struct vring_desc __user *desc;
struct vring_avail __user *avail;
struct vring_used __user *used;
+
+ struct vhost_desc *descs;
+ int ndescs;
+ int max_descs;
+
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
struct eventfd_ctx *call_ctx;
@@ -190,6 +202,10 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
bool vhost_log_access_ok(struct vhost_dev *);
+int vhost_get_vq_desc_batch(struct vhost_virtqueue *,
+ struct iovec iov[], unsigned int iov_count,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num);
int vhost_get_vq_desc(struct vhost_virtqueue *,
struct iovec iov[], unsigned int iov_count,
unsigned int *out_num, unsigned int *in_num,
--
MST
^ permalink raw reply related [flat|nested] 29+ messages in thread* [PATCH RFC v1 2/2] vhost: batching fetches
2019-10-11 13:45 [PATCH RFC v1 0/2] vhost: ring format independence Michael S. Tsirkin
2019-10-11 13:45 ` [PATCH RFC v1 1/2] vhost: option to fetch descriptors through an independent struct Michael S. Tsirkin
2019-10-11 13:45 ` Michael S. Tsirkin
@ 2019-10-11 13:46 ` Michael S. Tsirkin
2019-10-11 13:46 ` Michael S. Tsirkin
` (4 subsequent siblings)
7 siblings, 0 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-11 13:46 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, kvm, virtualization
With this patch applied, new and old code perform identically.
Lots of extra optimizations are now possible, e.g.
we can fetch multiple heads with copy_from/to_user now.
We can get rid of maintaining the log array. Etc etc.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/test.c | 2 +-
drivers/vhost/vhost.c | 50 ++++++++++++++++++++++++++++++++++++-------
drivers/vhost/vhost.h | 4 +++-
3 files changed, 46 insertions(+), 10 deletions(-)
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 39a018a7af2d..e3a8e9db22cd 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -128,7 +128,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
dev = &n->dev;
vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
- vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
+ vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT);
f->private_data = n;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 36661d6cb51f..aa383e847865 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -302,6 +302,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
{
vq->num = 1;
vq->ndescs = 0;
+ vq->first_desc = 0;
vq->desc = NULL;
vq->avail = NULL;
vq->used = NULL;
@@ -390,6 +391,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
vq->max_descs = dev->iov_limit;
+ vq->batch_descs = dev->iov_limit - UIO_MAXIOV;
vq->descs = kmalloc_array(vq->max_descs,
sizeof(*vq->descs),
GFP_KERNEL);
@@ -2366,6 +2368,8 @@ static void pop_split_desc(struct vhost_virtqueue *vq)
--vq->ndescs;
}
+#define VHOST_DESC_FLAGS (VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE | \
+ VRING_DESC_F_NEXT)
static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, u16 id)
{
struct vhost_desc *h;
@@ -2375,7 +2379,7 @@ static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc,
h = &vq->descs[vq->ndescs++];
h->addr = vhost64_to_cpu(vq, desc->addr);
h->len = vhost32_to_cpu(vq, desc->len);
- h->flags = vhost16_to_cpu(vq, desc->flags);
+ h->flags = vhost16_to_cpu(vq, desc->flags) & VHOST_DESC_FLAGS;
h->id = id;
return 0;
@@ -2450,7 +2454,7 @@ static int fetch_indirect_descs(struct vhost_virtqueue *vq,
return 0;
}
-static int fetch_descs(struct vhost_virtqueue *vq)
+static int fetch_buf(struct vhost_virtqueue *vq)
{
struct vring_desc desc;
unsigned int i, head, found = 0;
@@ -2462,7 +2466,11 @@ static int fetch_descs(struct vhost_virtqueue *vq)
/* Check it isn't doing very strange things with descriptor numbers. */
last_avail_idx = vq->last_avail_idx;
- if (vq->avail_idx == vq->last_avail_idx) {
+ if (unlikely(vq->avail_idx == vq->last_avail_idx)) {
+ /* If we already have work to do, don't bother re-checking. */
+ if (likely(vq->ndescs))
+ return vq->num;
+
if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
vq_err(vq, "Failed to access avail idx at %p\n",
&vq->avail->idx);
@@ -2541,6 +2549,24 @@ static int fetch_descs(struct vhost_virtqueue *vq)
return 0;
}
+static int fetch_descs(struct vhost_virtqueue *vq)
+{
+ int ret = 0;
+
+ if (unlikely(vq->first_desc >= vq->ndescs)) {
+ vq->first_desc = 0;
+ vq->ndescs = 0;
+ }
+
+ if (vq->ndescs)
+ return 0;
+
+ while (!ret && vq->ndescs <= vq->batch_descs)
+ ret = fetch_buf(vq);
+
+ return vq->ndescs ? 0 : ret;
+}
+
/* This looks in the virtqueue and for the first available buffer, and converts
* it to an iovec for convenient access. Since descriptors consist of some
* number of output then some number of input descriptors, it's actually two
@@ -2562,6 +2588,8 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
if (ret)
return ret;
+ /* Note: indirect descriptors are not batched */
+ /* TODO: batch up to a limit */
last = peek_split_desc(vq);
id = last->id;
@@ -2584,12 +2612,12 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
if (unlikely(log))
*log_num = 0;
- for (i = 0; i < vq->ndescs; ++i) {
+ for (i = vq->first_desc; i < vq->ndescs; ++i) {
unsigned iov_count = *in_num + *out_num;
struct vhost_desc *desc = &vq->descs[i];
int access;
- if (desc->flags & ~(VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE)) {
+ if (desc->flags & ~VHOST_DESC_FLAGS) {
vq_err(vq, "Unexpected flags: 0x%x at descriptor id 0x%x\n",
desc->flags, desc->id);
ret = -EINVAL;
@@ -2628,15 +2656,21 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
}
*out_num += ret;
}
+
+ ret = desc->id;
+
+ if (!(desc->flags & VRING_DESC_F_NEXT))
+ break;
}
- ret = id;
- vq->ndescs = 0;
+ vq->first_desc = i + 1;
return ret;
err:
- vhost_discard_vq_desc(vq, 1);
+ for (i = vq->first_desc; i < vq->ndescs; ++i)
+ if (!(desc->flags & VRING_DESC_F_NEXT))
+ vhost_discard_vq_desc(vq, 1);
vq->ndescs = 0;
return ret;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 1724f61b6c2d..8b88e0c903da 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -100,7 +100,9 @@ struct vhost_virtqueue {
struct vhost_desc *descs;
int ndescs;
+ int first_desc;
int max_descs;
+ int batch_descs;
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
@@ -245,7 +247,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
#define vq_err(vq, fmt, ...) do { \
- pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
+ pr_err(pr_fmt(fmt), ##__VA_ARGS__); \
if ((vq)->error_ctx) \
eventfd_signal((vq)->error_ctx, 1);\
} while (0)
--
MST
^ permalink raw reply related [flat|nested] 29+ messages in thread* [PATCH RFC v1 2/2] vhost: batching fetches
2019-10-11 13:45 [PATCH RFC v1 0/2] vhost: ring format independence Michael S. Tsirkin
` (2 preceding siblings ...)
2019-10-11 13:46 ` [PATCH RFC v1 2/2] vhost: batching fetches Michael S. Tsirkin
@ 2019-10-11 13:46 ` Michael S. Tsirkin
2019-10-12 7:30 ` Jason Wang
2019-10-12 7:30 ` Jason Wang
2019-10-12 7:31 ` [PATCH RFC v1 0/2] vhost: ring format independence Jason Wang
` (3 subsequent siblings)
7 siblings, 2 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-11 13:46 UTC (permalink / raw)
To: linux-kernel; +Cc: Jason Wang, kvm, virtualization, netdev
With this patch applied, new and old code perform identically.
Lots of extra optimizations are now possible, e.g.
we can fetch multiple heads with copy_from/to_user now.
We can get rid of maintaining the log array. Etc etc.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/test.c | 2 +-
drivers/vhost/vhost.c | 50 ++++++++++++++++++++++++++++++++++++-------
drivers/vhost/vhost.h | 4 +++-
3 files changed, 46 insertions(+), 10 deletions(-)
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 39a018a7af2d..e3a8e9db22cd 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -128,7 +128,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
dev = &n->dev;
vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
- vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
+ vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT);
f->private_data = n;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 36661d6cb51f..aa383e847865 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -302,6 +302,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
{
vq->num = 1;
vq->ndescs = 0;
+ vq->first_desc = 0;
vq->desc = NULL;
vq->avail = NULL;
vq->used = NULL;
@@ -390,6 +391,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
for (i = 0; i < dev->nvqs; ++i) {
vq = dev->vqs[i];
vq->max_descs = dev->iov_limit;
+ vq->batch_descs = dev->iov_limit - UIO_MAXIOV;
vq->descs = kmalloc_array(vq->max_descs,
sizeof(*vq->descs),
GFP_KERNEL);
@@ -2366,6 +2368,8 @@ static void pop_split_desc(struct vhost_virtqueue *vq)
--vq->ndescs;
}
+#define VHOST_DESC_FLAGS (VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE | \
+ VRING_DESC_F_NEXT)
static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, u16 id)
{
struct vhost_desc *h;
@@ -2375,7 +2379,7 @@ static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc,
h = &vq->descs[vq->ndescs++];
h->addr = vhost64_to_cpu(vq, desc->addr);
h->len = vhost32_to_cpu(vq, desc->len);
- h->flags = vhost16_to_cpu(vq, desc->flags);
+ h->flags = vhost16_to_cpu(vq, desc->flags) & VHOST_DESC_FLAGS;
h->id = id;
return 0;
@@ -2450,7 +2454,7 @@ static int fetch_indirect_descs(struct vhost_virtqueue *vq,
return 0;
}
-static int fetch_descs(struct vhost_virtqueue *vq)
+static int fetch_buf(struct vhost_virtqueue *vq)
{
struct vring_desc desc;
unsigned int i, head, found = 0;
@@ -2462,7 +2466,11 @@ static int fetch_descs(struct vhost_virtqueue *vq)
/* Check it isn't doing very strange things with descriptor numbers. */
last_avail_idx = vq->last_avail_idx;
- if (vq->avail_idx == vq->last_avail_idx) {
+ if (unlikely(vq->avail_idx == vq->last_avail_idx)) {
+ /* If we already have work to do, don't bother re-checking. */
+ if (likely(vq->ndescs))
+ return vq->num;
+
if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
vq_err(vq, "Failed to access avail idx at %p\n",
&vq->avail->idx);
@@ -2541,6 +2549,24 @@ static int fetch_descs(struct vhost_virtqueue *vq)
return 0;
}
+static int fetch_descs(struct vhost_virtqueue *vq)
+{
+ int ret = 0;
+
+ if (unlikely(vq->first_desc >= vq->ndescs)) {
+ vq->first_desc = 0;
+ vq->ndescs = 0;
+ }
+
+ if (vq->ndescs)
+ return 0;
+
+ while (!ret && vq->ndescs <= vq->batch_descs)
+ ret = fetch_buf(vq);
+
+ return vq->ndescs ? 0 : ret;
+}
+
/* This looks in the virtqueue and for the first available buffer, and converts
* it to an iovec for convenient access. Since descriptors consist of some
* number of output then some number of input descriptors, it's actually two
@@ -2562,6 +2588,8 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
if (ret)
return ret;
+ /* Note: indirect descriptors are not batched */
+ /* TODO: batch up to a limit */
last = peek_split_desc(vq);
id = last->id;
@@ -2584,12 +2612,12 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
if (unlikely(log))
*log_num = 0;
- for (i = 0; i < vq->ndescs; ++i) {
+ for (i = vq->first_desc; i < vq->ndescs; ++i) {
unsigned iov_count = *in_num + *out_num;
struct vhost_desc *desc = &vq->descs[i];
int access;
- if (desc->flags & ~(VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE)) {
+ if (desc->flags & ~VHOST_DESC_FLAGS) {
vq_err(vq, "Unexpected flags: 0x%x at descriptor id 0x%x\n",
desc->flags, desc->id);
ret = -EINVAL;
@@ -2628,15 +2656,21 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
}
*out_num += ret;
}
+
+ ret = desc->id;
+
+ if (!(desc->flags & VRING_DESC_F_NEXT))
+ break;
}
- ret = id;
- vq->ndescs = 0;
+ vq->first_desc = i + 1;
return ret;
err:
- vhost_discard_vq_desc(vq, 1);
+ for (i = vq->first_desc; i < vq->ndescs; ++i)
+ if (!(desc->flags & VRING_DESC_F_NEXT))
+ vhost_discard_vq_desc(vq, 1);
vq->ndescs = 0;
return ret;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 1724f61b6c2d..8b88e0c903da 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -100,7 +100,9 @@ struct vhost_virtqueue {
struct vhost_desc *descs;
int ndescs;
+ int first_desc;
int max_descs;
+ int batch_descs;
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
@@ -245,7 +247,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
#define vq_err(vq, fmt, ...) do { \
- pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
+ pr_err(pr_fmt(fmt), ##__VA_ARGS__); \
if ((vq)->error_ctx) \
eventfd_signal((vq)->error_ctx, 1);\
} while (0)
--
MST
^ permalink raw reply related [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 2/2] vhost: batching fetches
2019-10-11 13:46 ` Michael S. Tsirkin
@ 2019-10-12 7:30 ` Jason Wang
2019-10-12 20:36 ` Michael S. Tsirkin
2019-10-12 20:36 ` Michael S. Tsirkin
2019-10-12 7:30 ` Jason Wang
1 sibling, 2 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-12 7:30 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel; +Cc: kvm, virtualization, netdev
On 2019/10/11 下午9:46, Michael S. Tsirkin wrote:
> With this patch applied, new and old code perform identically.
>
> Lots of extra optimizations are now possible, e.g.
> we can fetch multiple heads with copy_from/to_user now.
> We can get rid of maintaining the log array. Etc etc.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> drivers/vhost/test.c | 2 +-
> drivers/vhost/vhost.c | 50 ++++++++++++++++++++++++++++++++++++-------
> drivers/vhost/vhost.h | 4 +++-
> 3 files changed, 46 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> index 39a018a7af2d..e3a8e9db22cd 100644
> --- a/drivers/vhost/test.c
> +++ b/drivers/vhost/test.c
> @@ -128,7 +128,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
> dev = &n->dev;
> vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
> n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
> - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
> + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
> VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT);
>
> f->private_data = n;
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 36661d6cb51f..aa383e847865 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -302,6 +302,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> {
> vq->num = 1;
> vq->ndescs = 0;
> + vq->first_desc = 0;
> vq->desc = NULL;
> vq->avail = NULL;
> vq->used = NULL;
> @@ -390,6 +391,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> for (i = 0; i < dev->nvqs; ++i) {
> vq = dev->vqs[i];
> vq->max_descs = dev->iov_limit;
> + vq->batch_descs = dev->iov_limit - UIO_MAXIOV;
> vq->descs = kmalloc_array(vq->max_descs,
> sizeof(*vq->descs),
> GFP_KERNEL);
> @@ -2366,6 +2368,8 @@ static void pop_split_desc(struct vhost_virtqueue *vq)
> --vq->ndescs;
> }
>
> +#define VHOST_DESC_FLAGS (VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE | \
> + VRING_DESC_F_NEXT)
> static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, u16 id)
> {
> struct vhost_desc *h;
> @@ -2375,7 +2379,7 @@ static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc,
> h = &vq->descs[vq->ndescs++];
> h->addr = vhost64_to_cpu(vq, desc->addr);
> h->len = vhost32_to_cpu(vq, desc->len);
> - h->flags = vhost16_to_cpu(vq, desc->flags);
> + h->flags = vhost16_to_cpu(vq, desc->flags) & VHOST_DESC_FLAGS;
> h->id = id;
>
> return 0;
> @@ -2450,7 +2454,7 @@ static int fetch_indirect_descs(struct vhost_virtqueue *vq,
> return 0;
> }
>
> -static int fetch_descs(struct vhost_virtqueue *vq)
> +static int fetch_buf(struct vhost_virtqueue *vq)
> {
> struct vring_desc desc;
> unsigned int i, head, found = 0;
> @@ -2462,7 +2466,11 @@ static int fetch_descs(struct vhost_virtqueue *vq)
> /* Check it isn't doing very strange things with descriptor numbers. */
> last_avail_idx = vq->last_avail_idx;
>
> - if (vq->avail_idx == vq->last_avail_idx) {
> + if (unlikely(vq->avail_idx == vq->last_avail_idx)) {
> + /* If we already have work to do, don't bother re-checking. */
> + if (likely(vq->ndescs))
> + return vq->num;
> +
> if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
> vq_err(vq, "Failed to access avail idx at %p\n",
> &vq->avail->idx);
> @@ -2541,6 +2549,24 @@ static int fetch_descs(struct vhost_virtqueue *vq)
> return 0;
> }
>
> +static int fetch_descs(struct vhost_virtqueue *vq)
> +{
> + int ret = 0;
> +
> + if (unlikely(vq->first_desc >= vq->ndescs)) {
> + vq->first_desc = 0;
> + vq->ndescs = 0;
> + }
> +
> + if (vq->ndescs)
> + return 0;
> +
> + while (!ret && vq->ndescs <= vq->batch_descs)
> + ret = fetch_buf(vq);
It looks to me descriptor chaining might be broken here.
> +
> + return vq->ndescs ? 0 : ret;
> +}
> +
> /* This looks in the virtqueue and for the first available buffer, and converts
> * it to an iovec for convenient access. Since descriptors consist of some
> * number of output then some number of input descriptors, it's actually two
> @@ -2562,6 +2588,8 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> if (ret)
> return ret;
>
> + /* Note: indirect descriptors are not batched */
> + /* TODO: batch up to a limit */
> last = peek_split_desc(vq);
> id = last->id;
>
> @@ -2584,12 +2612,12 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> if (unlikely(log))
> *log_num = 0;
>
> - for (i = 0; i < vq->ndescs; ++i) {
> + for (i = vq->first_desc; i < vq->ndescs; ++i) {
> unsigned iov_count = *in_num + *out_num;
> struct vhost_desc *desc = &vq->descs[i];
> int access;
>
> - if (desc->flags & ~(VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE)) {
> + if (desc->flags & ~VHOST_DESC_FLAGS) {
> vq_err(vq, "Unexpected flags: 0x%x at descriptor id 0x%x\n",
> desc->flags, desc->id);
> ret = -EINVAL;
> @@ -2628,15 +2656,21 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> }
> *out_num += ret;
> }
> +
> + ret = desc->id;
> +
> + if (!(desc->flags & VRING_DESC_F_NEXT))
> + break;
> }
What happens if we reach vq->ndescs but VRING_DESC_F_NEXT is still set?
Thanks
>
> - ret = id;
> - vq->ndescs = 0;
> + vq->first_desc = i + 1;
>
> return ret;
>
> err:
> - vhost_discard_vq_desc(vq, 1);
> + for (i = vq->first_desc; i < vq->ndescs; ++i)
> + if (!(desc->flags & VRING_DESC_F_NEXT))
> + vhost_discard_vq_desc(vq, 1);
> vq->ndescs = 0;
>
> return ret;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 1724f61b6c2d..8b88e0c903da 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -100,7 +100,9 @@ struct vhost_virtqueue {
>
> struct vhost_desc *descs;
> int ndescs;
> + int first_desc;
> int max_descs;
> + int batch_descs;
>
> const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
> struct file *kick;
> @@ -245,7 +247,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
> int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
>
> #define vq_err(vq, fmt, ...) do { \
> - pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
> + pr_err(pr_fmt(fmt), ##__VA_ARGS__); \
> if ((vq)->error_ctx) \
> eventfd_signal((vq)->error_ctx, 1);\
> } while (0)
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 2/2] vhost: batching fetches
2019-10-12 7:30 ` Jason Wang
@ 2019-10-12 20:36 ` Michael S. Tsirkin
2019-10-14 1:47 ` Jason Wang
2019-10-14 1:47 ` Jason Wang
2019-10-12 20:36 ` Michael S. Tsirkin
1 sibling, 2 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-12 20:36 UTC (permalink / raw)
To: Jason Wang; +Cc: linux-kernel, kvm, virtualization, netdev
On Sat, Oct 12, 2019 at 03:30:52PM +0800, Jason Wang wrote:
>
> On 2019/10/11 下午9:46, Michael S. Tsirkin wrote:
> > With this patch applied, new and old code perform identically.
> >
> > Lots of extra optimizations are now possible, e.g.
> > we can fetch multiple heads with copy_from/to_user now.
> > We can get rid of maintaining the log array. Etc etc.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > drivers/vhost/test.c | 2 +-
> > drivers/vhost/vhost.c | 50 ++++++++++++++++++++++++++++++++++++-------
> > drivers/vhost/vhost.h | 4 +++-
> > 3 files changed, 46 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> > index 39a018a7af2d..e3a8e9db22cd 100644
> > --- a/drivers/vhost/test.c
> > +++ b/drivers/vhost/test.c
> > @@ -128,7 +128,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
> > dev = &n->dev;
> > vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
> > n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
> > - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
> > + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
> > VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT);
> > f->private_data = n;
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index 36661d6cb51f..aa383e847865 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -302,6 +302,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> > {
> > vq->num = 1;
> > vq->ndescs = 0;
> > + vq->first_desc = 0;
> > vq->desc = NULL;
> > vq->avail = NULL;
> > vq->used = NULL;
> > @@ -390,6 +391,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> > for (i = 0; i < dev->nvqs; ++i) {
> > vq = dev->vqs[i];
> > vq->max_descs = dev->iov_limit;
> > + vq->batch_descs = dev->iov_limit - UIO_MAXIOV;
> > vq->descs = kmalloc_array(vq->max_descs,
> > sizeof(*vq->descs),
> > GFP_KERNEL);
> > @@ -2366,6 +2368,8 @@ static void pop_split_desc(struct vhost_virtqueue *vq)
> > --vq->ndescs;
> > }
> > +#define VHOST_DESC_FLAGS (VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE | \
> > + VRING_DESC_F_NEXT)
> > static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, u16 id)
> > {
> > struct vhost_desc *h;
> > @@ -2375,7 +2379,7 @@ static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc,
> > h = &vq->descs[vq->ndescs++];
> > h->addr = vhost64_to_cpu(vq, desc->addr);
> > h->len = vhost32_to_cpu(vq, desc->len);
> > - h->flags = vhost16_to_cpu(vq, desc->flags);
> > + h->flags = vhost16_to_cpu(vq, desc->flags) & VHOST_DESC_FLAGS;
> > h->id = id;
> > return 0;
> > @@ -2450,7 +2454,7 @@ static int fetch_indirect_descs(struct vhost_virtqueue *vq,
> > return 0;
> > }
> > -static int fetch_descs(struct vhost_virtqueue *vq)
> > +static int fetch_buf(struct vhost_virtqueue *vq)
> > {
> > struct vring_desc desc;
> > unsigned int i, head, found = 0;
> > @@ -2462,7 +2466,11 @@ static int fetch_descs(struct vhost_virtqueue *vq)
> > /* Check it isn't doing very strange things with descriptor numbers. */
> > last_avail_idx = vq->last_avail_idx;
> > - if (vq->avail_idx == vq->last_avail_idx) {
> > + if (unlikely(vq->avail_idx == vq->last_avail_idx)) {
> > + /* If we already have work to do, don't bother re-checking. */
> > + if (likely(vq->ndescs))
> > + return vq->num;
> > +
> > if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
> > vq_err(vq, "Failed to access avail idx at %p\n",
> > &vq->avail->idx);
> > @@ -2541,6 +2549,24 @@ static int fetch_descs(struct vhost_virtqueue *vq)
> > return 0;
> > }
> > +static int fetch_descs(struct vhost_virtqueue *vq)
> > +{
> > + int ret = 0;
> > +
> > + if (unlikely(vq->first_desc >= vq->ndescs)) {
> > + vq->first_desc = 0;
> > + vq->ndescs = 0;
> > + }
> > +
> > + if (vq->ndescs)
> > + return 0;
> > +
> > + while (!ret && vq->ndescs <= vq->batch_descs)
> > + ret = fetch_buf(vq);
>
>
> It looks to me descriptor chaining might be broken here.
It should work because fetch_buf fetches a whole buf, following
the chain. Seems to work in a small test ... what issues do you see?
>
> > +
> > + return vq->ndescs ? 0 : ret;
> > +}
> > +
> > /* This looks in the virtqueue and for the first available buffer, and converts
> > * it to an iovec for convenient access. Since descriptors consist of some
> > * number of output then some number of input descriptors, it's actually two
> > @@ -2562,6 +2588,8 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> > if (ret)
> > return ret;
> > + /* Note: indirect descriptors are not batched */
> > + /* TODO: batch up to a limit */
> > last = peek_split_desc(vq);
> > id = last->id;
> > @@ -2584,12 +2612,12 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> > if (unlikely(log))
> > *log_num = 0;
> > - for (i = 0; i < vq->ndescs; ++i) {
> > + for (i = vq->first_desc; i < vq->ndescs; ++i) {
> > unsigned iov_count = *in_num + *out_num;
> > struct vhost_desc *desc = &vq->descs[i];
> > int access;
> > - if (desc->flags & ~(VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE)) {
> > + if (desc->flags & ~VHOST_DESC_FLAGS) {
> > vq_err(vq, "Unexpected flags: 0x%x at descriptor id 0x%x\n",
> > desc->flags, desc->id);
> > ret = -EINVAL;
> > @@ -2628,15 +2656,21 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> > }
> > *out_num += ret;
> > }
> > +
> > + ret = desc->id;
> > +
> > + if (!(desc->flags & VRING_DESC_F_NEXT))
> > + break;
> > }
>
>
> What happens if we reach vq->ndescs but VRING_DESC_F_NEXT is still set?
>
> Thanks
This can't happen: descriptors are pushed by push_split_desc each time
we go through a loop in fetch_buf. The only way to exit the loop
with return code 0 is if next_desc return -1 that is when VRING_DESC_F_NEXT
is clear.
But it's a good idea to add a BUG_ON here, I'll do it in the next version.
>
> > - ret = id;
> > - vq->ndescs = 0;
> > + vq->first_desc = i + 1;
> > return ret;
> > err:
> > - vhost_discard_vq_desc(vq, 1);
> > + for (i = vq->first_desc; i < vq->ndescs; ++i)
> > + if (!(desc->flags & VRING_DESC_F_NEXT))
> > + vhost_discard_vq_desc(vq, 1);
> > vq->ndescs = 0;
> > return ret;
> > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> > index 1724f61b6c2d..8b88e0c903da 100644
> > --- a/drivers/vhost/vhost.h
> > +++ b/drivers/vhost/vhost.h
> > @@ -100,7 +100,9 @@ struct vhost_virtqueue {
> > struct vhost_desc *descs;
> > int ndescs;
> > + int first_desc;
> > int max_descs;
> > + int batch_descs;
> > const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
> > struct file *kick;
> > @@ -245,7 +247,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
> > int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
> > #define vq_err(vq, fmt, ...) do { \
> > - pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
> > + pr_err(pr_fmt(fmt), ##__VA_ARGS__); \
> > if ((vq)->error_ctx) \
> > eventfd_signal((vq)->error_ctx, 1);\
> > } while (0)
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 2/2] vhost: batching fetches
2019-10-12 20:36 ` Michael S. Tsirkin
@ 2019-10-14 1:47 ` Jason Wang
2019-10-14 1:47 ` Jason Wang
1 sibling, 0 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-14 1:47 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
On 2019/10/13 上午4:36, Michael S. Tsirkin wrote:
> On Sat, Oct 12, 2019 at 03:30:52PM +0800, Jason Wang wrote:
>> On 2019/10/11 下午9:46, Michael S. Tsirkin wrote:
>>> With this patch applied, new and old code perform identically.
>>>
>>> Lots of extra optimizations are now possible, e.g.
>>> we can fetch multiple heads with copy_from/to_user now.
>>> We can get rid of maintaining the log array. Etc etc.
>>>
>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>> ---
>>> drivers/vhost/test.c | 2 +-
>>> drivers/vhost/vhost.c | 50 ++++++++++++++++++++++++++++++++++++-------
>>> drivers/vhost/vhost.h | 4 +++-
>>> 3 files changed, 46 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
>>> index 39a018a7af2d..e3a8e9db22cd 100644
>>> --- a/drivers/vhost/test.c
>>> +++ b/drivers/vhost/test.c
>>> @@ -128,7 +128,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
>>> dev = &n->dev;
>>> vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
>>> n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
>>> - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
>>> + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
>>> VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT);
>>> f->private_data = n;
>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>>> index 36661d6cb51f..aa383e847865 100644
>>> --- a/drivers/vhost/vhost.c
>>> +++ b/drivers/vhost/vhost.c
>>> @@ -302,6 +302,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>>> {
>>> vq->num = 1;
>>> vq->ndescs = 0;
>>> + vq->first_desc = 0;
>>> vq->desc = NULL;
>>> vq->avail = NULL;
>>> vq->used = NULL;
>>> @@ -390,6 +391,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>>> for (i = 0; i < dev->nvqs; ++i) {
>>> vq = dev->vqs[i];
>>> vq->max_descs = dev->iov_limit;
>>> + vq->batch_descs = dev->iov_limit - UIO_MAXIOV;
>>> vq->descs = kmalloc_array(vq->max_descs,
>>> sizeof(*vq->descs),
>>> GFP_KERNEL);
>>> @@ -2366,6 +2368,8 @@ static void pop_split_desc(struct vhost_virtqueue *vq)
>>> --vq->ndescs;
>>> }
>>> +#define VHOST_DESC_FLAGS (VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE | \
>>> + VRING_DESC_F_NEXT)
>>> static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, u16 id)
>>> {
>>> struct vhost_desc *h;
>>> @@ -2375,7 +2379,7 @@ static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc,
>>> h = &vq->descs[vq->ndescs++];
>>> h->addr = vhost64_to_cpu(vq, desc->addr);
>>> h->len = vhost32_to_cpu(vq, desc->len);
>>> - h->flags = vhost16_to_cpu(vq, desc->flags);
>>> + h->flags = vhost16_to_cpu(vq, desc->flags) & VHOST_DESC_FLAGS;
>>> h->id = id;
>>> return 0;
>>> @@ -2450,7 +2454,7 @@ static int fetch_indirect_descs(struct vhost_virtqueue *vq,
>>> return 0;
>>> }
>>> -static int fetch_descs(struct vhost_virtqueue *vq)
>>> +static int fetch_buf(struct vhost_virtqueue *vq)
>>> {
>>> struct vring_desc desc;
>>> unsigned int i, head, found = 0;
>>> @@ -2462,7 +2466,11 @@ static int fetch_descs(struct vhost_virtqueue *vq)
>>> /* Check it isn't doing very strange things with descriptor numbers. */
>>> last_avail_idx = vq->last_avail_idx;
>>> - if (vq->avail_idx == vq->last_avail_idx) {
>>> + if (unlikely(vq->avail_idx == vq->last_avail_idx)) {
>>> + /* If we already have work to do, don't bother re-checking. */
>>> + if (likely(vq->ndescs))
>>> + return vq->num;
>>> +
>>> if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
>>> vq_err(vq, "Failed to access avail idx at %p\n",
>>> &vq->avail->idx);
>>> @@ -2541,6 +2549,24 @@ static int fetch_descs(struct vhost_virtqueue *vq)
>>> return 0;
>>> }
>>> +static int fetch_descs(struct vhost_virtqueue *vq)
>>> +{
>>> + int ret = 0;
>>> +
>>> + if (unlikely(vq->first_desc >= vq->ndescs)) {
>>> + vq->first_desc = 0;
>>> + vq->ndescs = 0;
>>> + }
>>> +
>>> + if (vq->ndescs)
>>> + return 0;
>>> +
>>> + while (!ret && vq->ndescs <= vq->batch_descs)
>>> + ret = fetch_buf(vq);
>>
>> It looks to me descriptor chaining might be broken here.
> It should work because fetch_buf fetches a whole buf, following
> the chain. Seems to work in a small test ... what issues do you see?
Consider the case when fetch_buf return -EINVAL when push_split_desc()
fail. This means we only have partial descriptors.
Do we need decreasing last_avail_idx and vq->ndescs here? Or having a
head,tail instead of first_desc?
Thanks
>
>>> +
>>> + return vq->ndescs ? 0 : ret;
>>> +}
>>> +
>>> /* This looks in the virtqueue and for the first available buffer, and converts
>>> * it to an iovec for convenient access. Since descriptors consist of some
>>> * number of output then some number of input descriptors, it's actually two
>>> @@ -2562,6 +2588,8 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
>>> if (ret)
>>> return ret;
>>> + /* Note: indirect descriptors are not batched */
>>> + /* TODO: batch up to a limit */
>>> last = peek_split_desc(vq);
>>> id = last->id;
>>> @@ -2584,12 +2612,12 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
>>> if (unlikely(log))
>>> *log_num = 0;
>>> - for (i = 0; i < vq->ndescs; ++i) {
>>> + for (i = vq->first_desc; i < vq->ndescs; ++i) {
>>> unsigned iov_count = *in_num + *out_num;
>>> struct vhost_desc *desc = &vq->descs[i];
>>> int access;
>>> - if (desc->flags & ~(VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE)) {
>>> + if (desc->flags & ~VHOST_DESC_FLAGS) {
>>> vq_err(vq, "Unexpected flags: 0x%x at descriptor id 0x%x\n",
>>> desc->flags, desc->id);
>>> ret = -EINVAL;
>>> @@ -2628,15 +2656,21 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
>>> }
>>> *out_num += ret;
>>> }
>>> +
>>> + ret = desc->id;
>>> +
>>> + if (!(desc->flags & VRING_DESC_F_NEXT))
>>> + break;
>>> }
>>
>> What happens if we reach vq->ndescs but VRING_DESC_F_NEXT is still set?
>>
>> Thanks
> This can't happen: descriptors are pushed by push_split_desc each time
> we go through a loop in fetch_buf. The only way to exit the loop
> with return code 0 is if next_desc return -1 that is when VRING_DESC_F_NEXT
> is clear.
>
> But it's a good idea to add a BUG_ON here, I'll do it in the next version.
>
>
>>> - ret = id;
>>> - vq->ndescs = 0;
>>> + vq->first_desc = i + 1;
>>> return ret;
>>> err:
>>> - vhost_discard_vq_desc(vq, 1);
>>> + for (i = vq->first_desc; i < vq->ndescs; ++i)
>>> + if (!(desc->flags & VRING_DESC_F_NEXT))
>>> + vhost_discard_vq_desc(vq, 1);
>>> vq->ndescs = 0;
>>> return ret;
>>> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>>> index 1724f61b6c2d..8b88e0c903da 100644
>>> --- a/drivers/vhost/vhost.h
>>> +++ b/drivers/vhost/vhost.h
>>> @@ -100,7 +100,9 @@ struct vhost_virtqueue {
>>> struct vhost_desc *descs;
>>> int ndescs;
>>> + int first_desc;
>>> int max_descs;
>>> + int batch_descs;
>>> const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
>>> struct file *kick;
>>> @@ -245,7 +247,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
>>> int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
>>> #define vq_err(vq, fmt, ...) do { \
>>> - pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
>>> + pr_err(pr_fmt(fmt), ##__VA_ARGS__); \
>>> if ((vq)->error_ctx) \
>>> eventfd_signal((vq)->error_ctx, 1);\
>>> } while (0)
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 2/2] vhost: batching fetches
2019-10-12 20:36 ` Michael S. Tsirkin
2019-10-14 1:47 ` Jason Wang
@ 2019-10-14 1:47 ` Jason Wang
1 sibling, 0 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-14 1:47 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: linux-kernel, kvm, virtualization, netdev
On 2019/10/13 上午4:36, Michael S. Tsirkin wrote:
> On Sat, Oct 12, 2019 at 03:30:52PM +0800, Jason Wang wrote:
>> On 2019/10/11 下午9:46, Michael S. Tsirkin wrote:
>>> With this patch applied, new and old code perform identically.
>>>
>>> Lots of extra optimizations are now possible, e.g.
>>> we can fetch multiple heads with copy_from/to_user now.
>>> We can get rid of maintaining the log array. Etc etc.
>>>
>>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>> ---
>>> drivers/vhost/test.c | 2 +-
>>> drivers/vhost/vhost.c | 50 ++++++++++++++++++++++++++++++++++++-------
>>> drivers/vhost/vhost.h | 4 +++-
>>> 3 files changed, 46 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
>>> index 39a018a7af2d..e3a8e9db22cd 100644
>>> --- a/drivers/vhost/test.c
>>> +++ b/drivers/vhost/test.c
>>> @@ -128,7 +128,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
>>> dev = &n->dev;
>>> vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
>>> n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
>>> - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
>>> + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
>>> VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT);
>>> f->private_data = n;
>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>>> index 36661d6cb51f..aa383e847865 100644
>>> --- a/drivers/vhost/vhost.c
>>> +++ b/drivers/vhost/vhost.c
>>> @@ -302,6 +302,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>>> {
>>> vq->num = 1;
>>> vq->ndescs = 0;
>>> + vq->first_desc = 0;
>>> vq->desc = NULL;
>>> vq->avail = NULL;
>>> vq->used = NULL;
>>> @@ -390,6 +391,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
>>> for (i = 0; i < dev->nvqs; ++i) {
>>> vq = dev->vqs[i];
>>> vq->max_descs = dev->iov_limit;
>>> + vq->batch_descs = dev->iov_limit - UIO_MAXIOV;
>>> vq->descs = kmalloc_array(vq->max_descs,
>>> sizeof(*vq->descs),
>>> GFP_KERNEL);
>>> @@ -2366,6 +2368,8 @@ static void pop_split_desc(struct vhost_virtqueue *vq)
>>> --vq->ndescs;
>>> }
>>> +#define VHOST_DESC_FLAGS (VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE | \
>>> + VRING_DESC_F_NEXT)
>>> static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, u16 id)
>>> {
>>> struct vhost_desc *h;
>>> @@ -2375,7 +2379,7 @@ static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc,
>>> h = &vq->descs[vq->ndescs++];
>>> h->addr = vhost64_to_cpu(vq, desc->addr);
>>> h->len = vhost32_to_cpu(vq, desc->len);
>>> - h->flags = vhost16_to_cpu(vq, desc->flags);
>>> + h->flags = vhost16_to_cpu(vq, desc->flags) & VHOST_DESC_FLAGS;
>>> h->id = id;
>>> return 0;
>>> @@ -2450,7 +2454,7 @@ static int fetch_indirect_descs(struct vhost_virtqueue *vq,
>>> return 0;
>>> }
>>> -static int fetch_descs(struct vhost_virtqueue *vq)
>>> +static int fetch_buf(struct vhost_virtqueue *vq)
>>> {
>>> struct vring_desc desc;
>>> unsigned int i, head, found = 0;
>>> @@ -2462,7 +2466,11 @@ static int fetch_descs(struct vhost_virtqueue *vq)
>>> /* Check it isn't doing very strange things with descriptor numbers. */
>>> last_avail_idx = vq->last_avail_idx;
>>> - if (vq->avail_idx == vq->last_avail_idx) {
>>> + if (unlikely(vq->avail_idx == vq->last_avail_idx)) {
>>> + /* If we already have work to do, don't bother re-checking. */
>>> + if (likely(vq->ndescs))
>>> + return vq->num;
>>> +
>>> if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
>>> vq_err(vq, "Failed to access avail idx at %p\n",
>>> &vq->avail->idx);
>>> @@ -2541,6 +2549,24 @@ static int fetch_descs(struct vhost_virtqueue *vq)
>>> return 0;
>>> }
>>> +static int fetch_descs(struct vhost_virtqueue *vq)
>>> +{
>>> + int ret = 0;
>>> +
>>> + if (unlikely(vq->first_desc >= vq->ndescs)) {
>>> + vq->first_desc = 0;
>>> + vq->ndescs = 0;
>>> + }
>>> +
>>> + if (vq->ndescs)
>>> + return 0;
>>> +
>>> + while (!ret && vq->ndescs <= vq->batch_descs)
>>> + ret = fetch_buf(vq);
>>
>> It looks to me descriptor chaining might be broken here.
> It should work because fetch_buf fetches a whole buf, following
> the chain. Seems to work in a small test ... what issues do you see?
Consider the case when fetch_buf return -EINVAL when push_split_desc()
fail. This means we only have partial descriptors.
Do we need decreasing last_avail_idx and vq->ndescs here? Or having a
head,tail instead of first_desc?
Thanks
>
>>> +
>>> + return vq->ndescs ? 0 : ret;
>>> +}
>>> +
>>> /* This looks in the virtqueue and for the first available buffer, and converts
>>> * it to an iovec for convenient access. Since descriptors consist of some
>>> * number of output then some number of input descriptors, it's actually two
>>> @@ -2562,6 +2588,8 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
>>> if (ret)
>>> return ret;
>>> + /* Note: indirect descriptors are not batched */
>>> + /* TODO: batch up to a limit */
>>> last = peek_split_desc(vq);
>>> id = last->id;
>>> @@ -2584,12 +2612,12 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
>>> if (unlikely(log))
>>> *log_num = 0;
>>> - for (i = 0; i < vq->ndescs; ++i) {
>>> + for (i = vq->first_desc; i < vq->ndescs; ++i) {
>>> unsigned iov_count = *in_num + *out_num;
>>> struct vhost_desc *desc = &vq->descs[i];
>>> int access;
>>> - if (desc->flags & ~(VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE)) {
>>> + if (desc->flags & ~VHOST_DESC_FLAGS) {
>>> vq_err(vq, "Unexpected flags: 0x%x at descriptor id 0x%x\n",
>>> desc->flags, desc->id);
>>> ret = -EINVAL;
>>> @@ -2628,15 +2656,21 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
>>> }
>>> *out_num += ret;
>>> }
>>> +
>>> + ret = desc->id;
>>> +
>>> + if (!(desc->flags & VRING_DESC_F_NEXT))
>>> + break;
>>> }
>>
>> What happens if we reach vq->ndescs but VRING_DESC_F_NEXT is still set?
>>
>> Thanks
> This can't happen: descriptors are pushed by push_split_desc each time
> we go through a loop in fetch_buf. The only way to exit the loop
> with return code 0 is if next_desc return -1 that is when VRING_DESC_F_NEXT
> is clear.
>
> But it's a good idea to add a BUG_ON here, I'll do it in the next version.
>
>
>>> - ret = id;
>>> - vq->ndescs = 0;
>>> + vq->first_desc = i + 1;
>>> return ret;
>>> err:
>>> - vhost_discard_vq_desc(vq, 1);
>>> + for (i = vq->first_desc; i < vq->ndescs; ++i)
>>> + if (!(desc->flags & VRING_DESC_F_NEXT))
>>> + vhost_discard_vq_desc(vq, 1);
>>> vq->ndescs = 0;
>>> return ret;
>>> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>>> index 1724f61b6c2d..8b88e0c903da 100644
>>> --- a/drivers/vhost/vhost.h
>>> +++ b/drivers/vhost/vhost.h
>>> @@ -100,7 +100,9 @@ struct vhost_virtqueue {
>>> struct vhost_desc *descs;
>>> int ndescs;
>>> + int first_desc;
>>> int max_descs;
>>> + int batch_descs;
>>> const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
>>> struct file *kick;
>>> @@ -245,7 +247,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
>>> int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
>>> #define vq_err(vq, fmt, ...) do { \
>>> - pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
>>> + pr_err(pr_fmt(fmt), ##__VA_ARGS__); \
>>> if ((vq)->error_ctx) \
>>> eventfd_signal((vq)->error_ctx, 1);\
>>> } while (0)
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH RFC v1 2/2] vhost: batching fetches
2019-10-12 7:30 ` Jason Wang
2019-10-12 20:36 ` Michael S. Tsirkin
@ 2019-10-12 20:36 ` Michael S. Tsirkin
1 sibling, 0 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-12 20:36 UTC (permalink / raw)
To: Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization
On Sat, Oct 12, 2019 at 03:30:52PM +0800, Jason Wang wrote:
>
> On 2019/10/11 下午9:46, Michael S. Tsirkin wrote:
> > With this patch applied, new and old code perform identically.
> >
> > Lots of extra optimizations are now possible, e.g.
> > we can fetch multiple heads with copy_from/to_user now.
> > We can get rid of maintaining the log array. Etc etc.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > drivers/vhost/test.c | 2 +-
> > drivers/vhost/vhost.c | 50 ++++++++++++++++++++++++++++++++++++-------
> > drivers/vhost/vhost.h | 4 +++-
> > 3 files changed, 46 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> > index 39a018a7af2d..e3a8e9db22cd 100644
> > --- a/drivers/vhost/test.c
> > +++ b/drivers/vhost/test.c
> > @@ -128,7 +128,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
> > dev = &n->dev;
> > vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
> > n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
> > - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
> > + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
> > VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT);
> > f->private_data = n;
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index 36661d6cb51f..aa383e847865 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -302,6 +302,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> > {
> > vq->num = 1;
> > vq->ndescs = 0;
> > + vq->first_desc = 0;
> > vq->desc = NULL;
> > vq->avail = NULL;
> > vq->used = NULL;
> > @@ -390,6 +391,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> > for (i = 0; i < dev->nvqs; ++i) {
> > vq = dev->vqs[i];
> > vq->max_descs = dev->iov_limit;
> > + vq->batch_descs = dev->iov_limit - UIO_MAXIOV;
> > vq->descs = kmalloc_array(vq->max_descs,
> > sizeof(*vq->descs),
> > GFP_KERNEL);
> > @@ -2366,6 +2368,8 @@ static void pop_split_desc(struct vhost_virtqueue *vq)
> > --vq->ndescs;
> > }
> > +#define VHOST_DESC_FLAGS (VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE | \
> > + VRING_DESC_F_NEXT)
> > static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, u16 id)
> > {
> > struct vhost_desc *h;
> > @@ -2375,7 +2379,7 @@ static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc,
> > h = &vq->descs[vq->ndescs++];
> > h->addr = vhost64_to_cpu(vq, desc->addr);
> > h->len = vhost32_to_cpu(vq, desc->len);
> > - h->flags = vhost16_to_cpu(vq, desc->flags);
> > + h->flags = vhost16_to_cpu(vq, desc->flags) & VHOST_DESC_FLAGS;
> > h->id = id;
> > return 0;
> > @@ -2450,7 +2454,7 @@ static int fetch_indirect_descs(struct vhost_virtqueue *vq,
> > return 0;
> > }
> > -static int fetch_descs(struct vhost_virtqueue *vq)
> > +static int fetch_buf(struct vhost_virtqueue *vq)
> > {
> > struct vring_desc desc;
> > unsigned int i, head, found = 0;
> > @@ -2462,7 +2466,11 @@ static int fetch_descs(struct vhost_virtqueue *vq)
> > /* Check it isn't doing very strange things with descriptor numbers. */
> > last_avail_idx = vq->last_avail_idx;
> > - if (vq->avail_idx == vq->last_avail_idx) {
> > + if (unlikely(vq->avail_idx == vq->last_avail_idx)) {
> > + /* If we already have work to do, don't bother re-checking. */
> > + if (likely(vq->ndescs))
> > + return vq->num;
> > +
> > if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
> > vq_err(vq, "Failed to access avail idx at %p\n",
> > &vq->avail->idx);
> > @@ -2541,6 +2549,24 @@ static int fetch_descs(struct vhost_virtqueue *vq)
> > return 0;
> > }
> > +static int fetch_descs(struct vhost_virtqueue *vq)
> > +{
> > + int ret = 0;
> > +
> > + if (unlikely(vq->first_desc >= vq->ndescs)) {
> > + vq->first_desc = 0;
> > + vq->ndescs = 0;
> > + }
> > +
> > + if (vq->ndescs)
> > + return 0;
> > +
> > + while (!ret && vq->ndescs <= vq->batch_descs)
> > + ret = fetch_buf(vq);
>
>
> It looks to me descriptor chaining might be broken here.
It should work because fetch_buf fetches a whole buf, following
the chain. Seems to work in a small test ... what issues do you see?
>
> > +
> > + return vq->ndescs ? 0 : ret;
> > +}
> > +
> > /* This looks in the virtqueue and for the first available buffer, and converts
> > * it to an iovec for convenient access. Since descriptors consist of some
> > * number of output then some number of input descriptors, it's actually two
> > @@ -2562,6 +2588,8 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> > if (ret)
> > return ret;
> > + /* Note: indirect descriptors are not batched */
> > + /* TODO: batch up to a limit */
> > last = peek_split_desc(vq);
> > id = last->id;
> > @@ -2584,12 +2612,12 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> > if (unlikely(log))
> > *log_num = 0;
> > - for (i = 0; i < vq->ndescs; ++i) {
> > + for (i = vq->first_desc; i < vq->ndescs; ++i) {
> > unsigned iov_count = *in_num + *out_num;
> > struct vhost_desc *desc = &vq->descs[i];
> > int access;
> > - if (desc->flags & ~(VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE)) {
> > + if (desc->flags & ~VHOST_DESC_FLAGS) {
> > vq_err(vq, "Unexpected flags: 0x%x at descriptor id 0x%x\n",
> > desc->flags, desc->id);
> > ret = -EINVAL;
> > @@ -2628,15 +2656,21 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> > }
> > *out_num += ret;
> > }
> > +
> > + ret = desc->id;
> > +
> > + if (!(desc->flags & VRING_DESC_F_NEXT))
> > + break;
> > }
>
>
> What happens if we reach vq->ndescs but VRING_DESC_F_NEXT is still set?
>
> Thanks
This can't happen: descriptors are pushed by push_split_desc each time
we go through a loop in fetch_buf. The only way to exit the loop
with return code 0 is if next_desc return -1 that is when VRING_DESC_F_NEXT
is clear.
But it's a good idea to add a BUG_ON here, I'll do it in the next version.
>
> > - ret = id;
> > - vq->ndescs = 0;
> > + vq->first_desc = i + 1;
> > return ret;
> > err:
> > - vhost_discard_vq_desc(vq, 1);
> > + for (i = vq->first_desc; i < vq->ndescs; ++i)
> > + if (!(desc->flags & VRING_DESC_F_NEXT))
> > + vhost_discard_vq_desc(vq, 1);
> > vq->ndescs = 0;
> > return ret;
> > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> > index 1724f61b6c2d..8b88e0c903da 100644
> > --- a/drivers/vhost/vhost.h
> > +++ b/drivers/vhost/vhost.h
> > @@ -100,7 +100,9 @@ struct vhost_virtqueue {
> > struct vhost_desc *descs;
> > int ndescs;
> > + int first_desc;
> > int max_descs;
> > + int batch_descs;
> > const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
> > struct file *kick;
> > @@ -245,7 +247,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
> > int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
> > #define vq_err(vq, fmt, ...) do { \
> > - pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
> > + pr_err(pr_fmt(fmt), ##__VA_ARGS__); \
> > if ((vq)->error_ctx) \
> > eventfd_signal((vq)->error_ctx, 1);\
> > } while (0)
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH RFC v1 2/2] vhost: batching fetches
2019-10-11 13:46 ` Michael S. Tsirkin
2019-10-12 7:30 ` Jason Wang
@ 2019-10-12 7:30 ` Jason Wang
1 sibling, 0 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-12 7:30 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel; +Cc: netdev, kvm, virtualization
On 2019/10/11 下午9:46, Michael S. Tsirkin wrote:
> With this patch applied, new and old code perform identically.
>
> Lots of extra optimizations are now possible, e.g.
> we can fetch multiple heads with copy_from/to_user now.
> We can get rid of maintaining the log array. Etc etc.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> drivers/vhost/test.c | 2 +-
> drivers/vhost/vhost.c | 50 ++++++++++++++++++++++++++++++++++++-------
> drivers/vhost/vhost.h | 4 +++-
> 3 files changed, 46 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> index 39a018a7af2d..e3a8e9db22cd 100644
> --- a/drivers/vhost/test.c
> +++ b/drivers/vhost/test.c
> @@ -128,7 +128,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
> dev = &n->dev;
> vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
> n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
> - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
> + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
> VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT);
>
> f->private_data = n;
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 36661d6cb51f..aa383e847865 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -302,6 +302,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> {
> vq->num = 1;
> vq->ndescs = 0;
> + vq->first_desc = 0;
> vq->desc = NULL;
> vq->avail = NULL;
> vq->used = NULL;
> @@ -390,6 +391,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> for (i = 0; i < dev->nvqs; ++i) {
> vq = dev->vqs[i];
> vq->max_descs = dev->iov_limit;
> + vq->batch_descs = dev->iov_limit - UIO_MAXIOV;
> vq->descs = kmalloc_array(vq->max_descs,
> sizeof(*vq->descs),
> GFP_KERNEL);
> @@ -2366,6 +2368,8 @@ static void pop_split_desc(struct vhost_virtqueue *vq)
> --vq->ndescs;
> }
>
> +#define VHOST_DESC_FLAGS (VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE | \
> + VRING_DESC_F_NEXT)
> static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, u16 id)
> {
> struct vhost_desc *h;
> @@ -2375,7 +2379,7 @@ static int push_split_desc(struct vhost_virtqueue *vq, struct vring_desc *desc,
> h = &vq->descs[vq->ndescs++];
> h->addr = vhost64_to_cpu(vq, desc->addr);
> h->len = vhost32_to_cpu(vq, desc->len);
> - h->flags = vhost16_to_cpu(vq, desc->flags);
> + h->flags = vhost16_to_cpu(vq, desc->flags) & VHOST_DESC_FLAGS;
> h->id = id;
>
> return 0;
> @@ -2450,7 +2454,7 @@ static int fetch_indirect_descs(struct vhost_virtqueue *vq,
> return 0;
> }
>
> -static int fetch_descs(struct vhost_virtqueue *vq)
> +static int fetch_buf(struct vhost_virtqueue *vq)
> {
> struct vring_desc desc;
> unsigned int i, head, found = 0;
> @@ -2462,7 +2466,11 @@ static int fetch_descs(struct vhost_virtqueue *vq)
> /* Check it isn't doing very strange things with descriptor numbers. */
> last_avail_idx = vq->last_avail_idx;
>
> - if (vq->avail_idx == vq->last_avail_idx) {
> + if (unlikely(vq->avail_idx == vq->last_avail_idx)) {
> + /* If we already have work to do, don't bother re-checking. */
> + if (likely(vq->ndescs))
> + return vq->num;
> +
> if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
> vq_err(vq, "Failed to access avail idx at %p\n",
> &vq->avail->idx);
> @@ -2541,6 +2549,24 @@ static int fetch_descs(struct vhost_virtqueue *vq)
> return 0;
> }
>
> +static int fetch_descs(struct vhost_virtqueue *vq)
> +{
> + int ret = 0;
> +
> + if (unlikely(vq->first_desc >= vq->ndescs)) {
> + vq->first_desc = 0;
> + vq->ndescs = 0;
> + }
> +
> + if (vq->ndescs)
> + return 0;
> +
> + while (!ret && vq->ndescs <= vq->batch_descs)
> + ret = fetch_buf(vq);
It looks to me descriptor chaining might be broken here.
> +
> + return vq->ndescs ? 0 : ret;
> +}
> +
> /* This looks in the virtqueue and for the first available buffer, and converts
> * it to an iovec for convenient access. Since descriptors consist of some
> * number of output then some number of input descriptors, it's actually two
> @@ -2562,6 +2588,8 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> if (ret)
> return ret;
>
> + /* Note: indirect descriptors are not batched */
> + /* TODO: batch up to a limit */
> last = peek_split_desc(vq);
> id = last->id;
>
> @@ -2584,12 +2612,12 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> if (unlikely(log))
> *log_num = 0;
>
> - for (i = 0; i < vq->ndescs; ++i) {
> + for (i = vq->first_desc; i < vq->ndescs; ++i) {
> unsigned iov_count = *in_num + *out_num;
> struct vhost_desc *desc = &vq->descs[i];
> int access;
>
> - if (desc->flags & ~(VRING_DESC_F_INDIRECT | VRING_DESC_F_WRITE)) {
> + if (desc->flags & ~VHOST_DESC_FLAGS) {
> vq_err(vq, "Unexpected flags: 0x%x at descriptor id 0x%x\n",
> desc->flags, desc->id);
> ret = -EINVAL;
> @@ -2628,15 +2656,21 @@ int vhost_get_vq_desc_batch(struct vhost_virtqueue *vq,
> }
> *out_num += ret;
> }
> +
> + ret = desc->id;
> +
> + if (!(desc->flags & VRING_DESC_F_NEXT))
> + break;
> }
What happens if we reach vq->ndescs but VRING_DESC_F_NEXT is still set?
Thanks
>
> - ret = id;
> - vq->ndescs = 0;
> + vq->first_desc = i + 1;
>
> return ret;
>
> err:
> - vhost_discard_vq_desc(vq, 1);
> + for (i = vq->first_desc; i < vq->ndescs; ++i)
> + if (!(desc->flags & VRING_DESC_F_NEXT))
> + vhost_discard_vq_desc(vq, 1);
> vq->ndescs = 0;
>
> return ret;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 1724f61b6c2d..8b88e0c903da 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -100,7 +100,9 @@ struct vhost_virtqueue {
>
> struct vhost_desc *descs;
> int ndescs;
> + int first_desc;
> int max_descs;
> + int batch_descs;
>
> const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
> struct file *kick;
> @@ -245,7 +247,7 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
> int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
>
> #define vq_err(vq, fmt, ...) do { \
> - pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
> + pr_err(pr_fmt(fmt), ##__VA_ARGS__); \
> if ((vq)->error_ctx) \
> eventfd_signal((vq)->error_ctx, 1);\
> } while (0)
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH RFC v1 0/2] vhost: ring format independence
2019-10-11 13:45 [PATCH RFC v1 0/2] vhost: ring format independence Michael S. Tsirkin
` (3 preceding siblings ...)
2019-10-11 13:46 ` Michael S. Tsirkin
@ 2019-10-12 7:31 ` Jason Wang
2019-10-12 20:36 ` Michael S. Tsirkin
2019-10-12 20:36 ` Michael S. Tsirkin
2019-10-12 7:31 ` Jason Wang
` (2 subsequent siblings)
7 siblings, 2 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-12 7:31 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel, kvm, virtualization, netdev
On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> So the idea is as follows: we convert descriptors to an
> independent format first, and process that converting to
> iov later.
>
> The point is that we have a tight loop that fetches
> descriptors, which is good for cache utilization.
> This will also allow all kind of batching tricks -
> e.g. it seems possible to keep SMAP disabled while
> we are fetching multiple descriptors.
>
> And perhaps more importantly, this is a very good fit for the packed
> ring layout, where we get and put descriptors in order.
>
> This patchset seems to already perform exactly the same as the original
> code already based on a microbenchmark. More testing would be very much
> appreciated.
>
> Biggest TODO before this first step is ready to go in is to
> batch indirect descriptors as well.
>
> Integrating into vhost-net is basically
> s/vhost_get_vq_desc/vhost_get_vq_desc_batch/ -
> or add a module parameter like I did in the test module.
It would be better to convert vhost_net then I can do some benchmark on
that.
Thanks
>
>
>
> Michael S. Tsirkin (2):
> vhost: option to fetch descriptors through an independent struct
> vhost: batching fetches
>
> drivers/vhost/test.c | 19 ++-
> drivers/vhost/vhost.c | 333 +++++++++++++++++++++++++++++++++++++++++-
> drivers/vhost/vhost.h | 20 ++-
> 3 files changed, 365 insertions(+), 7 deletions(-)
>
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 0/2] vhost: ring format independence
2019-10-12 7:31 ` [PATCH RFC v1 0/2] vhost: ring format independence Jason Wang
@ 2019-10-12 20:36 ` Michael S. Tsirkin
2019-10-12 20:36 ` Michael S. Tsirkin
1 sibling, 0 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-12 20:36 UTC (permalink / raw)
To: Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization
On Sat, Oct 12, 2019 at 03:31:50PM +0800, Jason Wang wrote:
>
> On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> > So the idea is as follows: we convert descriptors to an
> > independent format first, and process that converting to
> > iov later.
> >
> > The point is that we have a tight loop that fetches
> > descriptors, which is good for cache utilization.
> > This will also allow all kind of batching tricks -
> > e.g. it seems possible to keep SMAP disabled while
> > we are fetching multiple descriptors.
> >
> > And perhaps more importantly, this is a very good fit for the packed
> > ring layout, where we get and put descriptors in order.
> >
> > This patchset seems to already perform exactly the same as the original
> > code already based on a microbenchmark. More testing would be very much
> > appreciated.
> >
> > Biggest TODO before this first step is ready to go in is to
> > batch indirect descriptors as well.
> >
> > Integrating into vhost-net is basically
> > s/vhost_get_vq_desc/vhost_get_vq_desc_batch/ -
> > or add a module parameter like I did in the test module.
>
>
> It would be better to convert vhost_net then I can do some benchmark on
> that.
>
> Thanks
Sure, I post a small patch that does this.
>
> >
> >
> >
> > Michael S. Tsirkin (2):
> > vhost: option to fetch descriptors through an independent struct
> > vhost: batching fetches
> >
> > drivers/vhost/test.c | 19 ++-
> > drivers/vhost/vhost.c | 333 +++++++++++++++++++++++++++++++++++++++++-
> > drivers/vhost/vhost.h | 20 ++-
> > 3 files changed, 365 insertions(+), 7 deletions(-)
> >
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH RFC v1 0/2] vhost: ring format independence
2019-10-12 7:31 ` [PATCH RFC v1 0/2] vhost: ring format independence Jason Wang
2019-10-12 20:36 ` Michael S. Tsirkin
@ 2019-10-12 20:36 ` Michael S. Tsirkin
1 sibling, 0 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-12 20:36 UTC (permalink / raw)
To: Jason Wang; +Cc: linux-kernel, kvm, virtualization, netdev
On Sat, Oct 12, 2019 at 03:31:50PM +0800, Jason Wang wrote:
>
> On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> > So the idea is as follows: we convert descriptors to an
> > independent format first, and process that converting to
> > iov later.
> >
> > The point is that we have a tight loop that fetches
> > descriptors, which is good for cache utilization.
> > This will also allow all kind of batching tricks -
> > e.g. it seems possible to keep SMAP disabled while
> > we are fetching multiple descriptors.
> >
> > And perhaps more importantly, this is a very good fit for the packed
> > ring layout, where we get and put descriptors in order.
> >
> > This patchset seems to already perform exactly the same as the original
> > code already based on a microbenchmark. More testing would be very much
> > appreciated.
> >
> > Biggest TODO before this first step is ready to go in is to
> > batch indirect descriptors as well.
> >
> > Integrating into vhost-net is basically
> > s/vhost_get_vq_desc/vhost_get_vq_desc_batch/ -
> > or add a module parameter like I did in the test module.
>
>
> It would be better to convert vhost_net then I can do some benchmark on
> that.
>
> Thanks
Sure, I post a small patch that does this.
>
> >
> >
> >
> > Michael S. Tsirkin (2):
> > vhost: option to fetch descriptors through an independent struct
> > vhost: batching fetches
> >
> > drivers/vhost/test.c | 19 ++-
> > drivers/vhost/vhost.c | 333 +++++++++++++++++++++++++++++++++++++++++-
> > drivers/vhost/vhost.h | 20 ++-
> > 3 files changed, 365 insertions(+), 7 deletions(-)
> >
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH RFC v1 0/2] vhost: ring format independence
2019-10-11 13:45 [PATCH RFC v1 0/2] vhost: ring format independence Michael S. Tsirkin
` (4 preceding siblings ...)
2019-10-12 7:31 ` [PATCH RFC v1 0/2] vhost: ring format independence Jason Wang
@ 2019-10-12 7:31 ` Jason Wang
2019-10-12 8:15 ` Jason Wang
2019-10-12 8:15 ` Jason Wang
7 siblings, 0 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-12 7:31 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel, kvm, virtualization, netdev
On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> So the idea is as follows: we convert descriptors to an
> independent format first, and process that converting to
> iov later.
>
> The point is that we have a tight loop that fetches
> descriptors, which is good for cache utilization.
> This will also allow all kind of batching tricks -
> e.g. it seems possible to keep SMAP disabled while
> we are fetching multiple descriptors.
>
> And perhaps more importantly, this is a very good fit for the packed
> ring layout, where we get and put descriptors in order.
>
> This patchset seems to already perform exactly the same as the original
> code already based on a microbenchmark. More testing would be very much
> appreciated.
>
> Biggest TODO before this first step is ready to go in is to
> batch indirect descriptors as well.
>
> Integrating into vhost-net is basically
> s/vhost_get_vq_desc/vhost_get_vq_desc_batch/ -
> or add a module parameter like I did in the test module.
It would be better to convert vhost_net then I can do some benchmark on
that.
Thanks
>
>
>
> Michael S. Tsirkin (2):
> vhost: option to fetch descriptors through an independent struct
> vhost: batching fetches
>
> drivers/vhost/test.c | 19 ++-
> drivers/vhost/vhost.c | 333 +++++++++++++++++++++++++++++++++++++++++-
> drivers/vhost/vhost.h | 20 ++-
> 3 files changed, 365 insertions(+), 7 deletions(-)
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 0/2] vhost: ring format independence
2019-10-11 13:45 [PATCH RFC v1 0/2] vhost: ring format independence Michael S. Tsirkin
` (5 preceding siblings ...)
2019-10-12 7:31 ` Jason Wang
@ 2019-10-12 8:15 ` Jason Wang
2019-10-12 19:26 ` Michael S. Tsirkin
2019-10-12 19:26 ` Michael S. Tsirkin
2019-10-12 8:15 ` Jason Wang
7 siblings, 2 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-12 8:15 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel, kvm, virtualization, netdev
On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> So the idea is as follows: we convert descriptors to an
> independent format first, and process that converting to
> iov later.
>
> The point is that we have a tight loop that fetches
> descriptors, which is good for cache utilization.
> This will also allow all kind of batching tricks -
> e.g. it seems possible to keep SMAP disabled while
> we are fetching multiple descriptors.
I wonder this may help for performance:
- another indirection layer, increased footprint
- won't help or even degrade when there's no batch
- an extra overhead in the case of in order where we should already had
tight loop
- need carefully deal with indirect and chain or make it only work for
packet sit just in a single descriptor
Thanks
>
> And perhaps more importantly, this is a very good fit for the packed
> ring layout, where we get and put descriptors in order.
>
> This patchset seems to already perform exactly the same as the original
> code already based on a microbenchmark. More testing would be very much
> appreciated.
>
> Biggest TODO before this first step is ready to go in is to
> batch indirect descriptors as well.
>
> Integrating into vhost-net is basically
> s/vhost_get_vq_desc/vhost_get_vq_desc_batch/ -
> or add a module parameter like I did in the test module.
>
>
>
> Michael S. Tsirkin (2):
> vhost: option to fetch descriptors through an independent struct
> vhost: batching fetches
>
> drivers/vhost/test.c | 19 ++-
> drivers/vhost/vhost.c | 333 +++++++++++++++++++++++++++++++++++++++++-
> drivers/vhost/vhost.h | 20 ++-
> 3 files changed, 365 insertions(+), 7 deletions(-)
>
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH RFC v1 0/2] vhost: ring format independence
2019-10-12 8:15 ` Jason Wang
@ 2019-10-12 19:26 ` Michael S. Tsirkin
2019-10-12 19:26 ` Michael S. Tsirkin
1 sibling, 0 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-12 19:26 UTC (permalink / raw)
To: Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization
On Sat, Oct 12, 2019 at 04:15:42PM +0800, Jason Wang wrote:
>
> On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> > So the idea is as follows: we convert descriptors to an
> > independent format first, and process that converting to
> > iov later.
> >
> > The point is that we have a tight loop that fetches
> > descriptors, which is good for cache utilization.
> > This will also allow all kind of batching tricks -
> > e.g. it seems possible to keep SMAP disabled while
> > we are fetching multiple descriptors.
>
>
> I wonder this may help for performance:
Could you try it out and report please?
Would be very much appreciated.
> - another indirection layer, increased footprint
Seems to be offset off by improved batching.
For sure will be even better if we can move stac/clac out,
or replace some get/put user with bigger copy to/from.
> - won't help or even degrade when there's no batch
I couldn't measure a difference. I'm guessing
> - an extra overhead in the case of in order where we should already had
> tight loop
it's not so tight with translation in there.
this exactly makes the loop tight.
> - need carefully deal with indirect and chain or make it only work for
> packet sit just in a single descriptor
>
> Thanks
I don't understand this last comment.
>
> >
> > And perhaps more importantly, this is a very good fit for the packed
> > ring layout, where we get and put descriptors in order.
> >
> > This patchset seems to already perform exactly the same as the original
> > code already based on a microbenchmark. More testing would be very much
> > appreciated.
> >
> > Biggest TODO before this first step is ready to go in is to
> > batch indirect descriptors as well.
> >
> > Integrating into vhost-net is basically
> > s/vhost_get_vq_desc/vhost_get_vq_desc_batch/ -
> > or add a module parameter like I did in the test module.
> >
> >
> >
> > Michael S. Tsirkin (2):
> > vhost: option to fetch descriptors through an independent struct
> > vhost: batching fetches
> >
> > drivers/vhost/test.c | 19 ++-
> > drivers/vhost/vhost.c | 333 +++++++++++++++++++++++++++++++++++++++++-
> > drivers/vhost/vhost.h | 20 ++-
> > 3 files changed, 365 insertions(+), 7 deletions(-)
> >
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH RFC v1 0/2] vhost: ring format independence
2019-10-12 8:15 ` Jason Wang
2019-10-12 19:26 ` Michael S. Tsirkin
@ 2019-10-12 19:26 ` Michael S. Tsirkin
1 sibling, 0 replies; 29+ messages in thread
From: Michael S. Tsirkin @ 2019-10-12 19:26 UTC (permalink / raw)
To: Jason Wang; +Cc: linux-kernel, kvm, virtualization, netdev
On Sat, Oct 12, 2019 at 04:15:42PM +0800, Jason Wang wrote:
>
> On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> > So the idea is as follows: we convert descriptors to an
> > independent format first, and process that converting to
> > iov later.
> >
> > The point is that we have a tight loop that fetches
> > descriptors, which is good for cache utilization.
> > This will also allow all kind of batching tricks -
> > e.g. it seems possible to keep SMAP disabled while
> > we are fetching multiple descriptors.
>
>
> I wonder this may help for performance:
Could you try it out and report please?
Would be very much appreciated.
> - another indirection layer, increased footprint
Seems to be offset off by improved batching.
For sure will be even better if we can move stac/clac out,
or replace some get/put user with bigger copy to/from.
> - won't help or even degrade when there's no batch
I couldn't measure a difference. I'm guessing
> - an extra overhead in the case of in order where we should already had
> tight loop
it's not so tight with translation in there.
this exactly makes the loop tight.
> - need carefully deal with indirect and chain or make it only work for
> packet sit just in a single descriptor
>
> Thanks
I don't understand this last comment.
>
> >
> > And perhaps more importantly, this is a very good fit for the packed
> > ring layout, where we get and put descriptors in order.
> >
> > This patchset seems to already perform exactly the same as the original
> > code already based on a microbenchmark. More testing would be very much
> > appreciated.
> >
> > Biggest TODO before this first step is ready to go in is to
> > batch indirect descriptors as well.
> >
> > Integrating into vhost-net is basically
> > s/vhost_get_vq_desc/vhost_get_vq_desc_batch/ -
> > or add a module parameter like I did in the test module.
> >
> >
> >
> > Michael S. Tsirkin (2):
> > vhost: option to fetch descriptors through an independent struct
> > vhost: batching fetches
> >
> > drivers/vhost/test.c | 19 ++-
> > drivers/vhost/vhost.c | 333 +++++++++++++++++++++++++++++++++++++++++-
> > drivers/vhost/vhost.h | 20 ++-
> > 3 files changed, 365 insertions(+), 7 deletions(-)
> >
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH RFC v1 0/2] vhost: ring format independence
2019-10-11 13:45 [PATCH RFC v1 0/2] vhost: ring format independence Michael S. Tsirkin
` (6 preceding siblings ...)
2019-10-12 8:15 ` Jason Wang
@ 2019-10-12 8:15 ` Jason Wang
7 siblings, 0 replies; 29+ messages in thread
From: Jason Wang @ 2019-10-12 8:15 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel, kvm, virtualization, netdev
On 2019/10/11 下午9:45, Michael S. Tsirkin wrote:
> So the idea is as follows: we convert descriptors to an
> independent format first, and process that converting to
> iov later.
>
> The point is that we have a tight loop that fetches
> descriptors, which is good for cache utilization.
> This will also allow all kind of batching tricks -
> e.g. it seems possible to keep SMAP disabled while
> we are fetching multiple descriptors.
I wonder this may help for performance:
- another indirection layer, increased footprint
- won't help or even degrade when there's no batch
- an extra overhead in the case of in order where we should already had
tight loop
- need carefully deal with indirect and chain or make it only work for
packet sit just in a single descriptor
Thanks
>
> And perhaps more importantly, this is a very good fit for the packed
> ring layout, where we get and put descriptors in order.
>
> This patchset seems to already perform exactly the same as the original
> code already based on a microbenchmark. More testing would be very much
> appreciated.
>
> Biggest TODO before this first step is ready to go in is to
> batch indirect descriptors as well.
>
> Integrating into vhost-net is basically
> s/vhost_get_vq_desc/vhost_get_vq_desc_batch/ -
> or add a module parameter like I did in the test module.
>
>
>
> Michael S. Tsirkin (2):
> vhost: option to fetch descriptors through an independent struct
> vhost: batching fetches
>
> drivers/vhost/test.c | 19 ++-
> drivers/vhost/vhost.c | 333 +++++++++++++++++++++++++++++++++++++++++-
> drivers/vhost/vhost.h | 20 ++-
> 3 files changed, 365 insertions(+), 7 deletions(-)
>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply [flat|nested] 29+ messages in thread