* Re: [PATCH] virtio_mem: prevent overflow with subblock size
From: Michael S. Tsirkin @ 2020-06-08 7:08 UTC (permalink / raw)
To: David Hildenbrand
Cc: linux-kernel, Jason Wang, Pankaj Gupta, virtualization, teawater
In-Reply-To: <0930c9d0-0708-c079-29bd-b80d4e3ce446@redhat.com>
On Mon, Jun 08, 2020 at 08:58:31AM +0200, David Hildenbrand wrote:
> On 08.06.20 08:14, Michael S. Tsirkin wrote:
> > If subblock size is large (e.g. 1G) 32 bit math involving it
> > can overflow. Rather than try to catch all instances of that,
> > let's tweak block size to 64 bit.
>
> I fail to see where we could actually trigger an overflow. The reported
> warning looked like a false positive to me.
So
const uint64_t size = count * vm->subblock_size;
is it unreasonable for count to be 4K with subblock_size being 1M?
> >
> > It ripples through UAPI which is an ABI change, but it's not too late to
> > make it, and it will allow supporting >4Gbyte blocks which might
> > become necessary down the road.
> >
>
> This might break cloud-hypervisor, who's already implementing this
> protocol upstream (ccing Hui).
> https://github.com/cloud-hypervisor/cloud-hypervisor/blob/master/vm-virtio/src/mem.rs
>
> (blocks in the gigabyte range were never the original intention of
> virtio-mem, but I am not completely opposed to that)
So in that case, can you code up validation in the probe function?
> > Fixes: 5f1f79bbc9e26 ("virtio-mem: Paravirtualized memory hotplug")
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > drivers/virtio/virtio_mem.c | 14 +++++++-------
> > include/uapi/linux/virtio_mem.h | 4 ++--
> > 2 files changed, 9 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
> > index 2f357142ea5e..7b1bece8a331 100644
> > --- a/drivers/virtio/virtio_mem.c
> > +++ b/drivers/virtio/virtio_mem.c
> > @@ -77,7 +77,7 @@ struct virtio_mem {
> > uint64_t requested_size;
> >
> > /* The device block size (for communicating with the device). */
> > - uint32_t device_block_size;
> > + uint64_t device_block_size;
> > /* The translated node id. NUMA_NO_NODE in case not specified. */
> > int nid;
> > /* Physical start address of the memory region. */
> > @@ -86,7 +86,7 @@ struct virtio_mem {
> > uint64_t region_size;
> >
> > /* The subblock size. */
> > - uint32_t subblock_size;
> > + uint64_t subblock_size;
> > /* The number of subblocks per memory block. */
> > uint32_t nb_sb_per_mb;
> >
> > @@ -1698,9 +1698,9 @@ static int virtio_mem_init(struct virtio_mem *vm)
> > * - At least the device block size.
> > * In the worst case, a single subblock per memory block.
> > */
> > - vm->subblock_size = PAGE_SIZE * 1u << max_t(uint32_t, MAX_ORDER - 1,
> > - pageblock_order);
> > - vm->subblock_size = max_t(uint32_t, vm->device_block_size,
> > + vm->subblock_size = PAGE_SIZE * 1ul << max_t(uint32_t, MAX_ORDER - 1,
> > + pageblock_order);
> > + vm->subblock_size = max_t(uint64_t, vm->device_block_size,
> > vm->subblock_size);
> > vm->nb_sb_per_mb = memory_block_size_bytes() / vm->subblock_size;
> >
> > @@ -1713,8 +1713,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
> >
> > dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
> > dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
> > - dev_info(&vm->vdev->dev, "device block size: 0x%x",
> > - vm->device_block_size);
> > + dev_info(&vm->vdev->dev, "device block size: 0x%llx",
> > + (unsigned long long)vm->device_block_size);
> > dev_info(&vm->vdev->dev, "memory block size: 0x%lx",
> > memory_block_size_bytes());
> > dev_info(&vm->vdev->dev, "subblock size: 0x%x",
> > diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h
> > index a455c488a995..a9ffe041843c 100644
> > --- a/include/uapi/linux/virtio_mem.h
> > +++ b/include/uapi/linux/virtio_mem.h
> > @@ -185,10 +185,10 @@ struct virtio_mem_resp {
> >
> > struct virtio_mem_config {
> > /* Block size and alignment. Cannot change. */
> > - __u32 block_size;
> > + __u64 block_size;
> > /* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */
> > __u16 node_id;
> > - __u16 padding;
> > + __u8 padding[6];
> > /* Start address of the memory region. Cannot change. */
> > __u64 addr;
> > /* Region size (maximum). Cannot change. */
> >
>
>
> --
> Thanks,
>
> David / dhildenb
^ permalink raw reply
* Re: [PATCH] virtio_mem: prevent overflow with subblock size
From: David Hildenbrand @ 2020-06-08 6:58 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel; +Cc: Pankaj Gupta, teawater, virtualization
In-Reply-To: <20200608061406.709211-1-mst@redhat.com>
On 08.06.20 08:14, Michael S. Tsirkin wrote:
> If subblock size is large (e.g. 1G) 32 bit math involving it
> can overflow. Rather than try to catch all instances of that,
> let's tweak block size to 64 bit.
I fail to see where we could actually trigger an overflow. The reported
warning looked like a false positive to me.
>
> It ripples through UAPI which is an ABI change, but it's not too late to
> make it, and it will allow supporting >4Gbyte blocks which might
> become necessary down the road.
>
This might break cloud-hypervisor, who's already implementing this
protocol upstream (ccing Hui).
https://github.com/cloud-hypervisor/cloud-hypervisor/blob/master/vm-virtio/src/mem.rs
(blocks in the gigabyte range were never the original intention of
virtio-mem, but I am not completely opposed to that)
> Fixes: 5f1f79bbc9e26 ("virtio-mem: Paravirtualized memory hotplug")
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> drivers/virtio/virtio_mem.c | 14 +++++++-------
> include/uapi/linux/virtio_mem.h | 4 ++--
> 2 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
> index 2f357142ea5e..7b1bece8a331 100644
> --- a/drivers/virtio/virtio_mem.c
> +++ b/drivers/virtio/virtio_mem.c
> @@ -77,7 +77,7 @@ struct virtio_mem {
> uint64_t requested_size;
>
> /* The device block size (for communicating with the device). */
> - uint32_t device_block_size;
> + uint64_t device_block_size;
> /* The translated node id. NUMA_NO_NODE in case not specified. */
> int nid;
> /* Physical start address of the memory region. */
> @@ -86,7 +86,7 @@ struct virtio_mem {
> uint64_t region_size;
>
> /* The subblock size. */
> - uint32_t subblock_size;
> + uint64_t subblock_size;
> /* The number of subblocks per memory block. */
> uint32_t nb_sb_per_mb;
>
> @@ -1698,9 +1698,9 @@ static int virtio_mem_init(struct virtio_mem *vm)
> * - At least the device block size.
> * In the worst case, a single subblock per memory block.
> */
> - vm->subblock_size = PAGE_SIZE * 1u << max_t(uint32_t, MAX_ORDER - 1,
> - pageblock_order);
> - vm->subblock_size = max_t(uint32_t, vm->device_block_size,
> + vm->subblock_size = PAGE_SIZE * 1ul << max_t(uint32_t, MAX_ORDER - 1,
> + pageblock_order);
> + vm->subblock_size = max_t(uint64_t, vm->device_block_size,
> vm->subblock_size);
> vm->nb_sb_per_mb = memory_block_size_bytes() / vm->subblock_size;
>
> @@ -1713,8 +1713,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
>
> dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
> dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
> - dev_info(&vm->vdev->dev, "device block size: 0x%x",
> - vm->device_block_size);
> + dev_info(&vm->vdev->dev, "device block size: 0x%llx",
> + (unsigned long long)vm->device_block_size);
> dev_info(&vm->vdev->dev, "memory block size: 0x%lx",
> memory_block_size_bytes());
> dev_info(&vm->vdev->dev, "subblock size: 0x%x",
> diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h
> index a455c488a995..a9ffe041843c 100644
> --- a/include/uapi/linux/virtio_mem.h
> +++ b/include/uapi/linux/virtio_mem.h
> @@ -185,10 +185,10 @@ struct virtio_mem_resp {
>
> struct virtio_mem_config {
> /* Block size and alignment. Cannot change. */
> - __u32 block_size;
> + __u64 block_size;
> /* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */
> __u16 node_id;
> - __u16 padding;
> + __u8 padding[6];
> /* Start address of the memory region. Cannot change. */
> __u64 addr;
> /* Region size (maximum). Cannot change. */
>
--
Thanks,
David / dhildenb
^ permalink raw reply
* Re: [PATCH RFC v5 13/13] vhost: drop head based APIs
From: Michael S. Tsirkin @ 2020-06-08 6:45 UTC (permalink / raw)
To: Jason Wang; +Cc: linux-kernel, kvm, virtualization, netdev, eperezma
In-Reply-To: <8e3f5b6f-a47b-73cd-e8e3-959d40f6c91c@redhat.com>
On Mon, Jun 08, 2020 at 11:57:48AM +0800, Jason Wang wrote:
>
> On 2020/6/7 下午10:11, Michael S. Tsirkin wrote:
> > Everyone's using buf APIs, no need for head based ones anymore.
> >
> > Signed-off-by: Michael S. Tsirkin<mst@redhat.com>
> > ---
> > drivers/vhost/vhost.c | 36 ++++++++----------------------------
> > drivers/vhost/vhost.h | 12 ------------
> > 2 files changed, 8 insertions(+), 40 deletions(-)
> >
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index 72ee55c810c4..e6931b760b61 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -2299,12 +2299,12 @@ static int fetch_buf(struct vhost_virtqueue *vq)
> > return 1;
> > }
> > -/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
> > +/* Revert the effect of fetch_buf. Useful for error handling. */
> > +static
> > void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
> > {
> > vq->last_avail_idx -= n;
> > }
> > -EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
>
>
> The same question as previous version.
>
> Do we need to rewind cached descriptor here?
>
> Thanks
Good point. This needs more thought, we need to also
rewind the avail idx each time we flush the descriptor cache.
--
MST
^ permalink raw reply
* Re: [PATCH] virtio-mem: drop unnecessary initialization
From: David Hildenbrand @ 2020-06-08 6:44 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel
Cc: Pankaj Gupta, kernel test robot, virtualization
In-Reply-To: <20200608054517.708167-1-mst@redhat.com>
On 08.06.20 07:45, Michael S. Tsirkin wrote:
> rc is initialized to -EINVAL but that's never used. Drop it.
>
> Fixes: 5f1f79bbc9e2 ("virtio-mem: Paravirtualized memory hotplug")
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> drivers/virtio/virtio_mem.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
> index f658fe9149be..2f357142ea5e 100644
> --- a/drivers/virtio/virtio_mem.c
> +++ b/drivers/virtio/virtio_mem.c
> @@ -1768,7 +1768,7 @@ static void virtio_mem_delete_resource(struct virtio_mem *vm)
> static int virtio_mem_probe(struct virtio_device *vdev)
> {
> struct virtio_mem *vm;
> - int rc = -EINVAL;
> + int rc;
>
> BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24);
> BUILD_BUG_ON(sizeof(struct virtio_mem_resp) != 10);
>
Acked-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply
* Re: [PATCH 5/6] vdpa: introduce virtio pci driver
From: Michael S. Tsirkin @ 2020-06-08 6:32 UTC (permalink / raw)
To: Jason Wang
Cc: kvm, virtualization, netdev, linux-kernel, rob.miller,
lingshan.zhu, eperezma, lulu, shahafs, hanand, mhabets, gdawar,
saugatm, vmireyno, zhangweining, eli
In-Reply-To: <9b1abd2b-232c-aa0f-d8bb-03e65fd47de2@redhat.com>
On Mon, Jun 08, 2020 at 11:32:31AM +0800, Jason Wang wrote:
>
> On 2020/6/7 下午9:51, Michael S. Tsirkin wrote:
> > On Fri, Jun 05, 2020 at 04:54:17PM +0800, Jason Wang wrote:
> > > On 2020/6/2 下午3:08, Jason Wang wrote:
> > > > > > +static const struct pci_device_id vp_vdpa_id_table[] = {
> > > > > > + { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) },
> > > > > > + { 0 }
> > > > > > +};
> > > > > This looks like it'll create a mess with either virtio pci
> > > > > or vdpa being loaded at random. Maybe just don't specify
> > > > > any IDs for now. Down the road we could get a
> > > > > distinct vendor ID or a range of device IDs for this.
> > > >
> > > > Right, will do.
> > > >
> > > > Thanks
> > >
> > > Rethink about this. If we don't specify any ID, the binding won't work.
> > We can bind manually. It's not really for production anyway, so
> > not a big deal imho.
>
>
> I think you mean doing it via "new_id", right.
I really meant driver_override. This is what people have been using
with pci-stub for years now.
>
> >
> > > How about using a dedicated subsystem vendor id for this?
> > >
> > > Thanks
> > If virtio vendor id is used then standard driver is expected
> > to bind, right? Maybe use a dedicated vendor id?
>
>
> I meant something like:
>
> static const struct pci_device_id vp_vdpa_id_table[] = {
> { PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID,
> VP_TEST_VENDOR_ID, VP_TEST_DEVICE_ID) },
> { 0 }
> };
>
> Thanks
>
Then regular virtio will still bind to it. It has
drivers/virtio/virtio_pci_common.c: { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) },
--
MST
^ permalink raw reply
* [PATCH] virtio_mem: prevent overflow with subblock size
From: Michael S. Tsirkin @ 2020-06-08 6:14 UTC (permalink / raw)
To: linux-kernel; +Cc: Pankaj Gupta, virtualization
If subblock size is large (e.g. 1G) 32 bit math involving it
can overflow. Rather than try to catch all instances of that,
let's tweak block size to 64 bit.
It ripples through UAPI which is an ABI change, but it's not too late to
make it, and it will allow supporting >4Gbyte blocks which might
become necessary down the road.
Fixes: 5f1f79bbc9e26 ("virtio-mem: Paravirtualized memory hotplug")
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/virtio/virtio_mem.c | 14 +++++++-------
include/uapi/linux/virtio_mem.h | 4 ++--
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 2f357142ea5e..7b1bece8a331 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -77,7 +77,7 @@ struct virtio_mem {
uint64_t requested_size;
/* The device block size (for communicating with the device). */
- uint32_t device_block_size;
+ uint64_t device_block_size;
/* The translated node id. NUMA_NO_NODE in case not specified. */
int nid;
/* Physical start address of the memory region. */
@@ -86,7 +86,7 @@ struct virtio_mem {
uint64_t region_size;
/* The subblock size. */
- uint32_t subblock_size;
+ uint64_t subblock_size;
/* The number of subblocks per memory block. */
uint32_t nb_sb_per_mb;
@@ -1698,9 +1698,9 @@ static int virtio_mem_init(struct virtio_mem *vm)
* - At least the device block size.
* In the worst case, a single subblock per memory block.
*/
- vm->subblock_size = PAGE_SIZE * 1u << max_t(uint32_t, MAX_ORDER - 1,
- pageblock_order);
- vm->subblock_size = max_t(uint32_t, vm->device_block_size,
+ vm->subblock_size = PAGE_SIZE * 1ul << max_t(uint32_t, MAX_ORDER - 1,
+ pageblock_order);
+ vm->subblock_size = max_t(uint64_t, vm->device_block_size,
vm->subblock_size);
vm->nb_sb_per_mb = memory_block_size_bytes() / vm->subblock_size;
@@ -1713,8 +1713,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
- dev_info(&vm->vdev->dev, "device block size: 0x%x",
- vm->device_block_size);
+ dev_info(&vm->vdev->dev, "device block size: 0x%llx",
+ (unsigned long long)vm->device_block_size);
dev_info(&vm->vdev->dev, "memory block size: 0x%lx",
memory_block_size_bytes());
dev_info(&vm->vdev->dev, "subblock size: 0x%x",
diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h
index a455c488a995..a9ffe041843c 100644
--- a/include/uapi/linux/virtio_mem.h
+++ b/include/uapi/linux/virtio_mem.h
@@ -185,10 +185,10 @@ struct virtio_mem_resp {
struct virtio_mem_config {
/* Block size and alignment. Cannot change. */
- __u32 block_size;
+ __u64 block_size;
/* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */
__u16 node_id;
- __u16 padding;
+ __u8 padding[6];
/* Start address of the memory region. Cannot change. */
__u64 addr;
/* Region size (maximum). Cannot change. */
--
MST
^ permalink raw reply related
* Re: [PATCH RFC 03/13] vhost: batching fetches
From: Michael S. Tsirkin @ 2020-06-08 6:01 UTC (permalink / raw)
To: Jason Wang; +Cc: Eugenio Pérez, netdev, linux-kernel, kvm, virtualization
In-Reply-To: <0d791fe6-8fbe-ddcc-07fa-efbd4fac5ea4@redhat.com>
On Mon, Jun 08, 2020 at 11:35:40AM +0800, Jason Wang wrote:
>
> On 2020/6/7 下午9:57, Michael S. Tsirkin wrote:
> > On Fri, Jun 05, 2020 at 11:40:17AM +0800, Jason Wang wrote:
> > > On 2020/6/4 下午4:59, Michael S. Tsirkin wrote:
> > > > On Wed, Jun 03, 2020 at 03:27:39PM +0800, Jason Wang wrote:
> > > > > On 2020/6/2 下午9:06, Michael S. Tsirkin wrote:
> > > > > > With this patch applied, new and old code perform identically.
> > > > > >
> > > > > > Lots of extra optimizations are now possible, e.g.
> > > > > > we can fetch multiple heads with copy_from/to_user now.
> > > > > > We can get rid of maintaining the log array. Etc etc.
> > > > > >
> > > > > > Signed-off-by: Michael S. Tsirkin<mst@redhat.com>
> > > > > > Signed-off-by: Eugenio Pérez<eperezma@redhat.com>
> > > > > > Link:https://lore.kernel.org/r/20200401183118.8334-4-eperezma@redhat.com
> > > > > > Signed-off-by: Michael S. Tsirkin<mst@redhat.com>
> > > > > > ---
> > > > > > drivers/vhost/test.c | 2 +-
> > > > > > drivers/vhost/vhost.c | 47 ++++++++++++++++++++++++++++++++++++++-----
> > > > > > drivers/vhost/vhost.h | 5 ++++-
> > > > > > 3 files changed, 47 insertions(+), 7 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
> > > > > > index 9a3a09005e03..02806d6f84ef 100644
> > > > > > --- a/drivers/vhost/test.c
> > > > > > +++ b/drivers/vhost/test.c
> > > > > > @@ -119,7 +119,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
> > > > > > dev = &n->dev;
> > > > > > vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
> > > > > > n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
> > > > > > - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
> > > > > > + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
> > > > > > VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, NULL);
> > > > > > f->private_data = n;
> > > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > > > > > index 8f9a07282625..aca2a5b0d078 100644
> > > > > > --- a/drivers/vhost/vhost.c
> > > > > > +++ b/drivers/vhost/vhost.c
> > > > > > @@ -299,6 +299,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
> > > > > > {
> > > > > > vq->num = 1;
> > > > > > vq->ndescs = 0;
> > > > > > + vq->first_desc = 0;
> > > > > > vq->desc = NULL;
> > > > > > vq->avail = NULL;
> > > > > > vq->used = NULL;
> > > > > > @@ -367,6 +368,11 @@ static int vhost_worker(void *data)
> > > > > > return 0;
> > > > > > }
> > > > > > +static int vhost_vq_num_batch_descs(struct vhost_virtqueue *vq)
> > > > > > +{
> > > > > > + return vq->max_descs - UIO_MAXIOV;
> > > > > > +}
> > > > > 1 descriptor does not mean 1 iov, e.g userspace may pass several 1 byte
> > > > > length memory regions for us to translate.
> > > > >
> > > > Yes but I don't see the relevance. This tells us how many descriptors to
> > > > batch, not how many IOVs.
> > > Yes, but questions are:
> > >
> > > - this introduces another obstacle to support more than 1K queue size
> > > - if we support 1K queue size, does it mean we need to cache 1K descriptors,
> > > which seems a large stress on the cache
> > >
> > > Thanks
> > >
> > >
> > Still don't understand the relevance. We support up to 1K descriptors
> > per buffer just for IOV since we always did. This adds 64 more
> > descriptors - is that a big deal?
>
>
> If I understand correctly, for net, the code tries to batch descriptors
> for at least one packet.
>
> If we allow 1K queue size then we allow a packet that consists of 1K
> descriptors. Then we need to cache 1K descriptors.
>
> Thanks
That case is already so pathological, I am not at all worried about
it performing well.
--
MST
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects
From: Michael S. Tsirkin @ 2020-06-08 6:00 UTC (permalink / raw)
To: David Stevens
Cc: Gerd Hoffmann, David Airlie, Daniel Vetter, Sumit Semwal,
Jason Wang, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
open list, ML dri-devel, open list:VIRTIO GPU DRIVER,
Linux Media Mailing List,
moderated list:DMA BUFFER SHARING FRAMEWORK, virtio-dev
In-Reply-To: <CAD=HUj5Jn+grQVfxmPSSnERdGwnu8RceDsdpWpoxXH+WL4k+qw@mail.gmail.com>
On Mon, Jun 08, 2020 at 10:33:09AM +0900, David Stevens wrote:
> On Sun, Jun 7, 2020 at 5:04 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Fri, Jun 05, 2020 at 10:28:42AM +0900, David Stevens wrote:
> > > On Fri, Jun 5, 2020 at 4:05 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> > > >
> > > > On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote:
> > > > > This change adds a new flavor of dma-bufs that can be used by virtio
> > > > > drivers to share exported objects. A virtio dma-buf can be queried by
> > > > > virtio drivers to obtain the UUID which identifies the underlying
> > > > > exported object.
> > > > >
> > > > > Signed-off-by: David Stevens <stevensd@chromium.org>
> > > >
> > > > Is this just for graphics? If yes I'd rather we put it in the graphics
> > > > driver. We can always move it later ...
> > >
> > > As stated in the cover letter, this will be used by virtio-video.
> > >
> > > The proposed virtio-video patches: https://markmail.org/thread/p5d3k566srtdtute
> > > The patch which imports these dma-bufs (slightly out of date, uses v3
> > > of this patch set): https://markmail.org/thread/j4xlqaaim266qpks
> > >
> > > > > ---
> > > > > drivers/virtio/Makefile | 2 +-
> > > > > drivers/virtio/virtio.c | 6 +++
> > > > > drivers/virtio/virtio_dma_buf.c | 89 +++++++++++++++++++++++++++++++++
> > > > > include/linux/virtio.h | 1 +
> > > > > include/linux/virtio_dma_buf.h | 58 +++++++++++++++++++++
> > > > > 5 files changed, 155 insertions(+), 1 deletion(-)
> > > > > create mode 100644 drivers/virtio/virtio_dma_buf.c
> > > > > create mode 100644 include/linux/virtio_dma_buf.h
> > > > >
> > > > > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
> > > > > index 29a1386ecc03..ecdae5b596de 100644
> > > > > --- a/drivers/virtio/Makefile
> > > > > +++ b/drivers/virtio/Makefile
> > > > > @@ -1,5 +1,5 @@
> > > > > # SPDX-License-Identifier: GPL-2.0
> > > > > -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
> > > > > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o
> > > > > obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
> > > > > obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
> > > > > virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
> > > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > > index a977e32a88f2..5d46f0ded92d 100644
> > > > > --- a/drivers/virtio/virtio.c
> > > > > +++ b/drivers/virtio/virtio.c
> > > > > @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev)
> > > > > }
> > > > > EXPORT_SYMBOL_GPL(register_virtio_device);
> > > > >
> > > > > +bool is_virtio_device(struct device *dev)
> > > > > +{
> > > > > + return dev->bus == &virtio_bus;
> > > > > +}
> > > > > +EXPORT_SYMBOL_GPL(is_virtio_device);
> > > > > +
> > > > > void unregister_virtio_device(struct virtio_device *dev)
> > > > > {
> > > > > int index = dev->index; /* save for after device release */
> > > > > diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c
> > > > > new file mode 100644
> > > > > index 000000000000..23e3399b11ed
> > > > > --- /dev/null
> > > > > +++ b/drivers/virtio/virtio_dma_buf.c
> > > > > @@ -0,0 +1,89 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > > +/*
> > > > > + * dma-bufs for virtio exported objects
> > > > > + *
> > > > > + * Copyright (C) 2020 Google, Inc.
> > > > > + */
> > > > > +
> > > > > +#include <linux/virtio_dma_buf.h>
> > > > > +
> > > > > +/**
> > > > > + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported object
> > > > > + *
> > > > > + * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf
> > > > > + * for an virtio exported object that can be queried by other virtio drivers
> > > > > + * for the object's UUID.
> > > > > + */
> > > > > +struct dma_buf *virtio_dma_buf_export(
> > > > > + const struct virtio_dma_buf_export_info *virtio_exp_info)
> > > > > +{
> > > > > + struct dma_buf_export_info exp_info;
> > > > > +
> > > > > + if (!virtio_exp_info->ops
> > > > > + || virtio_exp_info->ops->ops.attach != &virtio_dma_buf_attach
> > > > > + || !virtio_exp_info->ops->get_uuid) {
> > > > > + return ERR_PTR(-EINVAL);
> > > > > + }
> > > > > +
> > > > > + exp_info.exp_name = virtio_exp_info->exp_name;
> > > > > + exp_info.owner = virtio_exp_info->owner;
> > > > > + exp_info.ops = &virtio_exp_info->ops->ops;
> > > > > + exp_info.size = virtio_exp_info->size;
> > > > > + exp_info.flags = virtio_exp_info->flags;
> > > > > + exp_info.resv = virtio_exp_info->resv;
> > > > > + exp_info.priv = virtio_exp_info->priv;
> > > > > + BUILD_BUG_ON(sizeof(struct virtio_dma_buf_export_info)
> > > > > + != sizeof(struct dma_buf_export_info));
> > > >
> > > > This is the only part that gives me pause. Why do we need this hack?
> > > > What's wrong with just using dma_buf_export_info directly,
> > > > and if you want the virtio ops, just using container_of?
> > >
> > > This approach provides a more explicit type signature and a little
> > > more type safety, I think. If others don't think it's a worthwhile
> > > tradeoff, I can remove it.
> > >
> > > -David
> >
> > The cost is that if dma_buf_export_info changes even slightly, we get
> > weird crashes.
>
> I'm not sure I understand what types of changes you're referring to.
> As this is written, virtio-dma-buf is just another client of the
> dma-buf API. If this were rewritten to use dma-buf directly, then
> whatever code calls virtio_dma_buf_export would become a client of the
> dma-buf API. If the semantics of existing fields in the dma-buf API
> were changed and virtio-dma-buf wasn't updated, then yes, you could
> get weird crashes from virtio-dma-buf.
> However, the same problem would
> exist if virtio_dma_buf_export used dma-buf directly - changes to
> dma-buf's semantics could cause weird crashes if the caller of
> virtio_dma_buf_export wasn't updated properly. The only potential
> source of problems I see is if virtio_dma_buf_export_info wasn't
> updated properly, but virtio_dma_buf_export_info is dead simple, so I
> don't know if that's really a problem.
>
> -David
I think you can get weird crashes if fields in dma buf are reordered, or
if a field size changes. You have a build bug catching overall struct
size changes but that can remain the same due to compiler padding or
such.
--
MST
^ permalink raw reply
* Re: [PATCH] virtio-mem: drop unnecessary initialization
From: Jason Wang @ 2020-06-08 5:59 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel
Cc: Pankaj Gupta, virtualization, kernel test robot
In-Reply-To: <20200608054517.708167-1-mst@redhat.com>
On 2020/6/8 下午1:45, Michael S. Tsirkin wrote:
> rc is initialized to -EINVAL but that's never used. Drop it.
>
> Fixes: 5f1f79bbc9e2 ("virtio-mem: Paravirtualized memory hotplug")
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> drivers/virtio/virtio_mem.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
> index f658fe9149be..2f357142ea5e 100644
> --- a/drivers/virtio/virtio_mem.c
> +++ b/drivers/virtio/virtio_mem.c
> @@ -1768,7 +1768,7 @@ static void virtio_mem_delete_resource(struct virtio_mem *vm)
> static int virtio_mem_probe(struct virtio_device *vdev)
> {
> struct virtio_mem *vm;
> - int rc = -EINVAL;
> + int rc;
>
> BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24);
> BUILD_BUG_ON(sizeof(struct virtio_mem_resp) != 10);
Acked-by: Jason Wang <jasowang@redhat.com>
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* [PATCH] virtio-mem: drop unnecessary initialization
From: Michael S. Tsirkin @ 2020-06-08 5:45 UTC (permalink / raw)
To: linux-kernel; +Cc: Pankaj Gupta, virtualization, kernel test robot
rc is initialized to -EINVAL but that's never used. Drop it.
Fixes: 5f1f79bbc9e2 ("virtio-mem: Paravirtualized memory hotplug")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/virtio/virtio_mem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index f658fe9149be..2f357142ea5e 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -1768,7 +1768,7 @@ static void virtio_mem_delete_resource(struct virtio_mem *vm)
static int virtio_mem_probe(struct virtio_device *vdev)
{
struct virtio_mem *vm;
- int rc = -EINVAL;
+ int rc;
BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24);
BUILD_BUG_ON(sizeof(struct virtio_mem_resp) != 10);
--
MST
^ permalink raw reply related
* Re: [PATCH] MAINTAINERS: Update PARAVIRT_OPS_INTERFACE and VMWARE_HYPERVISOR_INTERFACE
From: Jürgen Groß @ 2020-06-08 5:35 UTC (permalink / raw)
To: Deep Shah, linux-kernel; +Cc: virtualization, pv-drivers, thellstrom
In-Reply-To: <20200416234520.GA1700@prme-mon-cfl-mlw-07>
On 17.04.20 01:45, Deep Shah wrote:
> Thomas Hellstrom will be handing over VMware's maintainership of these
> interfaces to Deep Shah.
>
> Signed-off-by: Deep Shah <sdeep@vmware.com>
> Acked-by: Thomas Hellstrom <thellstrom@vmware.com>
Pushed to xen/tip.git for-linus-5.8
Juergen
^ permalink raw reply
* Re: [PATCH RFC v5 13/13] vhost: drop head based APIs
From: Jason Wang @ 2020-06-08 3:57 UTC (permalink / raw)
To: Michael S. Tsirkin, linux-kernel; +Cc: kvm, virtualization, netdev, eperezma
In-Reply-To: <20200607141057.704085-14-mst@redhat.com>
On 2020/6/7 下午10:11, Michael S. Tsirkin wrote:
> Everyone's using buf APIs, no need for head based ones anymore.
>
> Signed-off-by: Michael S. Tsirkin<mst@redhat.com>
> ---
> drivers/vhost/vhost.c | 36 ++++++++----------------------------
> drivers/vhost/vhost.h | 12 ------------
> 2 files changed, 8 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 72ee55c810c4..e6931b760b61 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -2299,12 +2299,12 @@ static int fetch_buf(struct vhost_virtqueue *vq)
> return 1;
> }
>
> -/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
> +/* Revert the effect of fetch_buf. Useful for error handling. */
> +static
> void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
> {
> vq->last_avail_idx -= n;
> }
> -EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
The same question as previous version.
Do we need to rewind cached descriptor here?
Thanks
^ permalink raw reply
* Re: [PATCH RFC 03/13] vhost: batching fetches
From: Jason Wang @ 2020-06-08 3:35 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Eugenio Pérez, netdev, linux-kernel, kvm, virtualization
In-Reply-To: <20200607095219-mutt-send-email-mst@kernel.org>
On 2020/6/7 下午9:57, Michael S. Tsirkin wrote:
> On Fri, Jun 05, 2020 at 11:40:17AM +0800, Jason Wang wrote:
>> On 2020/6/4 下午4:59, Michael S. Tsirkin wrote:
>>> On Wed, Jun 03, 2020 at 03:27:39PM +0800, Jason Wang wrote:
>>>> On 2020/6/2 下午9:06, Michael S. Tsirkin wrote:
>>>>> With this patch applied, new and old code perform identically.
>>>>>
>>>>> Lots of extra optimizations are now possible, e.g.
>>>>> we can fetch multiple heads with copy_from/to_user now.
>>>>> We can get rid of maintaining the log array. Etc etc.
>>>>>
>>>>> Signed-off-by: Michael S. Tsirkin<mst@redhat.com>
>>>>> Signed-off-by: Eugenio Pérez<eperezma@redhat.com>
>>>>> Link:https://lore.kernel.org/r/20200401183118.8334-4-eperezma@redhat.com
>>>>> Signed-off-by: Michael S. Tsirkin<mst@redhat.com>
>>>>> ---
>>>>> drivers/vhost/test.c | 2 +-
>>>>> drivers/vhost/vhost.c | 47 ++++++++++++++++++++++++++++++++++++++-----
>>>>> drivers/vhost/vhost.h | 5 ++++-
>>>>> 3 files changed, 47 insertions(+), 7 deletions(-)
>>>>>
>>>>> diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
>>>>> index 9a3a09005e03..02806d6f84ef 100644
>>>>> --- a/drivers/vhost/test.c
>>>>> +++ b/drivers/vhost/test.c
>>>>> @@ -119,7 +119,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
>>>>> dev = &n->dev;
>>>>> vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
>>>>> n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
>>>>> - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
>>>>> + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV + 64,
>>>>> VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, NULL);
>>>>> f->private_data = n;
>>>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>>>>> index 8f9a07282625..aca2a5b0d078 100644
>>>>> --- a/drivers/vhost/vhost.c
>>>>> +++ b/drivers/vhost/vhost.c
>>>>> @@ -299,6 +299,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>>>>> {
>>>>> vq->num = 1;
>>>>> vq->ndescs = 0;
>>>>> + vq->first_desc = 0;
>>>>> vq->desc = NULL;
>>>>> vq->avail = NULL;
>>>>> vq->used = NULL;
>>>>> @@ -367,6 +368,11 @@ static int vhost_worker(void *data)
>>>>> return 0;
>>>>> }
>>>>> +static int vhost_vq_num_batch_descs(struct vhost_virtqueue *vq)
>>>>> +{
>>>>> + return vq->max_descs - UIO_MAXIOV;
>>>>> +}
>>>> 1 descriptor does not mean 1 iov, e.g userspace may pass several 1 byte
>>>> length memory regions for us to translate.
>>>>
>>> Yes but I don't see the relevance. This tells us how many descriptors to
>>> batch, not how many IOVs.
>> Yes, but questions are:
>>
>> - this introduces another obstacle to support more than 1K queue size
>> - if we support 1K queue size, does it mean we need to cache 1K descriptors,
>> which seems a large stress on the cache
>>
>> Thanks
>>
>>
> Still don't understand the relevance. We support up to 1K descriptors
> per buffer just for IOV since we always did. This adds 64 more
> descriptors - is that a big deal?
If I understand correctly, for net, the code tries to batch
descriptors for at least one packet.
If we allow 1K queue size then we allow a packet that consists of 1K
descriptors. Then we need to cache 1K descriptors.
Thanks
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH 5/6] vdpa: introduce virtio pci driver
From: Jason Wang @ 2020-06-08 3:32 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: shahafs, lulu, kvm, saugatm, netdev, mhabets, vmireyno,
linux-kernel, gdawar, virtualization, eperezma, hanand,
zhangweining, eli, lingshan.zhu, rob.miller
In-Reply-To: <20200607095012-mutt-send-email-mst@kernel.org>
On 2020/6/7 下午9:51, Michael S. Tsirkin wrote:
> On Fri, Jun 05, 2020 at 04:54:17PM +0800, Jason Wang wrote:
>> On 2020/6/2 下午3:08, Jason Wang wrote:
>>>>> +static const struct pci_device_id vp_vdpa_id_table[] = {
>>>>> + { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) },
>>>>> + { 0 }
>>>>> +};
>>>> This looks like it'll create a mess with either virtio pci
>>>> or vdpa being loaded at random. Maybe just don't specify
>>>> any IDs for now. Down the road we could get a
>>>> distinct vendor ID or a range of device IDs for this.
>>>
>>> Right, will do.
>>>
>>> Thanks
>>
>> Rethink about this. If we don't specify any ID, the binding won't work.
> We can bind manually. It's not really for production anyway, so
> not a big deal imho.
I think you mean doing it via "new_id", right?
>
>> How about using a dedicated subsystem vendor id for this?
>>
>> Thanks
> If virtio vendor id is used then standard driver is expected
> to bind, right? Maybe use a dedicated vendor id?
I meant something like:
static const struct pci_device_id vp_vdpa_id_table[] = {
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID,
VP_TEST_VENDOR_ID, VP_TEST_DEVICE_ID) },
{ 0 }
};
Thanks
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization
^ permalink raw reply
* Re: [PATCH v4 1/3] virtio: add dma-buf support for exported objects
From: David Stevens @ 2020-06-08 1:33 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Gerd Hoffmann, David Airlie, Daniel Vetter, Sumit Semwal,
Jason Wang, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
open list, ML dri-devel, open list:VIRTIO GPU DRIVER,
Linux Media Mailing List,
moderated list:DMA BUFFER SHARING FRAMEWORK, virtio-dev
In-Reply-To: <20200606160155-mutt-send-email-mst@kernel.org>
On Sun, Jun 7, 2020 at 5:04 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, Jun 05, 2020 at 10:28:42AM +0900, David Stevens wrote:
> > On Fri, Jun 5, 2020 at 4:05 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> > >
> > > On Tue, May 26, 2020 at 07:58:09PM +0900, David Stevens wrote:
> > > > This change adds a new flavor of dma-bufs that can be used by virtio
> > > > drivers to share exported objects. A virtio dma-buf can be queried by
> > > > virtio drivers to obtain the UUID which identifies the underlying
> > > > exported object.
> > > >
> > > > Signed-off-by: David Stevens <stevensd@chromium.org>
> > >
> > > Is this just for graphics? If yes I'd rather we put it in the graphics
> > > driver. We can always move it later ...
> >
> > As stated in the cover letter, this will be used by virtio-video.
> >
> > The proposed virtio-video patches: https://markmail.org/thread/p5d3k566srtdtute
> > The patch which imports these dma-bufs (slightly out of date, uses v3
> > of this patch set): https://markmail.org/thread/j4xlqaaim266qpks
> >
> > > > ---
> > > > drivers/virtio/Makefile | 2 +-
> > > > drivers/virtio/virtio.c | 6 +++
> > > > drivers/virtio/virtio_dma_buf.c | 89 +++++++++++++++++++++++++++++++++
> > > > include/linux/virtio.h | 1 +
> > > > include/linux/virtio_dma_buf.h | 58 +++++++++++++++++++++
> > > > 5 files changed, 155 insertions(+), 1 deletion(-)
> > > > create mode 100644 drivers/virtio/virtio_dma_buf.c
> > > > create mode 100644 include/linux/virtio_dma_buf.h
> > > >
> > > > diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
> > > > index 29a1386ecc03..ecdae5b596de 100644
> > > > --- a/drivers/virtio/Makefile
> > > > +++ b/drivers/virtio/Makefile
> > > > @@ -1,5 +1,5 @@
> > > > # SPDX-License-Identifier: GPL-2.0
> > > > -obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
> > > > +obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o virtio_dma_buf.o
> > > > obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
> > > > obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
> > > > virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
> > > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> > > > index a977e32a88f2..5d46f0ded92d 100644
> > > > --- a/drivers/virtio/virtio.c
> > > > +++ b/drivers/virtio/virtio.c
> > > > @@ -357,6 +357,12 @@ int register_virtio_device(struct virtio_device *dev)
> > > > }
> > > > EXPORT_SYMBOL_GPL(register_virtio_device);
> > > >
> > > > +bool is_virtio_device(struct device *dev)
> > > > +{
> > > > + return dev->bus == &virtio_bus;
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(is_virtio_device);
> > > > +
> > > > void unregister_virtio_device(struct virtio_device *dev)
> > > > {
> > > > int index = dev->index; /* save for after device release */
> > > > diff --git a/drivers/virtio/virtio_dma_buf.c b/drivers/virtio/virtio_dma_buf.c
> > > > new file mode 100644
> > > > index 000000000000..23e3399b11ed
> > > > --- /dev/null
> > > > +++ b/drivers/virtio/virtio_dma_buf.c
> > > > @@ -0,0 +1,89 @@
> > > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > > +/*
> > > > + * dma-bufs for virtio exported objects
> > > > + *
> > > > + * Copyright (C) 2020 Google, Inc.
> > > > + */
> > > > +
> > > > +#include <linux/virtio_dma_buf.h>
> > > > +
> > > > +/**
> > > > + * virtio_dma_buf_export - Creates a new dma-buf for a virtio exported object
> > > > + *
> > > > + * This wraps dma_buf_export() to allow virtio drivers to create a dma-buf
> > > > + * for an virtio exported object that can be queried by other virtio drivers
> > > > + * for the object's UUID.
> > > > + */
> > > > +struct dma_buf *virtio_dma_buf_export(
> > > > + const struct virtio_dma_buf_export_info *virtio_exp_info)
> > > > +{
> > > > + struct dma_buf_export_info exp_info;
> > > > +
> > > > + if (!virtio_exp_info->ops
> > > > + || virtio_exp_info->ops->ops.attach != &virtio_dma_buf_attach
> > > > + || !virtio_exp_info->ops->get_uuid) {
> > > > + return ERR_PTR(-EINVAL);
> > > > + }
> > > > +
> > > > + exp_info.exp_name = virtio_exp_info->exp_name;
> > > > + exp_info.owner = virtio_exp_info->owner;
> > > > + exp_info.ops = &virtio_exp_info->ops->ops;
> > > > + exp_info.size = virtio_exp_info->size;
> > > > + exp_info.flags = virtio_exp_info->flags;
> > > > + exp_info.resv = virtio_exp_info->resv;
> > > > + exp_info.priv = virtio_exp_info->priv;
> > > > + BUILD_BUG_ON(sizeof(struct virtio_dma_buf_export_info)
> > > > + != sizeof(struct dma_buf_export_info));
> > >
> > > This is the only part that gives me pause. Why do we need this hack?
> > > What's wrong with just using dma_buf_export_info directly,
> > > and if you want the virtio ops, just using container_of?
> >
> > This approach provides a more explicit type signature and a little
> > more type safety, I think. If others don't think it's a worthwhile
> > tradeoff, I can remove it.
> >
> > -David
>
> The cost is that if dma_buf_export_info changes even slightly, we get
> weird crashes.
I'm not sure I understand what types of changes you're referring to.
As this is written, virtio-dma-buf is just another client of the
dma-buf API. If this were rewritten to use dma-buf directly, then
whatever code calls virtio_dma_buf_export would become a client of the
dma-buf API. If the semantics of existing fields in the dma-buf API
were changed and virtio-dma-buf wasn't updated, then yes, you could
get weird crashes from virtio-dma-buf. However, the same problem would
exist if virtio_dma_buf_export used dma-buf directly - changes to
dma-buf's semantics could cause weird crashes if the caller of
virtio_dma_buf_export wasn't updated properly. The only potential
source of problems I see is if virtio_dma_buf_export_info wasn't
updated properly, but virtio_dma_buf_export_info is dead simple, so I
don't know if that's really a problem.
-David
^ permalink raw reply
* [vhost:vhost 18/52] drivers/virtio/virtio_mem.c:1391:5: warning: Variable 'rc' is reassigned a value before the old one has been used.
From: kernel test robot @ 2020-06-08 0:59 UTC (permalink / raw)
To: David Hildenbrand
Cc: kbuild-all, kvm, virtualization, netdev, Michael S. Tsirkin,
Pankaj Gupta
tree: https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git vhost
head: f3861bc96a7e130943e1975e571ae62c0319b064
commit: 5f1f79bbc9e26fa9412fa9522f957bb8f030c442 [18/52] virtio-mem: Paravirtualized memory hotplug
compiler: gcc-9 (Debian 9.3.0-13) 9.3.0
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
cppcheck warnings: (new ones prefixed by >>)
>> drivers/virtio/virtio_mem.c:1391:5: warning: Variable 'rc' is reassigned a value before the old one has been used. [redundantAssignment]
rc = virtio_mem_init_vq(vm);
^
drivers/virtio/virtio_mem.c:1375:0: note: Variable 'rc' is reassigned a value before the old one has been used.
int rc = -EINVAL;
^
drivers/virtio/virtio_mem.c:1391:5: note: Variable 'rc' is reassigned a value before the old one has been used.
rc = virtio_mem_init_vq(vm);
^
>> drivers/virtio/virtio_mem.c:801:22: warning: int result is assigned to long variable. If the variable is long to avoid loss of information, then you have loss of information. [truncLongCastAssignment]
const uint64_t size = count * vm->subblock_size;
^
drivers/virtio/virtio_mem.c:822:22: warning: int result is assigned to long variable. If the variable is long to avoid loss of information, then you have loss of information. [truncLongCastAssignment]
const uint64_t size = count * vm->subblock_size;
^
vim +/rc +1391 drivers/virtio/virtio_mem.c
1371
1372 static int virtio_mem_probe(struct virtio_device *vdev)
1373 {
1374 struct virtio_mem *vm;
1375 int rc = -EINVAL;
1376
1377 vdev->priv = vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1378 if (!vm)
1379 return -ENOMEM;
1380
1381 init_waitqueue_head(&vm->host_resp);
1382 vm->vdev = vdev;
1383 INIT_WORK(&vm->wq, virtio_mem_run_wq);
1384 mutex_init(&vm->hotplug_mutex);
1385 INIT_LIST_HEAD(&vm->next);
1386 spin_lock_init(&vm->removal_lock);
1387 hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1388 vm->retry_timer.function = virtio_mem_timer_expired;
1389
1390 /* register the virtqueue */
> 1391 rc = virtio_mem_init_vq(vm);
1392 if (rc)
1393 goto out_free_vm;
1394
1395 /* initialize the device by querying the config */
1396 rc = virtio_mem_init(vm);
1397 if (rc)
1398 goto out_del_vq;
1399
1400 /* register callbacks */
1401 vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb;
1402 rc = register_memory_notifier(&vm->memory_notifier);
1403 if (rc)
1404 goto out_del_vq;
1405 rc = register_virtio_mem_device(vm);
1406 if (rc)
1407 goto out_unreg_mem;
1408
1409 virtio_device_ready(vdev);
1410
1411 /* trigger a config update to start processing the requested_size */
1412 atomic_set(&vm->config_changed, 1);
1413 queue_work(system_freezable_wq, &vm->wq);
1414
1415 return 0;
1416 out_unreg_mem:
1417 unregister_memory_notifier(&vm->memory_notifier);
1418 out_del_vq:
1419 vdev->config->del_vqs(vdev);
1420 out_free_vm:
1421 kfree(vm);
1422 vdev->priv = NULL;
1423
1424 return rc;
1425 }
1426
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [PATCH RFC v5 13/13] vhost: drop head based APIs
From: Michael S. Tsirkin @ 2020-06-07 14:11 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, eperezma, kvm, virtualization
In-Reply-To: <20200607141057.704085-1-mst@redhat.com>
Everyone's using buf APIs, no need for head based ones anymore.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/vhost.c | 36 ++++++++----------------------------
drivers/vhost/vhost.h | 12 ------------
2 files changed, 8 insertions(+), 40 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 72ee55c810c4..e6931b760b61 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2299,12 +2299,12 @@ static int fetch_buf(struct vhost_virtqueue *vq)
return 1;
}
-/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
+/* Revert the effect of fetch_buf. Useful for error handling. */
+static
void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
{
vq->last_avail_idx -= n;
}
-EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
/* This function returns a value > 0 if a descriptor was found, or 0 if none were found.
* A negative code is returned on error. */
@@ -2464,8 +2464,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
return 0;
}
-/* After we've used one of their buffers, we tell them about it. We'll then
- * want to notify the guest, using eventfd. */
+static
int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
unsigned count)
{
@@ -2499,10 +2498,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
}
return r;
}
-EXPORT_SYMBOL_GPL(vhost_add_used_n);
-/* After we've used one of their buffers, we tell them about it. We'll then
- * want to notify the guest, using eventfd. */
+static
int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
{
struct vring_used_elem heads = {
@@ -2512,14 +2509,17 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
return vhost_add_used_n(vq, &heads, 1);
}
-EXPORT_SYMBOL_GPL(vhost_add_used);
+/* After we've used one of their buffers, we tell them about it. We'll then
+ * want to notify the guest, using vhost_signal. */
int vhost_put_used_buf(struct vhost_virtqueue *vq, struct vhost_buf *buf)
{
return vhost_add_used(vq, buf->id, buf->in_len);
}
EXPORT_SYMBOL_GPL(vhost_put_used_buf);
+/* After we've used one of their buffers, we tell them about it. We'll then
+ * want to notify the guest, using vhost_signal. */
int vhost_put_used_n_bufs(struct vhost_virtqueue *vq,
struct vhost_buf *bufs, unsigned count)
{
@@ -2580,26 +2580,6 @@ void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
}
EXPORT_SYMBOL_GPL(vhost_signal);
-/* And here's the combo meal deal. Supersize me! */
-void vhost_add_used_and_signal(struct vhost_dev *dev,
- struct vhost_virtqueue *vq,
- unsigned int head, int len)
-{
- vhost_add_used(vq, head, len);
- vhost_signal(dev, vq);
-}
-EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
-
-/* multi-buffer version of vhost_add_used_and_signal */
-void vhost_add_used_and_signal_n(struct vhost_dev *dev,
- struct vhost_virtqueue *vq,
- struct vring_used_elem *heads, unsigned count)
-{
- vhost_add_used_n(vq, heads, count);
- vhost_signal(dev, vq);
-}
-EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
-
/* return true if we're sure that avaiable ring is empty */
bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 28eea0155efb..264a2a2fae97 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -197,11 +197,6 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
bool vhost_log_access_ok(struct vhost_dev *);
-int vhost_get_vq_desc(struct vhost_virtqueue *,
- struct iovec iov[], unsigned int iov_count,
- unsigned int *out_num, unsigned int *in_num,
- struct vhost_log *log, unsigned int *log_num);
-void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
int vhost_get_avail_buf(struct vhost_virtqueue *, struct vhost_buf *buf,
struct iovec iov[], unsigned int iov_count,
unsigned int *out_num, unsigned int *in_num,
@@ -209,13 +204,6 @@ int vhost_get_avail_buf(struct vhost_virtqueue *, struct vhost_buf *buf,
void vhost_discard_avail_bufs(struct vhost_virtqueue *,
struct vhost_buf *, unsigned count);
int vhost_vq_init_access(struct vhost_virtqueue *);
-int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
-int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
- unsigned count);
-void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
- unsigned int id, int len);
-void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
- struct vring_used_elem *heads, unsigned count);
int vhost_put_used_buf(struct vhost_virtqueue *, struct vhost_buf *buf);
int vhost_put_used_n_bufs(struct vhost_virtqueue *,
struct vhost_buf *bufs, unsigned count);
--
MST
^ permalink raw reply related
* [PATCH RFC v5 12/13] vhost/vsock: switch to the buf API
From: Michael S. Tsirkin @ 2020-06-07 14:11 UTC (permalink / raw)
To: linux-kernel; +Cc: kvm, netdev, virtualization, eperezma, Stefan Hajnoczi
In-Reply-To: <20200607141057.704085-1-mst@redhat.com>
A straight-forward conversion.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/vsock.c | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index a483cec31d5c..61c6d3dd2ae3 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -103,7 +103,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
unsigned out, in;
size_t nbytes;
size_t iov_len, payload_len;
- int head;
+ struct vhost_buf buf;
+ int ret;
spin_lock_bh(&vsock->send_pkt_list_lock);
if (list_empty(&vsock->send_pkt_list)) {
@@ -117,16 +118,17 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
list_del_init(&pkt->list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
- head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
- &out, &in, NULL, NULL);
- if (head < 0) {
+ ret = vhost_get_avail_buf(vq, &buf,
+ vq->iov, ARRAY_SIZE(vq->iov),
+ &out, &in, NULL, NULL);
+ if (ret < 0) {
spin_lock_bh(&vsock->send_pkt_list_lock);
list_add(&pkt->list, &vsock->send_pkt_list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
break;
}
- if (head == vq->num) {
+ if (!ret) {
spin_lock_bh(&vsock->send_pkt_list_lock);
list_add(&pkt->list, &vsock->send_pkt_list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
@@ -186,7 +188,8 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
*/
virtio_transport_deliver_tap_pkt(pkt);
- vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
+ buf.in_len = sizeof(pkt->hdr) + payload_len;
+ vhost_put_used_buf(vq, &buf);
added = true;
pkt->off += payload_len;
@@ -440,7 +443,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
dev);
struct virtio_vsock_pkt *pkt;
- int head, pkts = 0, total_len = 0;
+ int ret, pkts = 0, total_len = 0;
+ struct vhost_buf buf;
unsigned int out, in;
bool added = false;
@@ -461,12 +465,13 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
goto no_more_replies;
}
- head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
- &out, &in, NULL, NULL);
- if (head < 0)
+ ret = vhost_get_avail_buf(vq, &buf,
+ vq->iov, ARRAY_SIZE(vq->iov),
+ &out, &in, NULL, NULL);
+ if (ret < 0)
break;
- if (head == vq->num) {
+ if (!ret) {
if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
vhost_disable_notify(&vsock->dev, vq);
continue;
@@ -494,7 +499,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
virtio_transport_free_pkt(pkt);
len += sizeof(pkt->hdr);
- vhost_add_used(vq, head, len);
+ buf.in_len = len;
+ vhost_put_used_buf(vq, &buf);
total_len += len;
added = true;
} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
--
MST
^ permalink raw reply related
* [PATCH RFC v5 11/13] vhost/scsi: switch to buf APIs
From: Michael S. Tsirkin @ 2020-06-07 14:11 UTC (permalink / raw)
To: linux-kernel
Cc: kvm, netdev, virtualization, eperezma, Stefan Hajnoczi,
Paolo Bonzini
In-Reply-To: <20200607141057.704085-1-mst@redhat.com>
Switch to buf APIs. Doing this exposes a spec violation in vhost scsi:
all used bufs are marked with length 0.
Fixing that is left for another day.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/scsi.c | 73 ++++++++++++++++++++++++++------------------
1 file changed, 44 insertions(+), 29 deletions(-)
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 0cbaa0b3893d..a5cdd4c01a3a 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -71,8 +71,8 @@ struct vhost_scsi_inflight {
};
struct vhost_scsi_cmd {
- /* Descriptor from vhost_get_vq_desc() for virt_queue segment */
- int tvc_vq_desc;
+ /* Descriptor from vhost_get_avail_buf() for virt_queue segment */
+ struct vhost_buf tvc_vq_desc;
/* virtio-scsi initiator task attribute */
int tvc_task_attr;
/* virtio-scsi response incoming iovecs */
@@ -213,7 +213,7 @@ struct vhost_scsi {
* Context for processing request and control queue operations.
*/
struct vhost_scsi_ctx {
- int head;
+ struct vhost_buf buf;
unsigned int out, in;
size_t req_size, rsp_size;
size_t out_size, in_size;
@@ -443,6 +443,20 @@ static int vhost_scsi_check_stop_free(struct se_cmd *se_cmd)
return target_put_sess_cmd(se_cmd);
}
+/* Signal to guest that request finished with no input buffer. */
+/* TODO calling this when writing into buffer and most likely a bug */
+static void vhost_scsi_signal_noinput(struct vhost_dev *vdev,
+ struct vhost_virtqueue *vq,
+ struct vhost_buf *bufp)
+{
+ struct vhost_buf buf = *bufp;
+
+ buf.in_len = 0;
+ vhost_put_used_buf(vq, &buf);
+ vhost_signal(vdev, vq);
+}
+
+
static void
vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt)
{
@@ -450,7 +464,8 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt)
struct virtio_scsi_event *event = &evt->event;
struct virtio_scsi_event __user *eventp;
unsigned out, in;
- int head, ret;
+ struct vhost_buf buf;
+ int ret;
if (!vhost_vq_get_backend(vq)) {
vs->vs_events_missed = true;
@@ -459,14 +474,14 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt)
again:
vhost_disable_notify(&vs->dev, vq);
- head = vhost_get_vq_desc(vq, vq->iov,
- ARRAY_SIZE(vq->iov), &out, &in,
- NULL, NULL);
- if (head < 0) {
+ ret = vhost_get_avail_buf(vq, &buf,
+ vq->iov, ARRAY_SIZE(vq->iov), &out, &in,
+ NULL, NULL);
+ if (ret < 0) {
vs->vs_events_missed = true;
return;
}
- if (head == vq->num) {
+ if (!ret) {
if (vhost_enable_notify(&vs->dev, vq))
goto again;
vs->vs_events_missed = true;
@@ -488,7 +503,7 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt)
eventp = vq->iov[out].iov_base;
ret = __copy_to_user(eventp, event, sizeof(*event));
if (!ret)
- vhost_add_used_and_signal(&vs->dev, vq, head, 0);
+ vhost_scsi_signal_noinput(&vs->dev, vq, &buf);
else
vq_err(vq, "Faulted on vhost_scsi_send_event\n");
}
@@ -549,7 +564,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter);
if (likely(ret == sizeof(v_rsp))) {
struct vhost_scsi_virtqueue *q;
- vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0);
+ vhost_put_used_buf(cmd->tvc_vq, &cmd->tvc_vq_desc);
q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
vq = q - vs->vqs;
__set_bit(vq, signal);
@@ -793,7 +808,7 @@ static void vhost_scsi_submission_work(struct work_struct *work)
static void
vhost_scsi_send_bad_target(struct vhost_scsi *vs,
struct vhost_virtqueue *vq,
- int head, unsigned out)
+ struct vhost_buf *buf, unsigned out)
{
struct virtio_scsi_cmd_resp __user *resp;
struct virtio_scsi_cmd_resp rsp;
@@ -804,7 +819,7 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs,
resp = vq->iov[out].iov_base;
ret = __copy_to_user(resp, &rsp, sizeof(rsp));
if (!ret)
- vhost_add_used_and_signal(&vs->dev, vq, head, 0);
+ vhost_scsi_signal_noinput(&vs->dev, vq, buf);
else
pr_err("Faulted on virtio_scsi_cmd_resp\n");
}
@@ -813,21 +828,21 @@ static int
vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
struct vhost_scsi_ctx *vc)
{
- int ret = -ENXIO;
+ int r, ret = -ENXIO;
- vc->head = vhost_get_vq_desc(vq, vq->iov,
- ARRAY_SIZE(vq->iov), &vc->out, &vc->in,
- NULL, NULL);
+ r = vhost_get_avail_buf(vq, &vc->buf,
+ vq->iov, ARRAY_SIZE(vq->iov), &vc->out, &vc->in,
+ NULL, NULL);
- pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
- vc->head, vc->out, vc->in);
+ pr_debug("vhost_get_avail_buf: buf: %d, out: %u in: %u\n",
+ vc->buf.id, vc->out, vc->in);
/* On error, stop handling until the next kick. */
- if (unlikely(vc->head < 0))
+ if (unlikely(r < 0))
goto done;
/* Nothing new? Wait for eventfd to tell us they refilled. */
- if (vc->head == vq->num) {
+ if (!r) {
if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
vhost_disable_notify(&vs->dev, vq);
ret = -EAGAIN;
@@ -1093,11 +1108,11 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
}
}
/*
- * Save the descriptor from vhost_get_vq_desc() to be used to
+ * Save the descriptor from vhost_get_avail_buf() to be used to
* complete the virtio-scsi request in TCM callback context via
* vhost_scsi_queue_data_in() and vhost_scsi_queue_status()
*/
- cmd->tvc_vq_desc = vc.head;
+ cmd->tvc_vq_desc = vc.buf;
/*
* Dispatch cmd descriptor for cmwq execution in process
* context provided by vhost_scsi_workqueue. This also ensures
@@ -1117,7 +1132,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
if (ret == -ENXIO)
break;
else if (ret == -EIO)
- vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
+ vhost_scsi_send_bad_target(vs, vq, &vc.buf, vc.out);
} while (likely(!vhost_exceeds_weight(vq, ++c, 0)));
out:
mutex_unlock(&vq->mutex);
@@ -1139,9 +1154,9 @@ vhost_scsi_send_tmf_reject(struct vhost_scsi *vs,
iov_iter_init(&iov_iter, READ, &vq->iov[vc->out], vc->in, sizeof(rsp));
ret = copy_to_iter(&rsp, sizeof(rsp), &iov_iter);
- if (likely(ret == sizeof(rsp)))
- vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
- else
+ if (likely(ret == sizeof(rsp))) {
+ vhost_scsi_signal_noinput(&vs->dev, vq, &vc->buf);
+ } else
pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n");
}
@@ -1162,7 +1177,7 @@ vhost_scsi_send_an_resp(struct vhost_scsi *vs,
ret = copy_to_iter(&rsp, sizeof(rsp), &iov_iter);
if (likely(ret == sizeof(rsp)))
- vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
+ vhost_scsi_signal_noinput(&vs->dev, vq, &vc->buf);
else
pr_err("Faulted on virtio_scsi_ctrl_an_resp\n");
}
@@ -1269,7 +1284,7 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
if (ret == -ENXIO)
break;
else if (ret == -EIO)
- vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
+ vhost_scsi_send_bad_target(vs, vq, &vc.buf, vc.out);
} while (likely(!vhost_exceeds_weight(vq, ++c, 0)));
out:
mutex_unlock(&vq->mutex);
--
MST
^ permalink raw reply related
* [PATCH RFC v5 10/13] vhost/test: convert to the buf API
From: Michael S. Tsirkin @ 2020-06-07 14:11 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, eperezma, kvm, virtualization
In-Reply-To: <20200607141057.704085-1-mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/test.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 02806d6f84ef..251fd2bf74a3 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -44,9 +44,10 @@ static void handle_vq(struct vhost_test *n)
{
struct vhost_virtqueue *vq = &n->vqs[VHOST_TEST_VQ];
unsigned out, in;
- int head;
+ int ret;
size_t len, total_len = 0;
void *private;
+ struct vhost_buf buf;
mutex_lock(&vq->mutex);
private = vhost_vq_get_backend(vq);
@@ -58,15 +59,15 @@ static void handle_vq(struct vhost_test *n)
vhost_disable_notify(&n->dev, vq);
for (;;) {
- head = vhost_get_vq_desc(vq, vq->iov,
- ARRAY_SIZE(vq->iov),
- &out, &in,
- NULL, NULL);
+ ret = vhost_get_avail_buf(vq, vq->iov, &buf,
+ ARRAY_SIZE(vq->iov),
+ &out, &in,
+ NULL, NULL);
/* On error, stop handling until the next kick. */
- if (unlikely(head < 0))
+ if (unlikely(ret < 0))
break;
/* Nothing new? Wait for eventfd to tell us they refilled. */
- if (head == vq->num) {
+ if (!ret) {
if (unlikely(vhost_enable_notify(&n->dev, vq))) {
vhost_disable_notify(&n->dev, vq);
continue;
@@ -78,13 +79,14 @@ static void handle_vq(struct vhost_test *n)
"out %d, int %d\n", out, in);
break;
}
- len = iov_length(vq->iov, out);
+ len = buf.out_len;
/* Sanity check */
if (!len) {
vq_err(vq, "Unexpected 0 len for TX\n");
break;
}
- vhost_add_used_and_signal(&n->dev, vq, head, 0);
+ vhost_put_used_buf(vq, &buf);
+ vhost_signal(&n->dev, vq);
total_len += len;
if (unlikely(vhost_exceeds_weight(vq, 0, total_len)))
break;
--
MST
^ permalink raw reply related
* [PATCH RFC v5 09/13] vhost/net: avoid iov length math
From: Michael S. Tsirkin @ 2020-06-07 14:11 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, eperezma, kvm, virtualization
In-Reply-To: <20200607141057.704085-1-mst@redhat.com>
Now that the API exposes the buffer length, we no longer need to
scan IOVs to figure it out.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/net.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 830fe84912a5..0b509be8d7b1 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -607,11 +607,9 @@ static bool vhost_exceeds_maxpend(struct vhost_net *net)
}
static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter,
- size_t hdr_size, int out)
+ size_t len, size_t hdr_size, int out)
{
/* Skip header. TODO: support TSO. */
- size_t len = iov_length(vq->iov, out);
-
iov_iter_init(iter, WRITE, vq->iov, out, len);
iov_iter_advance(iter, hdr_size);
@@ -640,7 +638,7 @@ static int get_tx_bufs(struct vhost_net *net,
}
/* Sanity check */
- *len = init_iov_iter(vq, &msg->msg_iter, nvq->vhost_hlen, *out);
+ *len = init_iov_iter(vq, &msg->msg_iter, buf->out_len, nvq->vhost_hlen, *out);
if (*len == 0) {
vq_err(vq, "Unexpected header len for TX: %zd expected %zd\n",
*len, nvq->vhost_hlen);
@@ -1080,7 +1078,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
nlogs += *log_num;
log += *log_num;
}
- len = iov_length(vq->iov + seg, in);
+ len = bufs[bufcount].in_len;
datalen -= len;
++bufcount;
seg += in;
--
MST
^ permalink raw reply related
* [PATCH RFC v5 08/13] vhost/net: convert to new API: heads->bufs
From: Michael S. Tsirkin @ 2020-06-07 14:11 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, eperezma, kvm, virtualization
In-Reply-To: <20200607141057.704085-1-mst@redhat.com>
Convert vhost net to use the new format-agnostic API.
In particular, don't poke at vq internals such as the
heads array.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/net.c | 154 +++++++++++++++++++++++---------------------
1 file changed, 82 insertions(+), 72 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index ff594eec8ae3..830fe84912a5 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -59,13 +59,13 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
* status internally; used for zerocopy tx only.
*/
/* Lower device DMA failed */
-#define VHOST_DMA_FAILED_LEN ((__force __virtio32)3)
+#define VHOST_DMA_FAILED_LEN (3)
/* Lower device DMA done */
-#define VHOST_DMA_DONE_LEN ((__force __virtio32)2)
+#define VHOST_DMA_DONE_LEN (2)
/* Lower device DMA in progress */
-#define VHOST_DMA_IN_PROGRESS ((__force __virtio32)1)
+#define VHOST_DMA_IN_PROGRESS (1)
/* Buffer unused */
-#define VHOST_DMA_CLEAR_LEN ((__force __virtio32)0)
+#define VHOST_DMA_CLEAR_LEN (0)
#define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN)
@@ -112,9 +112,12 @@ struct vhost_net_virtqueue {
/* last used idx for outstanding DMA zerocopy buffers */
int upend_idx;
/* For TX, first used idx for DMA done zerocopy buffers
- * For RX, number of batched heads
+ * For RX, number of batched bufs
*/
int done_idx;
+ /* Outstanding user bufs. UIO_MAXIOV in length. */
+ /* TODO: we can make this smaller for sure. */
+ struct vhost_buf *bufs;
/* Number of XDP frames batched */
int batched_xdp;
/* an array of userspace buffers info */
@@ -271,6 +274,8 @@ static void vhost_net_clear_ubuf_info(struct vhost_net *n)
int i;
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
+ kfree(n->vqs[i].bufs);
+ n->vqs[i].bufs = NULL;
kfree(n->vqs[i].ubuf_info);
n->vqs[i].ubuf_info = NULL;
}
@@ -282,6 +287,12 @@ static int vhost_net_set_ubuf_info(struct vhost_net *n)
int i;
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
+ n->vqs[i].bufs = kmalloc_array(UIO_MAXIOV,
+ sizeof(*n->vqs[i].bufs),
+ GFP_KERNEL);
+ if (!n->vqs[i].bufs)
+ goto err;
+
zcopy = vhost_net_zcopy_mask & (0x1 << i);
if (!zcopy)
continue;
@@ -364,18 +375,18 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
int j = 0;
for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
- if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
+ if (nvq->bufs[i].in_len == VHOST_DMA_FAILED_LEN)
vhost_net_tx_err(net);
- if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
- vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
+ if (VHOST_DMA_IS_DONE(nvq->bufs[i].in_len)) {
+ nvq->bufs[i].in_len = VHOST_DMA_CLEAR_LEN;
++j;
} else
break;
}
while (j) {
add = min(UIO_MAXIOV - nvq->done_idx, j);
- vhost_add_used_and_signal_n(vq->dev, vq,
- &vq->heads[nvq->done_idx], add);
+ vhost_put_used_n_bufs(vq, &nvq->bufs[nvq->done_idx], add);
+ vhost_signal(vq->dev, vq);
nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
j -= add;
}
@@ -390,7 +401,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
rcu_read_lock_bh();
/* set len to mark this desc buffers done DMA */
- nvq->vq.heads[ubuf->desc].in_len = success ?
+ nvq->bufs[ubuf->desc].in_len = success ?
VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
cnt = vhost_net_ubuf_put(ubufs);
@@ -452,7 +463,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
if (!nvq->done_idx)
return;
- vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+ vhost_put_used_n_bufs(vq, nvq->bufs, nvq->done_idx);
+ vhost_signal(dev, vq);
nvq->done_idx = 0;
}
@@ -558,6 +570,7 @@ static void vhost_net_busy_poll(struct vhost_net *net,
static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
struct vhost_net_virtqueue *tnvq,
+ struct vhost_buf *buf,
unsigned int *out_num, unsigned int *in_num,
struct msghdr *msghdr, bool *busyloop_intr)
{
@@ -565,10 +578,10 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
struct vhost_virtqueue *rvq = &rnvq->vq;
struct vhost_virtqueue *tvq = &tnvq->vq;
- int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
- out_num, in_num, NULL, NULL);
+ int r = vhost_get_avail_buf(tvq, buf, tvq->iov, ARRAY_SIZE(tvq->iov),
+ out_num, in_num, NULL, NULL);
- if (r == tvq->num && tvq->busyloop_timeout) {
+ if (!r && tvq->busyloop_timeout) {
/* Flush batched packets first */
if (!vhost_sock_zcopy(vhost_vq_get_backend(tvq)))
vhost_tx_batch(net, tnvq,
@@ -577,8 +590,8 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false);
- r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov),
- out_num, in_num, NULL, NULL);
+ r = vhost_get_avail_buf(tvq, buf, tvq->iov, ARRAY_SIZE(tvq->iov),
+ out_num, in_num, NULL, NULL);
}
return r;
@@ -607,6 +620,7 @@ static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter,
static int get_tx_bufs(struct vhost_net *net,
struct vhost_net_virtqueue *nvq,
+ struct vhost_buf *buf,
struct msghdr *msg,
unsigned int *out, unsigned int *in,
size_t *len, bool *busyloop_intr)
@@ -614,9 +628,9 @@ static int get_tx_bufs(struct vhost_net *net,
struct vhost_virtqueue *vq = &nvq->vq;
int ret;
- ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg, busyloop_intr);
+ ret = vhost_net_tx_get_vq_desc(net, nvq, buf, out, in, msg, busyloop_intr);
- if (ret < 0 || ret == vq->num)
+ if (ret <= 0)
return ret;
if (*in) {
@@ -761,7 +775,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
unsigned out, in;
- int head;
+ int ret;
struct msghdr msg = {
.msg_name = NULL,
.msg_namelen = 0,
@@ -773,6 +787,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
int err;
int sent_pkts = 0;
bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
+ struct vhost_buf buf;
do {
bool busyloop_intr = false;
@@ -780,13 +795,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
if (nvq->done_idx == VHOST_NET_BATCH)
vhost_tx_batch(net, nvq, sock, &msg);
- head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
- &busyloop_intr);
+ ret = get_tx_bufs(net, nvq, &buf, &msg, &out, &in, &len,
+ &busyloop_intr);
/* On error, stop handling until the next kick. */
- if (unlikely(head < 0))
+ if (unlikely(ret < 0))
break;
/* Nothing new? Wait for eventfd to tell us they refilled. */
- if (head == vq->num) {
+ if (!ret) {
if (unlikely(busyloop_intr)) {
vhost_poll_queue(&vq->poll);
} else if (unlikely(vhost_enable_notify(&net->dev,
@@ -808,7 +823,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
goto done;
} else if (unlikely(err != -ENOSPC)) {
vhost_tx_batch(net, nvq, sock, &msg);
- vhost_discard_vq_desc(vq, 1);
+ vhost_discard_avail_bufs(vq, &buf, 1);
vhost_net_enable_vq(net, vq);
break;
}
@@ -829,7 +844,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
/* TODO: Check specific error and bomb out unless ENOBUFS? */
err = sock->ops->sendmsg(sock, &msg, len);
if (unlikely(err < 0)) {
- vhost_discard_vq_desc(vq, 1);
+ vhost_discard_avail_bufs(vq, &buf, 1);
vhost_net_enable_vq(net, vq);
break;
}
@@ -837,8 +852,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
pr_debug("Truncated TX packet: len %d != %zd\n",
err, len);
done:
- vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
- vq->heads[nvq->done_idx].len = 0;
+ nvq->bufs[nvq->done_idx] = buf;
++nvq->done_idx;
} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
@@ -850,7 +864,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
unsigned out, in;
- int head;
+ int ret;
struct msghdr msg = {
.msg_name = NULL,
.msg_namelen = 0,
@@ -864,6 +878,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
bool zcopy_used;
int sent_pkts = 0;
+ struct vhost_buf buf;
do {
bool busyloop_intr;
@@ -872,13 +887,13 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
vhost_zerocopy_signal_used(net, vq);
busyloop_intr = false;
- head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
- &busyloop_intr);
+ ret = get_tx_bufs(net, nvq, &buf, &msg, &out, &in, &len,
+ &busyloop_intr);
/* On error, stop handling until the next kick. */
- if (unlikely(head < 0))
+ if (unlikely(ret < 0))
break;
/* Nothing new? Wait for eventfd to tell us they refilled. */
- if (head == vq->num) {
+ if (!ret) {
if (unlikely(busyloop_intr)) {
vhost_poll_queue(&vq->poll);
} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
@@ -897,8 +912,8 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
struct ubuf_info *ubuf;
ubuf = nvq->ubuf_info + nvq->upend_idx;
- vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
- vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
+ nvq->bufs[nvq->upend_idx] = buf;
+ nvq->bufs[nvq->upend_idx].in_len = VHOST_DMA_IN_PROGRESS;
ubuf->callback = vhost_zerocopy_callback;
ubuf->ctx = nvq->ubufs;
ubuf->desc = nvq->upend_idx;
@@ -930,17 +945,19 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
% UIO_MAXIOV;
}
- vhost_discard_vq_desc(vq, 1);
+ vhost_discard_avail_bufs(vq, &buf, 1);
vhost_net_enable_vq(net, vq);
break;
}
if (err != len)
pr_debug("Truncated TX packet: "
" len %d != %zd\n", err, len);
- if (!zcopy_used)
- vhost_add_used_and_signal(&net->dev, vq, head, 0);
- else
+ if (!zcopy_used) {
+ vhost_put_used_buf(vq, &buf);
+ vhost_signal(&net->dev, vq);
+ } else {
vhost_zerocopy_signal_used(net, vq);
+ }
vhost_net_tx_packet(net);
} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
}
@@ -1004,7 +1021,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
int len = peek_head_len(rnvq, sk);
if (!len && rvq->busyloop_timeout) {
- /* Flush batched heads first */
+ /* Flush batched bufs first */
vhost_net_signal_used(rnvq);
/* Both tx vq and rx socket were polled here */
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
@@ -1022,11 +1039,11 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
* @iovcount - returned count of io vectors we fill
* @log - vhost log
* @log_num - log offset
- * @quota - headcount quota, 1 for big buffer
- * returns number of buffer heads allocated, negative on error
+ * @quota - bufcount quota, 1 for big buffer
+ * returns number of buffers allocated, negative on error
*/
static int get_rx_bufs(struct vhost_virtqueue *vq,
- struct vring_used_elem *heads,
+ struct vhost_buf *bufs,
int datalen,
unsigned *iovcount,
struct vhost_log *log,
@@ -1035,30 +1052,24 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
{
unsigned int out, in;
int seg = 0;
- int headcount = 0;
- unsigned d;
+ int bufcount = 0;
int r, nlogs = 0;
/* len is always initialized before use since we are always called with
* datalen > 0.
*/
u32 uninitialized_var(len);
- while (datalen > 0 && headcount < quota) {
+ while (datalen > 0 && bufcount < quota) {
if (unlikely(seg >= UIO_MAXIOV)) {
r = -ENOBUFS;
goto err;
}
- r = vhost_get_vq_desc(vq, vq->iov + seg,
- ARRAY_SIZE(vq->iov) - seg, &out,
- &in, log, log_num);
- if (unlikely(r < 0))
+ r = vhost_get_avail_buf(vq, bufs + bufcount, vq->iov + seg,
+ ARRAY_SIZE(vq->iov) - seg, &out,
+ &in, log, log_num);
+ if (unlikely(r <= 0))
goto err;
- d = r;
- if (d == vq->num) {
- r = 0;
- goto err;
- }
if (unlikely(out || in <= 0)) {
vq_err(vq, "unexpected descriptor format for RX: "
"out %d, in %d\n", out, in);
@@ -1069,14 +1080,12 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
nlogs += *log_num;
log += *log_num;
}
- heads[headcount].id = cpu_to_vhost32(vq, d);
len = iov_length(vq->iov + seg, in);
- heads[headcount].len = cpu_to_vhost32(vq, len);
datalen -= len;
- ++headcount;
+ ++bufcount;
seg += in;
}
- heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+ bufs[bufcount - 1].in_len = len + datalen;
*iovcount = seg;
if (unlikely(log))
*log_num = nlogs;
@@ -1086,9 +1095,9 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
r = UIO_MAXIOV + 1;
goto err;
}
- return headcount;
+ return bufcount;
err:
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_avail_bufs(vq, bufs, bufcount);
return r;
}
@@ -1113,7 +1122,7 @@ static void handle_rx(struct vhost_net *net)
};
size_t total_len = 0;
int err, mergeable;
- s16 headcount;
+ int bufcount;
size_t vhost_hlen, sock_hlen;
size_t vhost_len, sock_len;
bool busyloop_intr = false;
@@ -1147,14 +1156,14 @@ static void handle_rx(struct vhost_net *net)
break;
sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen;
- headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
- vhost_len, &in, vq_log, &log,
- likely(mergeable) ? UIO_MAXIOV : 1);
+ bufcount = get_rx_bufs(vq, nvq->bufs + nvq->done_idx,
+ vhost_len, &in, vq_log, &log,
+ likely(mergeable) ? UIO_MAXIOV : 1);
/* On error, stop handling until the next kick. */
- if (unlikely(headcount < 0))
+ if (unlikely(bufcount < 0))
goto out;
/* OK, now we need to know about added descriptors. */
- if (!headcount) {
+ if (!bufcount) {
if (unlikely(busyloop_intr)) {
vhost_poll_queue(&vq->poll);
} else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
@@ -1171,7 +1180,7 @@ static void handle_rx(struct vhost_net *net)
if (nvq->rx_ring)
msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
/* On overrun, truncate and discard */
- if (unlikely(headcount > UIO_MAXIOV)) {
+ if (unlikely(bufcount > UIO_MAXIOV)) {
iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
err = sock->ops->recvmsg(sock, &msg,
1, MSG_DONTWAIT | MSG_TRUNC);
@@ -1195,7 +1204,7 @@ static void handle_rx(struct vhost_net *net)
if (unlikely(err != sock_len)) {
pr_debug("Discarded rx packet: "
" len %d, expected %zd\n", err, sock_len);
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_avail_bufs(vq, nvq->bufs + nvq->done_idx, bufcount);
continue;
}
/* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */
@@ -1214,15 +1223,15 @@ static void handle_rx(struct vhost_net *net)
}
/* TODO: Should check and handle checksum. */
- num_buffers = cpu_to_vhost16(vq, headcount);
+ num_buffers = cpu_to_vhost16(vq, bufcount);
if (likely(mergeable) &&
copy_to_iter(&num_buffers, sizeof num_buffers,
&fixup) != sizeof num_buffers) {
vq_err(vq, "Failed num_buffers write");
- vhost_discard_vq_desc(vq, headcount);
+ vhost_discard_avail_bufs(vq, nvq->bufs + nvq->done_idx, bufcount);
goto out;
}
- nvq->done_idx += headcount;
+ nvq->done_idx += bufcount;
if (nvq->done_idx > VHOST_NET_BATCH)
vhost_net_signal_used(nvq);
if (unlikely(vq_log))
@@ -1314,6 +1323,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick;
n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick;
for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
+ n->vqs[i].bufs = NULL;
n->vqs[i].ubufs = NULL;
n->vqs[i].ubuf_info = NULL;
n->vqs[i].upend_idx = 0;
--
MST
^ permalink raw reply related
* [PATCH RFC v5 07/13] vhost: format-independent API for used buffers
From: Michael S. Tsirkin @ 2020-06-07 14:11 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, eperezma, kvm, virtualization
In-Reply-To: <20200607141057.704085-1-mst@redhat.com>
Add a new API that doesn't assume used ring, heads, etc.
For now, we keep the old APIs around to make it easier
to convert drivers.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/vhost.c | 52 ++++++++++++++++++++++++++++++++++---------
drivers/vhost/vhost.h | 17 +++++++++++++-
2 files changed, 58 insertions(+), 11 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 3ffcba4e27e9..72ee55c810c4 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2335,13 +2335,12 @@ static int fetch_descs(struct vhost_virtqueue *vq)
* number of output then some number of input descriptors, it's actually two
* iovecs, but we pack them into one and note how many of each there were.
*
- * This function returns the descriptor number found, or vq->num (which is
- * never a valid descriptor number) if none was found. A negative code is
- * returned on error. */
-int vhost_get_vq_desc(struct vhost_virtqueue *vq,
- struct iovec iov[], unsigned int iov_size,
- unsigned int *out_num, unsigned int *in_num,
- struct vhost_log *log, unsigned int *log_num)
+ * This function returns a value > 0 if a descriptor was found, or 0 if none were found.
+ * A negative code is returned on error. */
+int vhost_get_avail_buf(struct vhost_virtqueue *vq, struct vhost_buf *buf,
+ struct iovec iov[], unsigned int iov_size,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num)
{
int ret = fetch_descs(vq);
int i;
@@ -2354,6 +2353,8 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
*out_num = *in_num = 0;
if (unlikely(log))
*log_num = 0;
+ buf->in_len = buf->out_len = 0;
+ buf->descs = 0;
for (i = vq->first_desc; i < vq->ndescs; ++i) {
unsigned iov_count = *in_num + *out_num;
@@ -2383,6 +2384,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
/* If this is an input descriptor,
* increment that count. */
*in_num += ret;
+ buf->in_len += desc->len;
if (unlikely(log && ret)) {
log[*log_num].addr = desc->addr;
log[*log_num].len = desc->len;
@@ -2398,9 +2400,11 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
goto err;
}
*out_num += ret;
+ buf->out_len += desc->len;
}
- ret = desc->id;
+ buf->id = desc->id;
+ ++buf->descs;
if (!(desc->flags & VRING_DESC_F_NEXT))
break;
@@ -2408,7 +2412,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
vq->first_desc = i + 1;
- return ret;
+ return 1;
err:
for (i = vq->first_desc; i < vq->ndescs; ++i)
@@ -2418,7 +2422,15 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
return ret;
}
-EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
+EXPORT_SYMBOL_GPL(vhost_get_avail_buf);
+
+/* Reverse the effect of vhost_get_avail_buf. Useful for error handling. */
+void vhost_discard_avail_bufs(struct vhost_virtqueue *vq,
+ struct vhost_buf *buf, unsigned count)
+{
+ vhost_discard_vq_desc(vq, count);
+}
+EXPORT_SYMBOL_GPL(vhost_discard_avail_bufs);
static int __vhost_add_used_n(struct vhost_virtqueue *vq,
struct vring_used_elem *heads,
@@ -2502,6 +2514,26 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
}
EXPORT_SYMBOL_GPL(vhost_add_used);
+int vhost_put_used_buf(struct vhost_virtqueue *vq, struct vhost_buf *buf)
+{
+ return vhost_add_used(vq, buf->id, buf->in_len);
+}
+EXPORT_SYMBOL_GPL(vhost_put_used_buf);
+
+int vhost_put_used_n_bufs(struct vhost_virtqueue *vq,
+ struct vhost_buf *bufs, unsigned count)
+{
+ unsigned i;
+
+ for (i = 0; i < count; ++i) {
+ vq->heads[i].id = cpu_to_vhost32(vq, bufs[i].id);
+ vq->heads[i].len = cpu_to_vhost32(vq, bufs[i].in_len);
+ }
+
+ return vhost_add_used_n(vq, vq->heads, count);
+}
+EXPORT_SYMBOL_GPL(vhost_put_used_n_bufs);
+
static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
__u16 old, new;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index fed36af5c444..28eea0155efb 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -67,6 +67,13 @@ struct vhost_desc {
u16 id;
};
+struct vhost_buf {
+ u32 out_len;
+ u32 in_len;
+ u16 descs;
+ u16 id;
+};
+
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
@@ -195,7 +202,12 @@ int vhost_get_vq_desc(struct vhost_virtqueue *,
unsigned int *out_num, unsigned int *in_num,
struct vhost_log *log, unsigned int *log_num);
void vhost_discard_vq_desc(struct vhost_virtqueue *, int n);
-
+int vhost_get_avail_buf(struct vhost_virtqueue *, struct vhost_buf *buf,
+ struct iovec iov[], unsigned int iov_count,
+ unsigned int *out_num, unsigned int *in_num,
+ struct vhost_log *log, unsigned int *log_num);
+void vhost_discard_avail_bufs(struct vhost_virtqueue *,
+ struct vhost_buf *, unsigned count);
int vhost_vq_init_access(struct vhost_virtqueue *);
int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len);
int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads,
@@ -204,6 +216,9 @@ void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *,
unsigned int id, int len);
void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *,
struct vring_used_elem *heads, unsigned count);
+int vhost_put_used_buf(struct vhost_virtqueue *, struct vhost_buf *buf);
+int vhost_put_used_n_bufs(struct vhost_virtqueue *,
+ struct vhost_buf *bufs, unsigned count);
void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *);
void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *);
bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *);
--
MST
^ permalink raw reply related
* [PATCH RFC v5 06/13] vhost: reorder functions
From: Michael S. Tsirkin @ 2020-06-07 14:11 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, eperezma, kvm, virtualization
In-Reply-To: <20200607141057.704085-1-mst@redhat.com>
Reorder functions in the file so they do not rely on forward
declarations, in preparation for making them static
down the road.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/vhost.c | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 5075505cfe55..3ffcba4e27e9 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2299,6 +2299,13 @@ static int fetch_buf(struct vhost_virtqueue *vq)
return 1;
}
+/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
+void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
+{
+ vq->last_avail_idx -= n;
+}
+EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
+
/* This function returns a value > 0 if a descriptor was found, or 0 if none were found.
* A negative code is returned on error. */
static int fetch_descs(struct vhost_virtqueue *vq)
@@ -2413,26 +2420,6 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
}
EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
-/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
-void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
-{
- vq->last_avail_idx -= n;
-}
-EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
-
-/* After we've used one of their buffers, we tell them about it. We'll then
- * want to notify the guest, using eventfd. */
-int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
-{
- struct vring_used_elem heads = {
- cpu_to_vhost32(vq, head),
- cpu_to_vhost32(vq, len)
- };
-
- return vhost_add_used_n(vq, &heads, 1);
-}
-EXPORT_SYMBOL_GPL(vhost_add_used);
-
static int __vhost_add_used_n(struct vhost_virtqueue *vq,
struct vring_used_elem *heads,
unsigned count)
@@ -2502,6 +2489,19 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
}
EXPORT_SYMBOL_GPL(vhost_add_used_n);
+/* After we've used one of their buffers, we tell them about it. We'll then
+ * want to notify the guest, using eventfd. */
+int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
+{
+ struct vring_used_elem heads = {
+ cpu_to_vhost32(vq, head),
+ cpu_to_vhost32(vq, len)
+ };
+
+ return vhost_add_used_n(vq, &heads, 1);
+}
+EXPORT_SYMBOL_GPL(vhost_add_used);
+
static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
__u16 old, new;
--
MST
^ permalink raw reply related
* [PATCH RFC v5 05/13] vhost/net: pass net specific struct pointer
From: Michael S. Tsirkin @ 2020-06-07 14:11 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, eperezma, kvm, virtualization
In-Reply-To: <20200607141057.704085-1-mst@redhat.com>
In preparation for further cleanup, pass a net-specific pointer
to the ubuf callbacks so we can move net-specific fields
out to the net structures.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/vhost/net.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index bf5e1d81ae25..ff594eec8ae3 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -94,7 +94,7 @@ struct vhost_net_ubuf_ref {
*/
atomic_t refcount;
wait_queue_head_t wait;
- struct vhost_virtqueue *vq;
+ struct vhost_net_virtqueue *nvq;
};
#define VHOST_NET_BATCH 64
@@ -231,7 +231,7 @@ static void vhost_net_enable_zcopy(int vq)
}
static struct vhost_net_ubuf_ref *
-vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy)
+vhost_net_ubuf_alloc(struct vhost_net_virtqueue *nvq, bool zcopy)
{
struct vhost_net_ubuf_ref *ubufs;
/* No zero copy backend? Nothing to count. */
@@ -242,7 +242,7 @@ vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy)
return ERR_PTR(-ENOMEM);
atomic_set(&ubufs->refcount, 1);
init_waitqueue_head(&ubufs->wait);
- ubufs->vq = vq;
+ ubufs->nvq = nvq;
return ubufs;
}
@@ -384,13 +384,13 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
{
struct vhost_net_ubuf_ref *ubufs = ubuf->ctx;
- struct vhost_virtqueue *vq = ubufs->vq;
+ struct vhost_net_virtqueue *nvq = ubufs->nvq;
int cnt;
rcu_read_lock_bh();
/* set len to mark this desc buffers done DMA */
- vq->heads[ubuf->desc].len = success ?
+ nvq->vq.heads[ubuf->desc].in_len = success ?
VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
cnt = vhost_net_ubuf_put(ubufs);
@@ -402,7 +402,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
* less than 10% of times).
*/
if (cnt <= 1 || !(cnt % 16))
- vhost_poll_queue(&vq->poll);
+ vhost_poll_queue(&nvq->vq.poll);
rcu_read_unlock_bh();
}
@@ -1525,7 +1525,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
/* start polling new socket */
oldsock = vhost_vq_get_backend(vq);
if (sock != oldsock) {
- ubufs = vhost_net_ubuf_alloc(vq,
+ ubufs = vhost_net_ubuf_alloc(nvq,
sock && vhost_sock_zcopy(sock));
if (IS_ERR(ubufs)) {
r = PTR_ERR(ubufs);
--
MST
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox