qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] virtio: don't zero out memory region cache for indirect descriptors
@ 2023-08-11 14:34 Ilya Maximets
  2023-08-14 14:17 ` Stefan Hajnoczi
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Ilya Maximets @ 2023-08-11 14:34 UTC (permalink / raw)
  To: qemu-devel
  Cc: Jason Wang, Paolo Bonzini, Stefan Hajnoczi, Michael S. Tsirkin,
	Ilya Maximets

Lots of virtio functions that are on a hot path in data transmission
are initializing indirect descriptor cache at the point of stack
allocation.  It's a 112 byte structure that is getting zeroed out on
each call adding unnecessary overhead.  It's going to be correctly
initialized later via special init function.  The only reason to
actually initialize right away is the ability to safely destruct it.
Replacing a designated initializer with a function to only initialize
what is necessary.

Removal of the unnecessary stack initializations improves throughput
of virtio-net devices in terms of 64B packets per second by 6-14 %
depending on the case.  Tested with a proposed af-xdp network backend
and a dpdk testpmd application in the guest, but should be beneficial
for other virtio devices as well.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
---

Version 2:

  * Introduced an initialization function, so we don't need to compare
    pointers in the end. [Stefan]
  * Removed the now unused macro. [Jason]

 hw/virtio/virtio.c    | 20 +++++++++++++++-----
 include/exec/memory.h | 16 +++++++++++++---
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 309038fd46..3d768fda5a 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1071,10 +1071,12 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
     VirtIODevice *vdev = vq->vdev;
     unsigned int idx;
     unsigned int total_bufs, in_total, out_total;
-    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+    MemoryRegionCache indirect_desc_cache;
     int64_t len = 0;
     int rc;
 
+    address_space_cache_init_empty(&indirect_desc_cache);
+
     idx = vq->last_avail_idx;
     total_bufs = in_total = out_total = 0;
 
@@ -1207,12 +1209,14 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
     VirtIODevice *vdev = vq->vdev;
     unsigned int idx;
     unsigned int total_bufs, in_total, out_total;
+    MemoryRegionCache indirect_desc_cache;
     MemoryRegionCache *desc_cache;
-    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
     int64_t len = 0;
     VRingPackedDesc desc;
     bool wrap_counter;
 
+    address_space_cache_init_empty(&indirect_desc_cache);
+
     idx = vq->last_avail_idx;
     wrap_counter = vq->last_avail_wrap_counter;
     total_bufs = in_total = out_total = 0;
@@ -1487,7 +1491,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
 {
     unsigned int i, head, max;
     VRingMemoryRegionCaches *caches;
-    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+    MemoryRegionCache indirect_desc_cache;
     MemoryRegionCache *desc_cache;
     int64_t len;
     VirtIODevice *vdev = vq->vdev;
@@ -1498,6 +1502,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
     VRingDesc desc;
     int rc;
 
+    address_space_cache_init_empty(&indirect_desc_cache);
+
     RCU_READ_LOCK_GUARD();
     if (virtio_queue_empty_rcu(vq)) {
         goto done;
@@ -1624,7 +1630,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
 {
     unsigned int i, max;
     VRingMemoryRegionCaches *caches;
-    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+    MemoryRegionCache indirect_desc_cache;
     MemoryRegionCache *desc_cache;
     int64_t len;
     VirtIODevice *vdev = vq->vdev;
@@ -1636,6 +1642,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
     uint16_t id;
     int rc;
 
+    address_space_cache_init_empty(&indirect_desc_cache);
+
     RCU_READ_LOCK_GUARD();
     if (virtio_queue_packed_empty_rcu(vq)) {
         goto done;
@@ -3935,13 +3943,15 @@ VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
     } else {
         unsigned int head, i, max;
         VRingMemoryRegionCaches *caches;
-        MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+        MemoryRegionCache indirect_desc_cache;
         MemoryRegionCache *desc_cache;
         VRingDesc desc;
         VirtioRingDescList *list = NULL;
         VirtioRingDescList *node;
         int rc; int ndescs;
 
+        address_space_cache_init_empty(&indirect_desc_cache);
+
         RCU_READ_LOCK_GUARD();
 
         max = vq->vring.num;
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 68284428f8..b1c4329d11 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -2664,9 +2664,6 @@ struct MemoryRegionCache {
     bool is_write;
 };
 
-#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mrs.mr = NULL })
-
-
 /* address_space_ld*_cached: load from a cached #MemoryRegion
  * address_space_st*_cached: store into a cached #MemoryRegion
  *
@@ -2755,6 +2752,19 @@ int64_t address_space_cache_init(MemoryRegionCache *cache,
                                  hwaddr len,
                                  bool is_write);
 
+/**
+ * address_space_cache_init_empty: Initialize empty #MemoryRegionCache
+ *
+ * @cache: The #MemoryRegionCache to operate on.
+ *
+ * Initializes #MemoryRegionCache structure without memory region attached.
+ * Cache initialized this way can only be safely destroyed, but not used.
+ */
+static inline void address_space_cache_init_empty(MemoryRegionCache *cache)
+{
+    cache->mrs.mr = NULL;
+}
+
 /**
  * address_space_cache_invalidate: complete a write to a #MemoryRegionCache
  *
-- 
2.40.1



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] virtio: don't zero out memory region cache for indirect descriptors
  2023-08-11 14:34 [PATCH v2] virtio: don't zero out memory region cache for indirect descriptors Ilya Maximets
@ 2023-08-14 14:17 ` Stefan Hajnoczi
  2023-08-15  1:39 ` Jason Wang
  2023-09-25 11:19 ` Ilya Maximets
  2 siblings, 0 replies; 5+ messages in thread
From: Stefan Hajnoczi @ 2023-08-14 14:17 UTC (permalink / raw)
  To: Ilya Maximets; +Cc: qemu-devel, Jason Wang, Paolo Bonzini, Michael S. Tsirkin

[-- Attachment #1: Type: text/plain, Size: 1364 bytes --]

On Fri, Aug 11, 2023 at 04:34:23PM +0200, Ilya Maximets wrote:
> Lots of virtio functions that are on a hot path in data transmission
> are initializing indirect descriptor cache at the point of stack
> allocation.  It's a 112 byte structure that is getting zeroed out on
> each call adding unnecessary overhead.  It's going to be correctly
> initialized later via special init function.  The only reason to
> actually initialize right away is the ability to safely destruct it.
> Replacing a designated initializer with a function to only initialize
> what is necessary.
> 
> Removal of the unnecessary stack initializations improves throughput
> of virtio-net devices in terms of 64B packets per second by 6-14 %
> depending on the case.  Tested with a proposed af-xdp network backend
> and a dpdk testpmd application in the guest, but should be beneficial
> for other virtio devices as well.
> 
> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
> ---
> 
> Version 2:
> 
>   * Introduced an initialization function, so we don't need to compare
>     pointers in the end. [Stefan]
>   * Removed the now unused macro. [Jason]
> 
>  hw/virtio/virtio.c    | 20 +++++++++++++++-----
>  include/exec/memory.h | 16 +++++++++++++---
>  2 files changed, 28 insertions(+), 8 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] virtio: don't zero out memory region cache for indirect descriptors
  2023-08-11 14:34 [PATCH v2] virtio: don't zero out memory region cache for indirect descriptors Ilya Maximets
  2023-08-14 14:17 ` Stefan Hajnoczi
@ 2023-08-15  1:39 ` Jason Wang
  2023-09-25 11:19 ` Ilya Maximets
  2 siblings, 0 replies; 5+ messages in thread
From: Jason Wang @ 2023-08-15  1:39 UTC (permalink / raw)
  To: Ilya Maximets
  Cc: qemu-devel, Paolo Bonzini, Stefan Hajnoczi, Michael S. Tsirkin

On Fri, Aug 11, 2023 at 10:33 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> Lots of virtio functions that are on a hot path in data transmission
> are initializing indirect descriptor cache at the point of stack
> allocation.  It's a 112 byte structure that is getting zeroed out on
> each call adding unnecessary overhead.  It's going to be correctly
> initialized later via special init function.  The only reason to
> actually initialize right away is the ability to safely destruct it.
> Replacing a designated initializer with a function to only initialize
> what is necessary.
>
> Removal of the unnecessary stack initializations improves throughput
> of virtio-net devices in terms of 64B packets per second by 6-14 %
> depending on the case.  Tested with a proposed af-xdp network backend
> and a dpdk testpmd application in the guest, but should be beneficial
> for other virtio devices as well.
>
> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

Acked-by: Jason Wang <jasowang@redhat.com>

Thanks

> ---
>
> Version 2:
>
>   * Introduced an initialization function, so we don't need to compare
>     pointers in the end. [Stefan]
>   * Removed the now unused macro. [Jason]
>
>  hw/virtio/virtio.c    | 20 +++++++++++++++-----
>  include/exec/memory.h | 16 +++++++++++++---
>  2 files changed, 28 insertions(+), 8 deletions(-)
>
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 309038fd46..3d768fda5a 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -1071,10 +1071,12 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
>      VirtIODevice *vdev = vq->vdev;
>      unsigned int idx;
>      unsigned int total_bufs, in_total, out_total;
> -    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> +    MemoryRegionCache indirect_desc_cache;
>      int64_t len = 0;
>      int rc;
>
> +    address_space_cache_init_empty(&indirect_desc_cache);
> +
>      idx = vq->last_avail_idx;
>      total_bufs = in_total = out_total = 0;
>
> @@ -1207,12 +1209,14 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
>      VirtIODevice *vdev = vq->vdev;
>      unsigned int idx;
>      unsigned int total_bufs, in_total, out_total;
> +    MemoryRegionCache indirect_desc_cache;
>      MemoryRegionCache *desc_cache;
> -    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
>      int64_t len = 0;
>      VRingPackedDesc desc;
>      bool wrap_counter;
>
> +    address_space_cache_init_empty(&indirect_desc_cache);
> +
>      idx = vq->last_avail_idx;
>      wrap_counter = vq->last_avail_wrap_counter;
>      total_bufs = in_total = out_total = 0;
> @@ -1487,7 +1491,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
>  {
>      unsigned int i, head, max;
>      VRingMemoryRegionCaches *caches;
> -    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> +    MemoryRegionCache indirect_desc_cache;
>      MemoryRegionCache *desc_cache;
>      int64_t len;
>      VirtIODevice *vdev = vq->vdev;
> @@ -1498,6 +1502,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
>      VRingDesc desc;
>      int rc;
>
> +    address_space_cache_init_empty(&indirect_desc_cache);
> +
>      RCU_READ_LOCK_GUARD();
>      if (virtio_queue_empty_rcu(vq)) {
>          goto done;
> @@ -1624,7 +1630,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
>  {
>      unsigned int i, max;
>      VRingMemoryRegionCaches *caches;
> -    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> +    MemoryRegionCache indirect_desc_cache;
>      MemoryRegionCache *desc_cache;
>      int64_t len;
>      VirtIODevice *vdev = vq->vdev;
> @@ -1636,6 +1642,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
>      uint16_t id;
>      int rc;
>
> +    address_space_cache_init_empty(&indirect_desc_cache);
> +
>      RCU_READ_LOCK_GUARD();
>      if (virtio_queue_packed_empty_rcu(vq)) {
>          goto done;
> @@ -3935,13 +3943,15 @@ VirtioQueueElement *qmp_x_query_virtio_queue_element(const char *path,
>      } else {
>          unsigned int head, i, max;
>          VRingMemoryRegionCaches *caches;
> -        MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
> +        MemoryRegionCache indirect_desc_cache;
>          MemoryRegionCache *desc_cache;
>          VRingDesc desc;
>          VirtioRingDescList *list = NULL;
>          VirtioRingDescList *node;
>          int rc; int ndescs;
>
> +        address_space_cache_init_empty(&indirect_desc_cache);
> +
>          RCU_READ_LOCK_GUARD();
>
>          max = vq->vring.num;
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 68284428f8..b1c4329d11 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -2664,9 +2664,6 @@ struct MemoryRegionCache {
>      bool is_write;
>  };
>
> -#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mrs.mr = NULL })
> -
> -
>  /* address_space_ld*_cached: load from a cached #MemoryRegion
>   * address_space_st*_cached: store into a cached #MemoryRegion
>   *
> @@ -2755,6 +2752,19 @@ int64_t address_space_cache_init(MemoryRegionCache *cache,
>                                   hwaddr len,
>                                   bool is_write);
>
> +/**
> + * address_space_cache_init_empty: Initialize empty #MemoryRegionCache
> + *
> + * @cache: The #MemoryRegionCache to operate on.
> + *
> + * Initializes #MemoryRegionCache structure without memory region attached.
> + * Cache initialized this way can only be safely destroyed, but not used.
> + */
> +static inline void address_space_cache_init_empty(MemoryRegionCache *cache)
> +{
> +    cache->mrs.mr = NULL;
> +}
> +
>  /**
>   * address_space_cache_invalidate: complete a write to a #MemoryRegionCache
>   *
> --
> 2.40.1
>



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] virtio: don't zero out memory region cache for indirect descriptors
  2023-08-11 14:34 [PATCH v2] virtio: don't zero out memory region cache for indirect descriptors Ilya Maximets
  2023-08-14 14:17 ` Stefan Hajnoczi
  2023-08-15  1:39 ` Jason Wang
@ 2023-09-25 11:19 ` Ilya Maximets
  2023-09-25 14:17   ` Stefan Hajnoczi
  2 siblings, 1 reply; 5+ messages in thread
From: Ilya Maximets @ 2023-09-25 11:19 UTC (permalink / raw)
  To: qemu-devel
  Cc: i.maximets, Jason Wang, Paolo Bonzini, Stefan Hajnoczi,
	Michael S. Tsirkin

On 8/11/23 16:34, Ilya Maximets wrote:
> Lots of virtio functions that are on a hot path in data transmission
> are initializing indirect descriptor cache at the point of stack
> allocation.  It's a 112 byte structure that is getting zeroed out on
> each call adding unnecessary overhead.  It's going to be correctly
> initialized later via special init function.  The only reason to
> actually initialize right away is the ability to safely destruct it.
> Replacing a designated initializer with a function to only initialize
> what is necessary.
> 
> Removal of the unnecessary stack initializations improves throughput
> of virtio-net devices in terms of 64B packets per second by 6-14 %
> depending on the case.  Tested with a proposed af-xdp network backend
> and a dpdk testpmd application in the guest, but should be beneficial
> for other virtio devices as well.
> 
> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
> ---
> 
> Version 2:
> 
>   * Introduced an initialization function, so we don't need to compare
>     pointers in the end. [Stefan]
>   * Removed the now unused macro. [Jason]
> 
>  hw/virtio/virtio.c    | 20 +++++++++++++++-----
>  include/exec/memory.h | 16 +++++++++++++---
>  2 files changed, 28 insertions(+), 8 deletions(-)

Kind reminder.

This patch was posted quite some time ago and it has 2 reviewed/acked-by tags.
Just making sure it didn't fall through the cracks.

Best regards, Ilya Maximets.


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] virtio: don't zero out memory region cache for indirect descriptors
  2023-09-25 11:19 ` Ilya Maximets
@ 2023-09-25 14:17   ` Stefan Hajnoczi
  0 siblings, 0 replies; 5+ messages in thread
From: Stefan Hajnoczi @ 2023-09-25 14:17 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: qemu-devel, Jason Wang, Paolo Bonzini, Stefan Hajnoczi,
	Ilya Maximets

On Mon, 25 Sept 2023 at 07:18, Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 8/11/23 16:34, Ilya Maximets wrote:
> > Lots of virtio functions that are on a hot path in data transmission
> > are initializing indirect descriptor cache at the point of stack
> > allocation.  It's a 112 byte structure that is getting zeroed out on
> > each call adding unnecessary overhead.  It's going to be correctly
> > initialized later via special init function.  The only reason to
> > actually initialize right away is the ability to safely destruct it.
> > Replacing a designated initializer with a function to only initialize
> > what is necessary.
> >
> > Removal of the unnecessary stack initializations improves throughput
> > of virtio-net devices in terms of 64B packets per second by 6-14 %
> > depending on the case.  Tested with a proposed af-xdp network backend
> > and a dpdk testpmd application in the guest, but should be beneficial
> > for other virtio devices as well.
> >
> > Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
> > ---
> >
> > Version 2:
> >
> >   * Introduced an initialization function, so we don't need to compare
> >     pointers in the end. [Stefan]
> >   * Removed the now unused macro. [Jason]
> >
> >  hw/virtio/virtio.c    | 20 +++++++++++++++-----
> >  include/exec/memory.h | 16 +++++++++++++---
> >  2 files changed, 28 insertions(+), 8 deletions(-)
>
> Kind reminder.
>
> This patch was posted quite some time ago and it has 2 reviewed/acked-by tags.
> Just making sure it didn't fall through the cracks.

This should go through Michael's tree.

Stefan

>
> Best regards, Ilya Maximets.
>


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-09-25 14:18 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-08-11 14:34 [PATCH v2] virtio: don't zero out memory region cache for indirect descriptors Ilya Maximets
2023-08-14 14:17 ` Stefan Hajnoczi
2023-08-15  1:39 ` Jason Wang
2023-09-25 11:19 ` Ilya Maximets
2023-09-25 14:17   ` Stefan Hajnoczi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).