* [PATCH 0/2] libvhost-user: lower dependency on QEMU headers
@ 2020-11-18  8:09 marcandre.lureau
  2020-11-18  8:09 ` [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h marcandre.lureau
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: marcandre.lureau @ 2020-11-18  8:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Marc-André Lureau, armbru, stefanha, dgilbert

From: Marc-André Lureau <marcandre.lureau@redhat.com>

Hi,

libvhost-user is meant to be free of any glib dependency and easily
copyable/reusable outside of QEMU. Clean up some dependencies that crept in
recently (the remaining one is qemu/atomic.h, of which only a subset is used).

Marc-André Lureau (2):
  libvhost-user: replace qemu/bswap.h with glibc endian.h
  libvhost-user: replace qemu/memfd.h usage

 contrib/libvhost-user/libvhost-user.c | 127 +++++++++++++++++---------
 1 file changed, 83 insertions(+), 44 deletions(-)

-- 
2.29.0





* [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h
  2020-11-18  8:09 [PATCH 0/2] libvhost-user: lower dependency on QEMU headers marcandre.lureau
@ 2020-11-18  8:09 ` marcandre.lureau
  2020-11-24 11:31   ` Dr. David Alan Gilbert
  2020-11-18  8:09 ` [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage marcandre.lureau
  2020-11-18  8:58 ` [PATCH 0/2] libvhost-user: lower dependency on QEMU headers Stefan Hajnoczi
  2 siblings, 1 reply; 9+ messages in thread
From: marcandre.lureau @ 2020-11-18  8:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Marc-André Lureau, armbru, stefanha, dgilbert

From: Marc-André Lureau <marcandre.lureau@redhat.com>

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
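Not part of this patch, just a reference sketch of how the bswap.h accessors
used below map onto the glibc <endian.h> functions (the vring fields are
little-endian; the helper names here are purely illustrative):

    #include <endian.h>
    #include <stdint.h>

    /* lduw_le_p(&x)  -> le16toh(x)        ldl_le_p(&x)    -> le32toh(x)
     * ldq_le_p(&x)   -> le64toh(x)        stw_le_p(&x, v) -> x = htole16(v) */
    static inline uint16_t vring_load_le16(const uint16_t *p)
    {
        return le16toh(*p);
    }

    static inline void vring_store_le16(uint16_t *p, uint16_t v)
    {
        *p = htole16(v);
    }
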
 contrib/libvhost-user/libvhost-user.c | 77 ++++++++++++++-------------
 1 file changed, 40 insertions(+), 37 deletions(-)

diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index 5c73ffdd6b..1c1cfbf1e7 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -26,6 +26,7 @@
 #include <sys/socket.h>
 #include <sys/eventfd.h>
 #include <sys/mman.h>
+#include <endian.h>
 #include "qemu/compiler.h"
 
 #if defined(__linux__)
@@ -42,7 +43,6 @@
 
 #include "qemu/atomic.h"
 #include "qemu/osdep.h"
-#include "qemu/bswap.h"
 #include "qemu/memfd.h"
 
 #include "libvhost-user.h"
@@ -1081,7 +1081,7 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
         return false;
     }
 
-    vq->used_idx = lduw_le_p(&vq->vring.used->idx);
+    vq->used_idx = le16toh(vq->vring.used->idx);
 
     if (vq->last_avail_idx != vq->used_idx) {
         bool resume = dev->iface->queue_is_processed_in_order &&
@@ -1198,7 +1198,7 @@ vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
         return 0;
     }
 
-    vq->used_idx = lduw_le_p(&vq->vring.used->idx);
+    vq->used_idx = le16toh(vq->vring.used->idx);
     vq->resubmit_num = 0;
     vq->resubmit_list = NULL;
     vq->counter = 0;
@@ -2031,13 +2031,13 @@ vu_queue_started(const VuDev *dev, const VuVirtq *vq)
 static inline uint16_t
 vring_avail_flags(VuVirtq *vq)
 {
-    return lduw_le_p(&vq->vring.avail->flags);
+    return le16toh(vq->vring.avail->flags);
 }
 
 static inline uint16_t
 vring_avail_idx(VuVirtq *vq)
 {
-    vq->shadow_avail_idx = lduw_le_p(&vq->vring.avail->idx);
+    vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);
 
     return vq->shadow_avail_idx;
 }
@@ -2045,7 +2045,7 @@ vring_avail_idx(VuVirtq *vq)
 static inline uint16_t
 vring_avail_ring(VuVirtq *vq, int i)
 {
-    return lduw_le_p(&vq->vring.avail->ring[i]);
+    return le16toh(vq->vring.avail->ring[i]);
 }
 
 static inline uint16_t
@@ -2133,12 +2133,12 @@ virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
                          int i, unsigned int max, unsigned int *next)
 {
     /* If this descriptor says it doesn't chain, we're done. */
-    if (!(lduw_le_p(&desc[i].flags) & VRING_DESC_F_NEXT)) {
+    if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
         return VIRTQUEUE_READ_DESC_DONE;
     }
 
     /* Check they're not leading us off end of descriptors. */
-    *next = lduw_le_p(&desc[i].next);
+    *next = le16toh(desc[i].next);
     /* Make sure compiler knows to grab that: we don't want it changing! */
     smp_wmb();
 
@@ -2181,8 +2181,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
         }
         desc = vq->vring.desc;
 
-        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
-            if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
+        if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
+            if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
                 vu_panic(dev, "Invalid size for indirect buffer table");
                 goto err;
             }
@@ -2195,8 +2195,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
 
             /* loop over the indirect descriptor table */
             indirect = 1;
-            desc_addr = ldq_le_p(&desc[i].addr);
-            desc_len = ldl_le_p(&desc[i].len);
+            desc_addr = le64toh(desc[i].addr);
+            desc_len = le32toh(desc[i].len);
             max = desc_len / sizeof(struct vring_desc);
             read_len = desc_len;
             desc = vu_gpa_to_va(dev, &read_len, desc_addr);
@@ -2223,10 +2223,10 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
                 goto err;
             }
 
-            if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
-                in_total += ldl_le_p(&desc[i].len);
+            if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
+                in_total += le32toh(desc[i].len);
             } else {
-                out_total += ldl_le_p(&desc[i].len);
+                out_total += le32toh(desc[i].len);
             }
             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                 goto done;
@@ -2377,7 +2377,7 @@ vring_used_flags_set_bit(VuVirtq *vq, int mask)
 
     flags = (uint16_t *)((char*)vq->vring.used +
                          offsetof(struct vring_used, flags));
-    stw_le_p(flags, lduw_le_p(flags) | mask);
+    *flags = htole16(le16toh(*flags) | mask);
 }
 
 static inline void
@@ -2387,17 +2387,20 @@ vring_used_flags_unset_bit(VuVirtq *vq, int mask)
 
     flags = (uint16_t *)((char*)vq->vring.used +
                          offsetof(struct vring_used, flags));
-    stw_le_p(flags, lduw_le_p(flags) & ~mask);
+    *flags = htole16(le16toh(*flags) & ~mask);
 }
 
 static inline void
 vring_set_avail_event(VuVirtq *vq, uint16_t val)
 {
+    uint16_t *avail;
+
     if (!vq->notification) {
         return;
     }
 
-    stw_le_p(&vq->vring.used->ring[vq->vring.num], val);
+    avail = (uint16_t *)&vq->vring.used->ring[vq->vring.num];
+    *avail = htole16(val);
 }
 
 void
@@ -2487,15 +2490,15 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
     struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
     int rc;
 
-    if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
-        if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
+    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
+        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
             vu_panic(dev, "Invalid size for indirect buffer table");
             return NULL;
         }
 
         /* loop over the indirect descriptor table */
-        desc_addr = ldq_le_p(&desc[i].addr);
-        desc_len = ldl_le_p(&desc[i].len);
+        desc_addr = le64toh(desc[i].addr);
+        desc_len = le32toh(desc[i].len);
         max = desc_len / sizeof(struct vring_desc);
         read_len = desc_len;
         desc = vu_gpa_to_va(dev, &read_len, desc_addr);
@@ -2517,11 +2520,11 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
 
     /* Collect all the descriptors */
     do {
-        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
+        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
             if (!virtqueue_map_desc(dev, &in_num, iov + out_num,
                                VIRTQUEUE_MAX_SIZE - out_num, true,
-                               ldq_le_p(&desc[i].addr),
-                               ldl_le_p(&desc[i].len))) {
+                               le64toh(desc[i].addr),
+                               le32toh(desc[i].len))) {
                 return NULL;
             }
         } else {
@@ -2531,8 +2534,8 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
             }
             if (!virtqueue_map_desc(dev, &out_num, iov,
                                VIRTQUEUE_MAX_SIZE, false,
-                               ldq_le_p(&desc[i].addr),
-                               ldl_le_p(&desc[i].len))) {
+                               le64toh(desc[i].addr),
+                               le32toh(desc[i].len))) {
                 return NULL;
             }
         }
@@ -2731,15 +2734,15 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
     max = vq->vring.num;
     i = elem->index;
 
-    if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
-        if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
+    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
+        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
             vu_panic(dev, "Invalid size for indirect buffer table");
             return;
         }
 
         /* loop over the indirect descriptor table */
-        desc_addr = ldq_le_p(&desc[i].addr);
-        desc_len = ldl_le_p(&desc[i].len);
+        desc_addr = le64toh(desc[i].addr);
+        desc_len = le32toh(desc[i].len);
         max = desc_len / sizeof(struct vring_desc);
         read_len = desc_len;
         desc = vu_gpa_to_va(dev, &read_len, desc_addr);
@@ -2765,9 +2768,9 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
             return;
         }
 
-        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
-            min = MIN(ldl_le_p(&desc[i].len), len);
-            vu_log_write(dev, ldq_le_p(&desc[i].addr), min);
+        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
+            min = MIN(le32toh(desc[i].len), len);
+            vu_log_write(dev, le64toh(desc[i].addr), min);
             len -= min;
         }
 
@@ -2792,15 +2795,15 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq,
 
     idx = (idx + vq->used_idx) % vq->vring.num;
 
-    stl_le_p(&uelem.id, elem->index);
-    stl_le_p(&uelem.len, len);
+    uelem.id = htole32(elem->index);
+    uelem.len = htole32(len);
     vring_used_write(dev, vq, &uelem, idx);
 }
 
 static inline
 void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
 {
-    stw_le_p(&vq->vring.used->idx, val);
+    vq->vring.used->idx = htole16(val);
     vu_log_write(dev,
                  vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
                  sizeof(vq->vring.used->idx));
-- 
2.29.0




* [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage
  2020-11-18  8:09 [PATCH 0/2] libvhost-user: lower dependency on QEMU headers marcandre.lureau
  2020-11-18  8:09 ` [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h marcandre.lureau
@ 2020-11-18  8:09 ` marcandre.lureau
  2020-11-24 11:54   ` Dr. David Alan Gilbert
  2020-11-18  8:58 ` [PATCH 0/2] libvhost-user: lower dependency on QEMU headers Stefan Hajnoczi
  2 siblings, 1 reply; 9+ messages in thread
From: marcandre.lureau @ 2020-11-18  8:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Marc-André Lureau, armbru, stefanha, dgilbert

From: Marc-André Lureau <marcandre.lureau@redhat.com>

Undo the damage from commit 5f9ff1eff3 ("libvhost-user: Support tracking
inflight I/O in shared memory"), which introduced a glib dependency through
the inclusion of osdep.h.

libvhost-user.c tries to stay free from glib usage.

Use the glibc memfd_create() call directly when available (assumed to be
the case when MFD_ALLOW_SEALING is defined).

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 contrib/libvhost-user/libvhost-user.c | 50 +++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 7 deletions(-)

diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index 1c1cfbf1e7..805521859d 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -42,8 +42,6 @@
 #endif
 
 #include "qemu/atomic.h"
-#include "qemu/osdep.h"
-#include "qemu/memfd.h"
 
 #include "libvhost-user.h"
 
@@ -1615,11 +1613,45 @@ vu_inflight_queue_size(uint16_t queue_size)
            sizeof(uint16_t), INFLIGHT_ALIGNMENT);
 }
 
+#ifdef MFD_ALLOW_SEALING
+static void *
+memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
+{
+    void *ptr;
+    int ret;
+
+    *fd = memfd_create(name, MFD_ALLOW_SEALING);
+    if (*fd < 0) {
+        return NULL;
+    }
+
+    ret = ftruncate(*fd, size);
+    if (ret < 0) {
+        close(*fd);
+        return NULL;
+    }
+
+    ret = fcntl(*fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);
+    if (ret < 0) {
+        close(*fd);
+        return NULL;
+    }
+
+    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
+    if (ptr == MAP_FAILED) {
+        close(*fd);
+        return NULL;
+    }
+
+    return ptr;
+}
+#endif
+
 static bool
 vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
 {
-    int fd;
-    void *addr;
+    int fd = -1;
+    void *addr = NULL;
     uint64_t mmap_size;
     uint16_t num_queues, queue_size;
 
@@ -1637,9 +1669,13 @@ vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
 
     mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
 
-    addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
-                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
-                            &fd, NULL);
+#ifdef MFD_ALLOW_SEALING
+    addr = memfd_alloc("vhost-inflight", mmap_size,
+                       F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
+                       &fd);
+#else
+    vu_panic(dev, "Not implemented: memfd support is missing");
+#endif
 
     if (!addr) {
         vu_panic(dev, "Failed to alloc vhost inflight area");
-- 
2.29.0




* Re: [PATCH 0/2] libvhost-user: lower dependency on QEMU headers
  2020-11-18  8:09 [PATCH 0/2] libvhost-user: lower dependency on QEMU headers marcandre.lureau
  2020-11-18  8:09 ` [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h marcandre.lureau
  2020-11-18  8:09 ` [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage marcandre.lureau
@ 2020-11-18  8:58 ` Stefan Hajnoczi
  2 siblings, 0 replies; 9+ messages in thread
From: Stefan Hajnoczi @ 2020-11-18  8:58 UTC (permalink / raw)
  To: marcandre.lureau; +Cc: qemu-devel, dgilbert, armbru


On Wed, Nov 18, 2020 at 12:09:00PM +0400, marcandre.lureau@redhat.com wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
> 
> Hi,
> 
> libvhost-user is meant to be free of glib dependency, and easily
> copyable/reusable outside of QEMU. Clean-up some dependencies that crept in
> recently (the one remaining is qemu/atomic.h, from which a subset is used)
> 
> Marc-André Lureau (2):
>   libvhost-user: replace qemu/bswap.h with glibc endian.h
>   libvhost-user: replace qemu/memfd.h usage
> 
>  contrib/libvhost-user/libvhost-user.c | 127 +++++++++++++++++---------
>  1 file changed, 83 insertions(+), 44 deletions(-)

Let's find a way to enforce this. How about a libvhost-user-only build
in the CI system without glib2-devel? Also maybe a way to poison QEMU
headers? It might be simpler to move contrib/libvhost-user to a separate
git repo though.
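
To illustrate the poisoning idea, here is a rough sketch with a hypothetical
guard macro; in a glib-free compile test, something along these lines would
make the build fail if the dependencies creep back in:

    /* Hypothetical standalone-build guard (LIBVHOST_USER_STANDALONE_BUILD
     * is not an existing macro): reject glib and QEMU-internal helpers. */
    #ifdef LIBVHOST_USER_STANDALONE_BUILD
    #pragma GCC poison g_malloc g_free g_new0
    #pragma GCC poison qemu_memfd_alloc lduw_le_p stw_le_p
    #endif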

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>



* Re: [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h
  2020-11-18  8:09 ` [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h marcandre.lureau
@ 2020-11-24 11:31   ` Dr. David Alan Gilbert
  0 siblings, 0 replies; 9+ messages in thread
From: Dr. David Alan Gilbert @ 2020-11-24 11:31 UTC (permalink / raw)
  To: marcandre.lureau; +Cc: qemu-devel, stefanha, armbru

* marcandre.lureau@redhat.com (marcandre.lureau@redhat.com) wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

> ---
>  contrib/libvhost-user/libvhost-user.c | 77 ++++++++++++++-------------
>  1 file changed, 40 insertions(+), 37 deletions(-)
> 
> diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> index 5c73ffdd6b..1c1cfbf1e7 100644
> --- a/contrib/libvhost-user/libvhost-user.c
> +++ b/contrib/libvhost-user/libvhost-user.c
> @@ -26,6 +26,7 @@
>  #include <sys/socket.h>
>  #include <sys/eventfd.h>
>  #include <sys/mman.h>
> +#include <endian.h>
>  #include "qemu/compiler.h"
>  
>  #if defined(__linux__)
> @@ -42,7 +43,6 @@
>  
>  #include "qemu/atomic.h"
>  #include "qemu/osdep.h"
> -#include "qemu/bswap.h"
>  #include "qemu/memfd.h"
>  
>  #include "libvhost-user.h"
> @@ -1081,7 +1081,7 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
>          return false;
>      }
>  
> -    vq->used_idx = lduw_le_p(&vq->vring.used->idx);
> +    vq->used_idx = le16toh(vq->vring.used->idx);
>  
>      if (vq->last_avail_idx != vq->used_idx) {
>          bool resume = dev->iface->queue_is_processed_in_order &&
> @@ -1198,7 +1198,7 @@ vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
>          return 0;
>      }
>  
> -    vq->used_idx = lduw_le_p(&vq->vring.used->idx);
> +    vq->used_idx = le16toh(vq->vring.used->idx);
>      vq->resubmit_num = 0;
>      vq->resubmit_list = NULL;
>      vq->counter = 0;
> @@ -2031,13 +2031,13 @@ vu_queue_started(const VuDev *dev, const VuVirtq *vq)
>  static inline uint16_t
>  vring_avail_flags(VuVirtq *vq)
>  {
> -    return lduw_le_p(&vq->vring.avail->flags);
> +    return le16toh(vq->vring.avail->flags);
>  }
>  
>  static inline uint16_t
>  vring_avail_idx(VuVirtq *vq)
>  {
> -    vq->shadow_avail_idx = lduw_le_p(&vq->vring.avail->idx);
> +    vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);
>  
>      return vq->shadow_avail_idx;
>  }
> @@ -2045,7 +2045,7 @@ vring_avail_idx(VuVirtq *vq)
>  static inline uint16_t
>  vring_avail_ring(VuVirtq *vq, int i)
>  {
> -    return lduw_le_p(&vq->vring.avail->ring[i]);
> +    return le16toh(vq->vring.avail->ring[i]);
>  }
>  
>  static inline uint16_t
> @@ -2133,12 +2133,12 @@ virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
>                           int i, unsigned int max, unsigned int *next)
>  {
>      /* If this descriptor says it doesn't chain, we're done. */
> -    if (!(lduw_le_p(&desc[i].flags) & VRING_DESC_F_NEXT)) {
> +    if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
>          return VIRTQUEUE_READ_DESC_DONE;
>      }
>  
>      /* Check they're not leading us off end of descriptors. */
> -    *next = lduw_le_p(&desc[i].next);
> +    *next = le16toh(desc[i].next);
>      /* Make sure compiler knows to grab that: we don't want it changing! */
>      smp_wmb();
>  
> @@ -2181,8 +2181,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
>          }
>          desc = vq->vring.desc;
>  
> -        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
> -            if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
> +        if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
> +            if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
>                  vu_panic(dev, "Invalid size for indirect buffer table");
>                  goto err;
>              }
> @@ -2195,8 +2195,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
>  
>              /* loop over the indirect descriptor table */
>              indirect = 1;
> -            desc_addr = ldq_le_p(&desc[i].addr);
> -            desc_len = ldl_le_p(&desc[i].len);
> +            desc_addr = le64toh(desc[i].addr);
> +            desc_len = le32toh(desc[i].len);
>              max = desc_len / sizeof(struct vring_desc);
>              read_len = desc_len;
>              desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2223,10 +2223,10 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
>                  goto err;
>              }
>  
> -            if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
> -                in_total += ldl_le_p(&desc[i].len);
> +            if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
> +                in_total += le32toh(desc[i].len);
>              } else {
> -                out_total += ldl_le_p(&desc[i].len);
> +                out_total += le32toh(desc[i].len);
>              }
>              if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
>                  goto done;
> @@ -2377,7 +2377,7 @@ vring_used_flags_set_bit(VuVirtq *vq, int mask)
>  
>      flags = (uint16_t *)((char*)vq->vring.used +
>                           offsetof(struct vring_used, flags));
> -    stw_le_p(flags, lduw_le_p(flags) | mask);
> +    *flags = htole16(le16toh(*flags) | mask);
>  }
>  
>  static inline void
> @@ -2387,17 +2387,20 @@ vring_used_flags_unset_bit(VuVirtq *vq, int mask)
>  
>      flags = (uint16_t *)((char*)vq->vring.used +
>                           offsetof(struct vring_used, flags));
> -    stw_le_p(flags, lduw_le_p(flags) & ~mask);
> +    *flags = htole16(le16toh(*flags) & ~mask);
>  }
>  
>  static inline void
>  vring_set_avail_event(VuVirtq *vq, uint16_t val)
>  {
> +    uint16_t *avail;
> +
>      if (!vq->notification) {
>          return;
>      }
>  
> -    stw_le_p(&vq->vring.used->ring[vq->vring.num], val);
> +    avail = (uint16_t *)&vq->vring.used->ring[vq->vring.num];
> +    *avail = htole16(val);
>  }
>  
>  void
> @@ -2487,15 +2490,15 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
>      struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
>      int rc;
>  
> -    if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
> -        if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
> +    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
> +        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
>              vu_panic(dev, "Invalid size for indirect buffer table");
>              return NULL;
>          }
>  
>          /* loop over the indirect descriptor table */
> -        desc_addr = ldq_le_p(&desc[i].addr);
> -        desc_len = ldl_le_p(&desc[i].len);
> +        desc_addr = le64toh(desc[i].addr);
> +        desc_len = le32toh(desc[i].len);
>          max = desc_len / sizeof(struct vring_desc);
>          read_len = desc_len;
>          desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2517,11 +2520,11 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
>  
>      /* Collect all the descriptors */
>      do {
> -        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
> +        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
>              if (!virtqueue_map_desc(dev, &in_num, iov + out_num,
>                                 VIRTQUEUE_MAX_SIZE - out_num, true,
> -                               ldq_le_p(&desc[i].addr),
> -                               ldl_le_p(&desc[i].len))) {
> +                               le64toh(desc[i].addr),
> +                               le32toh(desc[i].len))) {
>                  return NULL;
>              }
>          } else {
> @@ -2531,8 +2534,8 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
>              }
>              if (!virtqueue_map_desc(dev, &out_num, iov,
>                                 VIRTQUEUE_MAX_SIZE, false,
> -                               ldq_le_p(&desc[i].addr),
> -                               ldl_le_p(&desc[i].len))) {
> +                               le64toh(desc[i].addr),
> +                               le32toh(desc[i].len))) {
>                  return NULL;
>              }
>          }
> @@ -2731,15 +2734,15 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
>      max = vq->vring.num;
>      i = elem->index;
>  
> -    if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
> -        if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
> +    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
> +        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
>              vu_panic(dev, "Invalid size for indirect buffer table");
>              return;
>          }
>  
>          /* loop over the indirect descriptor table */
> -        desc_addr = ldq_le_p(&desc[i].addr);
> -        desc_len = ldl_le_p(&desc[i].len);
> +        desc_addr = le64toh(desc[i].addr);
> +        desc_len = le32toh(desc[i].len);
>          max = desc_len / sizeof(struct vring_desc);
>          read_len = desc_len;
>          desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2765,9 +2768,9 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
>              return;
>          }
>  
> -        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
> -            min = MIN(ldl_le_p(&desc[i].len), len);
> -            vu_log_write(dev, ldq_le_p(&desc[i].addr), min);
> +        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
> +            min = MIN(le32toh(desc[i].len), len);
> +            vu_log_write(dev, le64toh(desc[i].addr), min);
>              len -= min;
>          }
>  
> @@ -2792,15 +2795,15 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq,
>  
>      idx = (idx + vq->used_idx) % vq->vring.num;
>  
> -    stl_le_p(&uelem.id, elem->index);
> -    stl_le_p(&uelem.len, len);
> +    uelem.id = htole32(elem->index);
> +    uelem.len = htole32(len);
>      vring_used_write(dev, vq, &uelem, idx);
>  }
>  
>  static inline
>  void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
>  {
> -    stw_le_p(&vq->vring.used->idx, val);
> +    vq->vring.used->idx = htole16(val);
>      vu_log_write(dev,
>                   vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
>                   sizeof(vq->vring.used->idx));
> -- 
> 2.29.0
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK




* Re: [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage
  2020-11-18  8:09 ` [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage marcandre.lureau
@ 2020-11-24 11:54   ` Dr. David Alan Gilbert
  2020-11-24 13:32     ` Marc-André Lureau
  0 siblings, 1 reply; 9+ messages in thread
From: Dr. David Alan Gilbert @ 2020-11-24 11:54 UTC (permalink / raw)
  To: marcandre.lureau; +Cc: qemu-devel, stefanha, armbru

* marcandre.lureau@redhat.com (marcandre.lureau@redhat.com) wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
> 
> Undo the damage from commit 5f9ff1eff3 ("libvhost-user: Support tracking
> inflight I/O in shared memory") which introduced glib dependency through
> osdep.h inclusion.
> 
> libvhost-user.c tries to stay free from glib usage.
> 
> Use glibc memfd_create directly when available (assumed so when
> MFD_ALLOW_SEALING is defined).
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
>  contrib/libvhost-user/libvhost-user.c | 50 +++++++++++++++++++++++----
>  1 file changed, 43 insertions(+), 7 deletions(-)
> 
> diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> index 1c1cfbf1e7..805521859d 100644
> --- a/contrib/libvhost-user/libvhost-user.c
> +++ b/contrib/libvhost-user/libvhost-user.c
> @@ -42,8 +42,6 @@
>  #endif
>  
>  #include "qemu/atomic.h"
> -#include "qemu/osdep.h"
> -#include "qemu/memfd.h"
>  
>  #include "libvhost-user.h"
>  
> @@ -1615,11 +1613,45 @@ vu_inflight_queue_size(uint16_t queue_size)
>             sizeof(uint16_t), INFLIGHT_ALIGNMENT);
>  }
>  
> +#ifdef MFD_ALLOW_SEALING
> +static void *
> +memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
> +{
> +    void *ptr;
> +    int ret;
> +
> +    *fd = memfd_create(name, MFD_ALLOW_SEALING);
> +    if (*fd < 0) {
> +        return NULL;
> +    }
> +
> +    ret = ftruncate(*fd, size);

Do you need to do any of the page alignment?

> +    if (ret < 0) {
> +        close(*fd);
> +        return NULL;
> +    }
> +
> +    ret = fcntl(*fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);

I think you'd intended to use the 'flags' parameter there.
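
Presumably something like this was intended (sketch, assuming 'flags' is
meant to carry the caller's F_SEAL_* bits):

    ret = fcntl(*fd, F_ADD_SEALS, flags);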

> +    if (ret < 0) {
> +        close(*fd);
> +        return NULL;
> +    }
> +
> +    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
> +    if (ptr == MAP_FAILED) {
> +        close(*fd);
> +        return NULL;
> +    }
> +
> +    return ptr;
> +}
> +#endif
> +
>  static bool
>  vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
>  {
> -    int fd;
> -    void *addr;
> +    int fd = -1;
> +    void *addr = NULL;
>      uint64_t mmap_size;
>      uint16_t num_queues, queue_size;
>  
> @@ -1637,9 +1669,13 @@ vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
>  
>      mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
>  
> -    addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
> -                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> -                            &fd, NULL);
> +#ifdef MFD_ALLOW_SEALING
> +    addr = memfd_alloc("vhost-inflight", mmap_size,
> +                       F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> +                       &fd);
> +#else
> +    vu_panic(dev, "Not implemented: memfd support is missing");

Should there be an ifdef somewhere on the declared features, so it
doesn't get this far because it won't negotiate the feature?

Dave

> +#endif
>  
>      if (!addr) {
>          vu_panic(dev, "Failed to alloc vhost inflight area");
> -- 
> 2.29.0
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK




* Re: [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage
  2020-11-24 11:54   ` Dr. David Alan Gilbert
@ 2020-11-24 13:32     ` Marc-André Lureau
  2020-11-24 13:35       ` Marc-André Lureau
  2020-11-24 14:39       ` Dr. David Alan Gilbert
  0 siblings, 2 replies; 9+ messages in thread
From: Marc-André Lureau @ 2020-11-24 13:32 UTC (permalink / raw)
  To: Dr. David Alan Gilbert; +Cc: qemu-devel, Stefan Hajnoczi, Armbruster, Markus

Hi

On Tue, Nov 24, 2020 at 3:54 PM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * marcandre.lureau@redhat.com (marcandre.lureau@redhat.com) wrote:
> > From: Marc-André Lureau <marcandre.lureau@redhat.com>
> >
> > Undo the damage from commit 5f9ff1eff3 ("libvhost-user: Support tracking
> > inflight I/O in shared memory") which introduced glib dependency through
> > osdep.h inclusion.
> >
> > libvhost-user.c tries to stay free from glib usage.
> >
> > Use glibc memfd_create directly when available (assumed so when
> > MFD_ALLOW_SEALING is defined).
> >
> > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > ---
> >  contrib/libvhost-user/libvhost-user.c | 50 +++++++++++++++++++++++----
> >  1 file changed, 43 insertions(+), 7 deletions(-)
> >
> > diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> > index 1c1cfbf1e7..805521859d 100644
> > --- a/contrib/libvhost-user/libvhost-user.c
> > +++ b/contrib/libvhost-user/libvhost-user.c
> > @@ -42,8 +42,6 @@
> >  #endif
> >
> >  #include "qemu/atomic.h"
> > -#include "qemu/osdep.h"
> > -#include "qemu/memfd.h"
> >
> >  #include "libvhost-user.h"
> >
> > @@ -1615,11 +1613,45 @@ vu_inflight_queue_size(uint16_t queue_size)
> >             sizeof(uint16_t), INFLIGHT_ALIGNMENT);
> >  }
> >
> > +#ifdef MFD_ALLOW_SEALING
> > +static void *
> > +memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
> > +{
> > +    void *ptr;
> > +    int ret;
> > +
> > +    *fd = memfd_create(name, MFD_ALLOW_SEALING);
> > +    if (*fd < 0) {
> > +        return NULL;
> > +    }
> > +
> > +    ret = ftruncate(*fd, size);
>
> Do you need to do any of the page alignment?

We don't do any in util/memfd.c, and I don't see an explicit requirement
in memfd_create(). (However, util/memfd.c did check for a power of 2 for
hugetlb usage, but that isn't necessary here.)

On mmap(): "if addr is NULL, then the kernel chooses the (page-aligned) address".

>
> > +    if (ret < 0) {
> > +        close(*fd);
> > +        return NULL;
> > +    }
> > +
> > +    ret = fcntl(*fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);
>
> I think you'd intended to use the 'flags' parameter there.

indeed, thanks

>
> > +    if (ret < 0) {
> > +        close(*fd);
> > +        return NULL;
> > +    }
> > +
> > +    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
> > +    if (ptr == MAP_FAILED) {
> > +        close(*fd);
> > +        return NULL;
> > +    }
> > +
> > +    return ptr;
> > +}
> > +#endif
> > +
> >  static bool
> >  vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> >  {
> > -    int fd;
> > -    void *addr;
> > +    int fd = -1;
> > +    void *addr = NULL;
> >      uint64_t mmap_size;
> >      uint16_t num_queues, queue_size;
> >
> > @@ -1637,9 +1669,13 @@ vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> >
> >      mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
> >
> > -    addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
> > -                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > -                            &fd, NULL);
> > +#ifdef MFD_ALLOW_SEALING
> > +    addr = memfd_alloc("vhost-inflight", mmap_size,
> > +                       F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > +                       &fd);
> > +#else
> > +    vu_panic(dev, "Not implemented: memfd support is missing");
>
> Should there be an ifdef somewhere on the declared features, so it
> doesn't get this far because it wont negotiate the feature?

Sealing grow/shrink came together with memfd; it was one of the main
selling points. I assume that if MFD_ALLOW_SEALING is defined, we have
memfd_create() and the basic libc defines. But yes, if we want to handle
weird corner cases, we should add more ifdef-ry. I'd pass for now.

thanks

>
> Dave
>
> > +#endif
> >
> >      if (!addr) {
> >          vu_panic(dev, "Failed to alloc vhost inflight area");
> > --
> > 2.29.0
> >
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>




* Re: [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage
  2020-11-24 13:32     ` Marc-André Lureau
@ 2020-11-24 13:35       ` Marc-André Lureau
  2020-11-24 14:39       ` Dr. David Alan Gilbert
  1 sibling, 0 replies; 9+ messages in thread
From: Marc-André Lureau @ 2020-11-24 13:35 UTC (permalink / raw)
  To: Dr. David Alan Gilbert; +Cc: qemu-devel, Stefan Hajnoczi, Armbruster, Markus

Hi

On Tue, Nov 24, 2020 at 5:32 PM Marc-André Lureau
<marcandre.lureau@redhat.com> wrote:
>
> Hi
>
> On Tue, Nov 24, 2020 at 3:54 PM Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> >
> > * marcandre.lureau@redhat.com (marcandre.lureau@redhat.com) wrote:
> > > From: Marc-André Lureau <marcandre.lureau@redhat.com>
> > >
> > > Undo the damage from commit 5f9ff1eff3 ("libvhost-user: Support tracking
> > > inflight I/O in shared memory") which introduced glib dependency through
> > > osdep.h inclusion.
> > >
> > > libvhost-user.c tries to stay free from glib usage.
> > >
> > > Use glibc memfd_create directly when available (assumed so when
> > > MFD_ALLOW_SEALING is defined).
> > >
> > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > > ---
> > >  contrib/libvhost-user/libvhost-user.c | 50 +++++++++++++++++++++++----
> > >  1 file changed, 43 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> > > index 1c1cfbf1e7..805521859d 100644
> > > --- a/contrib/libvhost-user/libvhost-user.c
> > > +++ b/contrib/libvhost-user/libvhost-user.c
> > > @@ -42,8 +42,6 @@
> > >  #endif
> > >
> > >  #include "qemu/atomic.h"
> > > -#include "qemu/osdep.h"
> > > -#include "qemu/memfd.h"
> > >
> > >  #include "libvhost-user.h"
> > >
> > > @@ -1615,11 +1613,45 @@ vu_inflight_queue_size(uint16_t queue_size)
> > >             sizeof(uint16_t), INFLIGHT_ALIGNMENT);
> > >  }
> > >
> > > +#ifdef MFD_ALLOW_SEALING
> > > +static void *
> > > +memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
> > > +{
> > > +    void *ptr;
> > > +    int ret;
> > > +
> > > +    *fd = memfd_create(name, MFD_ALLOW_SEALING);
> > > +    if (*fd < 0) {
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ret = ftruncate(*fd, size);
> >
> > Do you need to do any of the page alignment?
>
> We don't do any in util/memfd.c, I don't see an explicit requirement
> in memfd_create(). (however, util/memfd.c did check power of 2 for
> hugetlb usage, but this isn't necessary here)
>
> On mmap(), "if addr is NULL, then the kernel chooses the (page-aligned) address"
>
> >
> > > +    if (ret < 0) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ret = fcntl(*fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);
> >
> > I think you'd intended to use the 'flags' parameter there.
>
> indeed, thanks
>
> >
> > > +    if (ret < 0) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
> > > +    if (ptr == MAP_FAILED) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    return ptr;
> > > +}
> > > +#endif
> > > +
> > >  static bool
> > >  vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> > >  {
> > > -    int fd;
> > > -    void *addr;
> > > +    int fd = -1;
> > > +    void *addr = NULL;
> > >      uint64_t mmap_size;
> > >      uint16_t num_queues, queue_size;
> > >
> > > @@ -1637,9 +1669,13 @@ vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> > >
> > >      mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
> > >
> > > -    addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
> > > -                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > > -                            &fd, NULL);
> > > +#ifdef MFD_ALLOW_SEALING
> > > +    addr = memfd_alloc("vhost-inflight", mmap_size,
> > > +                       F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > > +                       &fd);
> > > +#else
> > > +    vu_panic(dev, "Not implemented: memfd support is missing");
> >
> > Should there be an ifdef somewhere on the declared features, so it
> > doesn't get this far because it wont negotiate the feature?
>
> Sealing grow/shrink came together with memfd, it was one of the main
> selling point. I assume if MFD_ALLOW_SEALING is defined, we have
> memfd_create and basic libc defines. But yes, if we want to handle
> weird corner cases, we should add more ifdef-ry. I'd pass for now.


However, let's avoid compiling code that can panic. I am updating the
series with a standalone Meson subproject.




* Re: [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage
  2020-11-24 13:32     ` Marc-André Lureau
  2020-11-24 13:35       ` Marc-André Lureau
@ 2020-11-24 14:39       ` Dr. David Alan Gilbert
  1 sibling, 0 replies; 9+ messages in thread
From: Dr. David Alan Gilbert @ 2020-11-24 14:39 UTC (permalink / raw)
  To: Marc-André Lureau; +Cc: qemu-devel, Stefan Hajnoczi, Armbruster, Markus

* Marc-André Lureau (marcandre.lureau@redhat.com) wrote:
> Hi
> 
> On Tue, Nov 24, 2020 at 3:54 PM Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> >
> > * marcandre.lureau@redhat.com (marcandre.lureau@redhat.com) wrote:
> > > From: Marc-André Lureau <marcandre.lureau@redhat.com>
> > >
> > > Undo the damage from commit 5f9ff1eff3 ("libvhost-user: Support tracking
> > > inflight I/O in shared memory") which introduced glib dependency through
> > > osdep.h inclusion.
> > >
> > > libvhost-user.c tries to stay free from glib usage.
> > >
> > > Use glibc memfd_create directly when available (assumed so when
> > > MFD_ALLOW_SEALING is defined).
> > >
> > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > > ---
> > >  contrib/libvhost-user/libvhost-user.c | 50 +++++++++++++++++++++++----
> > >  1 file changed, 43 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> > > index 1c1cfbf1e7..805521859d 100644
> > > --- a/contrib/libvhost-user/libvhost-user.c
> > > +++ b/contrib/libvhost-user/libvhost-user.c
> > > @@ -42,8 +42,6 @@
> > >  #endif
> > >
> > >  #include "qemu/atomic.h"
> > > -#include "qemu/osdep.h"
> > > -#include "qemu/memfd.h"
> > >
> > >  #include "libvhost-user.h"
> > >
> > > @@ -1615,11 +1613,45 @@ vu_inflight_queue_size(uint16_t queue_size)
> > >             sizeof(uint16_t), INFLIGHT_ALIGNMENT);
> > >  }
> > >
> > > +#ifdef MFD_ALLOW_SEALING
> > > +static void *
> > > +memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
> > > +{
> > > +    void *ptr;
> > > +    int ret;
> > > +
> > > +    *fd = memfd_create(name, MFD_ALLOW_SEALING);
> > > +    if (*fd < 0) {
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ret = ftruncate(*fd, size);
> >
> > Do you need to do any of the page alignment?
> 
> We don't do any in util/memfd.c, I don't see an explicit requirement
> in memfd_create(). (however, util/memfd.c did check power of 2 for
> hugetlb usage, but this isn't necessary here)
> 
> On mmap(), "if addr is NULL, then the kernel chooses the (page-aligned) address"
> 
> >
> > > +    if (ret < 0) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ret = fcntl(*fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);
> >
> > I think you'd intended to use the 'flags' parameter there.
> 
> indeed, thanks
> 
> >
> > > +    if (ret < 0) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
> > > +    if (ptr == MAP_FAILED) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    return ptr;
> > > +}
> > > +#endif
> > > +
> > >  static bool
> > >  vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> > >  {
> > > -    int fd;
> > > -    void *addr;
> > > +    int fd = -1;
> > > +    void *addr = NULL;
> > >      uint64_t mmap_size;
> > >      uint16_t num_queues, queue_size;
> > >
> > > @@ -1637,9 +1669,13 @@ vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> > >
> > >      mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
> > >
> > > -    addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
> > > -                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > > -                            &fd, NULL);
> > > +#ifdef MFD_ALLOW_SEALING
> > > +    addr = memfd_alloc("vhost-inflight", mmap_size,
> > > +                       F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > > +                       &fd);
> > > +#else
> > > +    vu_panic(dev, "Not implemented: memfd support is missing");
> >
> > Should there be an ifdef somewhere on the declared features, so it
> > doesn't get this far because it wont negotiate the feature?
> 
> Sealing grow/shrink came together with memfd, it was one of the main
> selling point. I assume if MFD_ALLOW_SEALING is defined, we have
> memfd_create and basic libc defines. But yes, if we want to handle
> weird corner cases, we should add more ifdef-ry. I'd pass for now.

I wasn't being that selective; I just meant that if any of the memfd
support isn't available, then we should simply say the vu_inflight
feature isn't available.
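
Roughly like the sketch below; the helper name is illustrative, but
VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD is the protocol feature bit used for
inflight tracking:

    /* Sketch: only advertise the inflight-fd protocol feature when memfd
     * sealing support is compiled in, so the front-end never negotiates
     * something the back-end cannot honour. */
    static uint64_t
    vu_supported_protocol_features(void)
    {
        uint64_t features = 0;
    #ifdef MFD_ALLOW_SEALING
        features |= 1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
    #endif
        return features;
    }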

Dave

> thanks
> 
> >
> > Dave
> >
> > > +#endif
> > >
> > >      if (!addr) {
> > >          vu_panic(dev, "Failed to alloc vhost inflight area");
> > > --
> > > 2.29.0
> > >
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> >
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



