All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] libvhost-user: lower dependency on QEMU headers
@ 2020-11-18  8:09 marcandre.lureau
  2020-11-18  8:09 ` [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h marcandre.lureau
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: marcandre.lureau @ 2020-11-18  8:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Marc-André Lureau, armbru, stefanha, dgilbert

From: Marc-André Lureau <marcandre.lureau@redhat.com>

Hi,

libvhost-user is meant to be free of glib dependency, and easily
copyable/reusable outside of QEMU. Clean-up some dependencies that crept in
recently (the one remaining is qemu/atomic.h, from which a subset is used)

Marc-André Lureau (2):
  libvhost-user: replace qemu/bswap.h with glibc endian.h
  libvhost-user: replace qemu/memfd.h usage

 contrib/libvhost-user/libvhost-user.c | 127 +++++++++++++++++---------
 1 file changed, 83 insertions(+), 44 deletions(-)

-- 
2.29.0




^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h
  2020-11-18  8:09 [PATCH 0/2] libvhost-user: lower dependency on QEMU headers marcandre.lureau
@ 2020-11-18  8:09 ` marcandre.lureau
  2020-11-24 11:31   ` Dr. David Alan Gilbert
  2020-11-18  8:09 ` [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage marcandre.lureau
  2020-11-18  8:58 ` [PATCH 0/2] libvhost-user: lower dependency on QEMU headers Stefan Hajnoczi
  2 siblings, 1 reply; 9+ messages in thread
From: marcandre.lureau @ 2020-11-18  8:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Marc-André Lureau, armbru, stefanha, dgilbert

From: Marc-André Lureau <marcandre.lureau@redhat.com>

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 contrib/libvhost-user/libvhost-user.c | 77 ++++++++++++++-------------
 1 file changed, 40 insertions(+), 37 deletions(-)

diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index 5c73ffdd6b..1c1cfbf1e7 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -26,6 +26,7 @@
 #include <sys/socket.h>
 #include <sys/eventfd.h>
 #include <sys/mman.h>
+#include <endian.h>
 #include "qemu/compiler.h"
 
 #if defined(__linux__)
@@ -42,7 +43,6 @@
 
 #include "qemu/atomic.h"
 #include "qemu/osdep.h"
-#include "qemu/bswap.h"
 #include "qemu/memfd.h"
 
 #include "libvhost-user.h"
@@ -1081,7 +1081,7 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
         return false;
     }
 
-    vq->used_idx = lduw_le_p(&vq->vring.used->idx);
+    vq->used_idx = le16toh(vq->vring.used->idx);
 
     if (vq->last_avail_idx != vq->used_idx) {
         bool resume = dev->iface->queue_is_processed_in_order &&
@@ -1198,7 +1198,7 @@ vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
         return 0;
     }
 
-    vq->used_idx = lduw_le_p(&vq->vring.used->idx);
+    vq->used_idx = le16toh(vq->vring.used->idx);
     vq->resubmit_num = 0;
     vq->resubmit_list = NULL;
     vq->counter = 0;
@@ -2031,13 +2031,13 @@ vu_queue_started(const VuDev *dev, const VuVirtq *vq)
 static inline uint16_t
 vring_avail_flags(VuVirtq *vq)
 {
-    return lduw_le_p(&vq->vring.avail->flags);
+    return le16toh(vq->vring.avail->flags);
 }
 
 static inline uint16_t
 vring_avail_idx(VuVirtq *vq)
 {
-    vq->shadow_avail_idx = lduw_le_p(&vq->vring.avail->idx);
+    vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);
 
     return vq->shadow_avail_idx;
 }
@@ -2045,7 +2045,7 @@ vring_avail_idx(VuVirtq *vq)
 static inline uint16_t
 vring_avail_ring(VuVirtq *vq, int i)
 {
-    return lduw_le_p(&vq->vring.avail->ring[i]);
+    return le16toh(vq->vring.avail->ring[i]);
 }
 
 static inline uint16_t
@@ -2133,12 +2133,12 @@ virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
                          int i, unsigned int max, unsigned int *next)
 {
     /* If this descriptor says it doesn't chain, we're done. */
-    if (!(lduw_le_p(&desc[i].flags) & VRING_DESC_F_NEXT)) {
+    if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
         return VIRTQUEUE_READ_DESC_DONE;
     }
 
     /* Check they're not leading us off end of descriptors. */
-    *next = lduw_le_p(&desc[i].next);
+    *next = le16toh(desc[i].next);
     /* Make sure compiler knows to grab that: we don't want it changing! */
     smp_wmb();
 
@@ -2181,8 +2181,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
         }
         desc = vq->vring.desc;
 
-        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
-            if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
+        if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
+            if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
                 vu_panic(dev, "Invalid size for indirect buffer table");
                 goto err;
             }
@@ -2195,8 +2195,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
 
             /* loop over the indirect descriptor table */
             indirect = 1;
-            desc_addr = ldq_le_p(&desc[i].addr);
-            desc_len = ldl_le_p(&desc[i].len);
+            desc_addr = le64toh(desc[i].addr);
+            desc_len = le32toh(desc[i].len);
             max = desc_len / sizeof(struct vring_desc);
             read_len = desc_len;
             desc = vu_gpa_to_va(dev, &read_len, desc_addr);
@@ -2223,10 +2223,10 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
                 goto err;
             }
 
-            if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
-                in_total += ldl_le_p(&desc[i].len);
+            if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
+                in_total += le32toh(desc[i].len);
             } else {
-                out_total += ldl_le_p(&desc[i].len);
+                out_total += le32toh(desc[i].len);
             }
             if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                 goto done;
@@ -2377,7 +2377,7 @@ vring_used_flags_set_bit(VuVirtq *vq, int mask)
 
     flags = (uint16_t *)((char*)vq->vring.used +
                          offsetof(struct vring_used, flags));
-    stw_le_p(flags, lduw_le_p(flags) | mask);
+    *flags = htole16(le16toh(*flags) | mask);
 }
 
 static inline void
@@ -2387,17 +2387,20 @@ vring_used_flags_unset_bit(VuVirtq *vq, int mask)
 
     flags = (uint16_t *)((char*)vq->vring.used +
                          offsetof(struct vring_used, flags));
-    stw_le_p(flags, lduw_le_p(flags) & ~mask);
+    *flags = htole16(le16toh(*flags) & ~mask);
 }
 
 static inline void
 vring_set_avail_event(VuVirtq *vq, uint16_t val)
 {
+    uint16_t *avail;
+
     if (!vq->notification) {
         return;
     }
 
-    stw_le_p(&vq->vring.used->ring[vq->vring.num], val);
+    avail = (uint16_t *)&vq->vring.used->ring[vq->vring.num];
+    *avail = htole16(val);
 }
 
 void
@@ -2487,15 +2490,15 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
     struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
     int rc;
 
-    if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
-        if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
+    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
+        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
             vu_panic(dev, "Invalid size for indirect buffer table");
             return NULL;
         }
 
         /* loop over the indirect descriptor table */
-        desc_addr = ldq_le_p(&desc[i].addr);
-        desc_len = ldl_le_p(&desc[i].len);
+        desc_addr = le64toh(desc[i].addr);
+        desc_len = le32toh(desc[i].len);
         max = desc_len / sizeof(struct vring_desc);
         read_len = desc_len;
         desc = vu_gpa_to_va(dev, &read_len, desc_addr);
@@ -2517,11 +2520,11 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
 
     /* Collect all the descriptors */
     do {
-        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
+        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
             if (!virtqueue_map_desc(dev, &in_num, iov + out_num,
                                VIRTQUEUE_MAX_SIZE - out_num, true,
-                               ldq_le_p(&desc[i].addr),
-                               ldl_le_p(&desc[i].len))) {
+                               le64toh(desc[i].addr),
+                               le32toh(desc[i].len))) {
                 return NULL;
             }
         } else {
@@ -2531,8 +2534,8 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
             }
             if (!virtqueue_map_desc(dev, &out_num, iov,
                                VIRTQUEUE_MAX_SIZE, false,
-                               ldq_le_p(&desc[i].addr),
-                               ldl_le_p(&desc[i].len))) {
+                               le64toh(desc[i].addr),
+                               le32toh(desc[i].len))) {
                 return NULL;
             }
         }
@@ -2731,15 +2734,15 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
     max = vq->vring.num;
     i = elem->index;
 
-    if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
-        if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
+    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
+        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
             vu_panic(dev, "Invalid size for indirect buffer table");
             return;
         }
 
         /* loop over the indirect descriptor table */
-        desc_addr = ldq_le_p(&desc[i].addr);
-        desc_len = ldl_le_p(&desc[i].len);
+        desc_addr = le64toh(desc[i].addr);
+        desc_len = le32toh(desc[i].len);
         max = desc_len / sizeof(struct vring_desc);
         read_len = desc_len;
         desc = vu_gpa_to_va(dev, &read_len, desc_addr);
@@ -2765,9 +2768,9 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
             return;
         }
 
-        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
-            min = MIN(ldl_le_p(&desc[i].len), len);
-            vu_log_write(dev, ldq_le_p(&desc[i].addr), min);
+        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
+            min = MIN(le32toh(desc[i].len), len);
+            vu_log_write(dev, le64toh(desc[i].addr), min);
             len -= min;
         }
 
@@ -2792,15 +2795,15 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq,
 
     idx = (idx + vq->used_idx) % vq->vring.num;
 
-    stl_le_p(&uelem.id, elem->index);
-    stl_le_p(&uelem.len, len);
+    uelem.id = htole32(elem->index);
+    uelem.len = htole32(len);
     vring_used_write(dev, vq, &uelem, idx);
 }
 
 static inline
 void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
 {
-    stw_le_p(&vq->vring.used->idx, val);
+    vq->vring.used->idx = htole16(val);
     vu_log_write(dev,
                  vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
                  sizeof(vq->vring.used->idx));
-- 
2.29.0



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage
  2020-11-18  8:09 [PATCH 0/2] libvhost-user: lower dependency on QEMU headers marcandre.lureau
  2020-11-18  8:09 ` [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h marcandre.lureau
@ 2020-11-18  8:09 ` marcandre.lureau
  2020-11-24 11:54   ` Dr. David Alan Gilbert
  2020-11-18  8:58 ` [PATCH 0/2] libvhost-user: lower dependency on QEMU headers Stefan Hajnoczi
  2 siblings, 1 reply; 9+ messages in thread
From: marcandre.lureau @ 2020-11-18  8:09 UTC (permalink / raw)
  To: qemu-devel; +Cc: Marc-André Lureau, armbru, stefanha, dgilbert

From: Marc-André Lureau <marcandre.lureau@redhat.com>

Undo the damage from commit 5f9ff1eff3 ("libvhost-user: Support tracking
inflight I/O in shared memory") which introduced glib dependency through
osdep.h inclusion.

libvhost-user.c tries to stay free from glib usage.

Use glibc memfd_create directly when available (assumed so when
MFD_ALLOW_SEALING is defined).

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 contrib/libvhost-user/libvhost-user.c | 50 +++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 7 deletions(-)

diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index 1c1cfbf1e7..805521859d 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -42,8 +42,6 @@
 #endif
 
 #include "qemu/atomic.h"
-#include "qemu/osdep.h"
-#include "qemu/memfd.h"
 
 #include "libvhost-user.h"
 
@@ -1615,11 +1613,45 @@ vu_inflight_queue_size(uint16_t queue_size)
            sizeof(uint16_t), INFLIGHT_ALIGNMENT);
 }
 
+#ifdef MFD_ALLOW_SEALING
+static void *
+memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
+{
+    void *ptr;
+    int ret;
+
+    *fd = memfd_create(name, MFD_ALLOW_SEALING);
+    if (*fd < 0) {
+        return NULL;
+    }
+
+    ret = ftruncate(*fd, size);
+    if (ret < 0) {
+        close(*fd);
+        return NULL;
+    }
+
+    ret = fcntl(*fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);
+    if (ret < 0) {
+        close(*fd);
+        return NULL;
+    }
+
+    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
+    if (ptr == MAP_FAILED) {
+        close(*fd);
+        return NULL;
+    }
+
+    return ptr;
+}
+#endif
+
 static bool
 vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
 {
-    int fd;
-    void *addr;
+    int fd = -1;
+    void *addr = NULL;
     uint64_t mmap_size;
     uint16_t num_queues, queue_size;
 
@@ -1637,9 +1669,13 @@ vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
 
     mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
 
-    addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
-                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
-                            &fd, NULL);
+#ifdef MFD_ALLOW_SEALING
+    addr = memfd_alloc("vhost-inflight", mmap_size,
+                       F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
+                       &fd);
+#else
+    vu_panic(dev, "Not implemented: memfd support is missing");
+#endif
 
     if (!addr) {
         vu_panic(dev, "Failed to alloc vhost inflight area");
-- 
2.29.0



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/2] libvhost-user: lower dependency on QEMU headers
  2020-11-18  8:09 [PATCH 0/2] libvhost-user: lower dependency on QEMU headers marcandre.lureau
  2020-11-18  8:09 ` [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h marcandre.lureau
  2020-11-18  8:09 ` [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage marcandre.lureau
@ 2020-11-18  8:58 ` Stefan Hajnoczi
  2 siblings, 0 replies; 9+ messages in thread
From: Stefan Hajnoczi @ 2020-11-18  8:58 UTC (permalink / raw)
  To: marcandre.lureau; +Cc: qemu-devel, dgilbert, armbru

[-- Attachment #1: Type: text/plain, Size: 937 bytes --]

On Wed, Nov 18, 2020 at 12:09:00PM +0400, marcandre.lureau@redhat.com wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
> 
> Hi,
> 
> libvhost-user is meant to be free of glib dependency, and easily
> copyable/reusable outside of QEMU. Clean-up some dependencies that crept in
> recently (the one remaining is qemu/atomic.h, from which a subset is used)
> 
> Marc-André Lureau (2):
>   libvhost-user: replace qemu/bswap.h with glibc endian.h
>   libvhost-user: replace qemu/memfd.h usage
> 
>  contrib/libvhost-user/libvhost-user.c | 127 +++++++++++++++++---------
>  1 file changed, 83 insertions(+), 44 deletions(-)

Let's find a way to enforce this. How about a libvhost-user-only build
in the CI system without glib2-devel? Also maybe a way to poison QEMU
headers? It might be simpler to move contrib/libvhost-user to a separate
git repo though.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h
  2020-11-18  8:09 ` [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h marcandre.lureau
@ 2020-11-24 11:31   ` Dr. David Alan Gilbert
  0 siblings, 0 replies; 9+ messages in thread
From: Dr. David Alan Gilbert @ 2020-11-24 11:31 UTC (permalink / raw)
  To: marcandre.lureau; +Cc: qemu-devel, stefanha, armbru

* marcandre.lureau@redhat.com (marcandre.lureau@redhat.com) wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

> ---
>  contrib/libvhost-user/libvhost-user.c | 77 ++++++++++++++-------------
>  1 file changed, 40 insertions(+), 37 deletions(-)
> 
> diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> index 5c73ffdd6b..1c1cfbf1e7 100644
> --- a/contrib/libvhost-user/libvhost-user.c
> +++ b/contrib/libvhost-user/libvhost-user.c
> @@ -26,6 +26,7 @@
>  #include <sys/socket.h>
>  #include <sys/eventfd.h>
>  #include <sys/mman.h>
> +#include <endian.h>
>  #include "qemu/compiler.h"
>  
>  #if defined(__linux__)
> @@ -42,7 +43,6 @@
>  
>  #include "qemu/atomic.h"
>  #include "qemu/osdep.h"
> -#include "qemu/bswap.h"
>  #include "qemu/memfd.h"
>  
>  #include "libvhost-user.h"
> @@ -1081,7 +1081,7 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
>          return false;
>      }
>  
> -    vq->used_idx = lduw_le_p(&vq->vring.used->idx);
> +    vq->used_idx = le16toh(vq->vring.used->idx);
>  
>      if (vq->last_avail_idx != vq->used_idx) {
>          bool resume = dev->iface->queue_is_processed_in_order &&
> @@ -1198,7 +1198,7 @@ vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
>          return 0;
>      }
>  
> -    vq->used_idx = lduw_le_p(&vq->vring.used->idx);
> +    vq->used_idx = le16toh(vq->vring.used->idx);
>      vq->resubmit_num = 0;
>      vq->resubmit_list = NULL;
>      vq->counter = 0;
> @@ -2031,13 +2031,13 @@ vu_queue_started(const VuDev *dev, const VuVirtq *vq)
>  static inline uint16_t
>  vring_avail_flags(VuVirtq *vq)
>  {
> -    return lduw_le_p(&vq->vring.avail->flags);
> +    return le16toh(vq->vring.avail->flags);
>  }
>  
>  static inline uint16_t
>  vring_avail_idx(VuVirtq *vq)
>  {
> -    vq->shadow_avail_idx = lduw_le_p(&vq->vring.avail->idx);
> +    vq->shadow_avail_idx = le16toh(vq->vring.avail->idx);
>  
>      return vq->shadow_avail_idx;
>  }
> @@ -2045,7 +2045,7 @@ vring_avail_idx(VuVirtq *vq)
>  static inline uint16_t
>  vring_avail_ring(VuVirtq *vq, int i)
>  {
> -    return lduw_le_p(&vq->vring.avail->ring[i]);
> +    return le16toh(vq->vring.avail->ring[i]);
>  }
>  
>  static inline uint16_t
> @@ -2133,12 +2133,12 @@ virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
>                           int i, unsigned int max, unsigned int *next)
>  {
>      /* If this descriptor says it doesn't chain, we're done. */
> -    if (!(lduw_le_p(&desc[i].flags) & VRING_DESC_F_NEXT)) {
> +    if (!(le16toh(desc[i].flags) & VRING_DESC_F_NEXT)) {
>          return VIRTQUEUE_READ_DESC_DONE;
>      }
>  
>      /* Check they're not leading us off end of descriptors. */
> -    *next = lduw_le_p(&desc[i].next);
> +    *next = le16toh(desc[i].next);
>      /* Make sure compiler knows to grab that: we don't want it changing! */
>      smp_wmb();
>  
> @@ -2181,8 +2181,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
>          }
>          desc = vq->vring.desc;
>  
> -        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
> -            if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
> +        if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
> +            if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
>                  vu_panic(dev, "Invalid size for indirect buffer table");
>                  goto err;
>              }
> @@ -2195,8 +2195,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
>  
>              /* loop over the indirect descriptor table */
>              indirect = 1;
> -            desc_addr = ldq_le_p(&desc[i].addr);
> -            desc_len = ldl_le_p(&desc[i].len);
> +            desc_addr = le64toh(desc[i].addr);
> +            desc_len = le32toh(desc[i].len);
>              max = desc_len / sizeof(struct vring_desc);
>              read_len = desc_len;
>              desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2223,10 +2223,10 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
>                  goto err;
>              }
>  
> -            if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
> -                in_total += ldl_le_p(&desc[i].len);
> +            if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
> +                in_total += le32toh(desc[i].len);
>              } else {
> -                out_total += ldl_le_p(&desc[i].len);
> +                out_total += le32toh(desc[i].len);
>              }
>              if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
>                  goto done;
> @@ -2377,7 +2377,7 @@ vring_used_flags_set_bit(VuVirtq *vq, int mask)
>  
>      flags = (uint16_t *)((char*)vq->vring.used +
>                           offsetof(struct vring_used, flags));
> -    stw_le_p(flags, lduw_le_p(flags) | mask);
> +    *flags = htole16(le16toh(*flags) | mask);
>  }
>  
>  static inline void
> @@ -2387,17 +2387,20 @@ vring_used_flags_unset_bit(VuVirtq *vq, int mask)
>  
>      flags = (uint16_t *)((char*)vq->vring.used +
>                           offsetof(struct vring_used, flags));
> -    stw_le_p(flags, lduw_le_p(flags) & ~mask);
> +    *flags = htole16(le16toh(*flags) & ~mask);
>  }
>  
>  static inline void
>  vring_set_avail_event(VuVirtq *vq, uint16_t val)
>  {
> +    uint16_t *avail;
> +
>      if (!vq->notification) {
>          return;
>      }
>  
> -    stw_le_p(&vq->vring.used->ring[vq->vring.num], val);
> +    avail = (uint16_t *)&vq->vring.used->ring[vq->vring.num];
> +    *avail = htole16(val);
>  }
>  
>  void
> @@ -2487,15 +2490,15 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
>      struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
>      int rc;
>  
> -    if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
> -        if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
> +    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
> +        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
>              vu_panic(dev, "Invalid size for indirect buffer table");
>              return NULL;
>          }
>  
>          /* loop over the indirect descriptor table */
> -        desc_addr = ldq_le_p(&desc[i].addr);
> -        desc_len = ldl_le_p(&desc[i].len);
> +        desc_addr = le64toh(desc[i].addr);
> +        desc_len = le32toh(desc[i].len);
>          max = desc_len / sizeof(struct vring_desc);
>          read_len = desc_len;
>          desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2517,11 +2520,11 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
>  
>      /* Collect all the descriptors */
>      do {
> -        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
> +        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
>              if (!virtqueue_map_desc(dev, &in_num, iov + out_num,
>                                 VIRTQUEUE_MAX_SIZE - out_num, true,
> -                               ldq_le_p(&desc[i].addr),
> -                               ldl_le_p(&desc[i].len))) {
> +                               le64toh(desc[i].addr),
> +                               le32toh(desc[i].len))) {
>                  return NULL;
>              }
>          } else {
> @@ -2531,8 +2534,8 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
>              }
>              if (!virtqueue_map_desc(dev, &out_num, iov,
>                                 VIRTQUEUE_MAX_SIZE, false,
> -                               ldq_le_p(&desc[i].addr),
> -                               ldl_le_p(&desc[i].len))) {
> +                               le64toh(desc[i].addr),
> +                               le32toh(desc[i].len))) {
>                  return NULL;
>              }
>          }
> @@ -2731,15 +2734,15 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
>      max = vq->vring.num;
>      i = elem->index;
>  
> -    if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_INDIRECT) {
> -        if (ldl_le_p(&desc[i].len) % sizeof(struct vring_desc)) {
> +    if (le16toh(desc[i].flags) & VRING_DESC_F_INDIRECT) {
> +        if (le32toh(desc[i].len) % sizeof(struct vring_desc)) {
>              vu_panic(dev, "Invalid size for indirect buffer table");
>              return;
>          }
>  
>          /* loop over the indirect descriptor table */
> -        desc_addr = ldq_le_p(&desc[i].addr);
> -        desc_len = ldl_le_p(&desc[i].len);
> +        desc_addr = le64toh(desc[i].addr);
> +        desc_len = le32toh(desc[i].len);
>          max = desc_len / sizeof(struct vring_desc);
>          read_len = desc_len;
>          desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2765,9 +2768,9 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
>              return;
>          }
>  
> -        if (lduw_le_p(&desc[i].flags) & VRING_DESC_F_WRITE) {
> -            min = MIN(ldl_le_p(&desc[i].len), len);
> -            vu_log_write(dev, ldq_le_p(&desc[i].addr), min);
> +        if (le16toh(desc[i].flags) & VRING_DESC_F_WRITE) {
> +            min = MIN(le32toh(desc[i].len), len);
> +            vu_log_write(dev, le64toh(desc[i].addr), min);
>              len -= min;
>          }
>  
> @@ -2792,15 +2795,15 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq,
>  
>      idx = (idx + vq->used_idx) % vq->vring.num;
>  
> -    stl_le_p(&uelem.id, elem->index);
> -    stl_le_p(&uelem.len, len);
> +    uelem.id = htole32(elem->index);
> +    uelem.len = htole32(len);
>      vring_used_write(dev, vq, &uelem, idx);
>  }
>  
>  static inline
>  void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
>  {
> -    stw_le_p(&vq->vring.used->idx, val);
> +    vq->vring.used->idx = htole16(val);
>      vu_log_write(dev,
>                   vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
>                   sizeof(vq->vring.used->idx));
> -- 
> 2.29.0
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage
  2020-11-18  8:09 ` [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage marcandre.lureau
@ 2020-11-24 11:54   ` Dr. David Alan Gilbert
  2020-11-24 13:32     ` Marc-André Lureau
  0 siblings, 1 reply; 9+ messages in thread
From: Dr. David Alan Gilbert @ 2020-11-24 11:54 UTC (permalink / raw)
  To: marcandre.lureau; +Cc: qemu-devel, stefanha, armbru

* marcandre.lureau@redhat.com (marcandre.lureau@redhat.com) wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
> 
> Undo the damage from commit 5f9ff1eff3 ("libvhost-user: Support tracking
> inflight I/O in shared memory") which introduced glib dependency through
> osdep.h inclusion.
> 
> libvhost-user.c tries to stay free from glib usage.
> 
> Use glibc memfd_create directly when available (assumed so when
> MFD_ALLOW_SEALING is defined).
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
>  contrib/libvhost-user/libvhost-user.c | 50 +++++++++++++++++++++++----
>  1 file changed, 43 insertions(+), 7 deletions(-)
> 
> diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> index 1c1cfbf1e7..805521859d 100644
> --- a/contrib/libvhost-user/libvhost-user.c
> +++ b/contrib/libvhost-user/libvhost-user.c
> @@ -42,8 +42,6 @@
>  #endif
>  
>  #include "qemu/atomic.h"
> -#include "qemu/osdep.h"
> -#include "qemu/memfd.h"
>  
>  #include "libvhost-user.h"
>  
> @@ -1615,11 +1613,45 @@ vu_inflight_queue_size(uint16_t queue_size)
>             sizeof(uint16_t), INFLIGHT_ALIGNMENT);
>  }
>  
> +#ifdef MFD_ALLOW_SEALING
> +static void *
> +memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
> +{
> +    void *ptr;
> +    int ret;
> +
> +    *fd = memfd_create(name, MFD_ALLOW_SEALING);
> +    if (*fd < 0) {
> +        return NULL;
> +    }
> +
> +    ret = ftruncate(*fd, size);

Do you need to do any of the page alignment?

> +    if (ret < 0) {
> +        close(*fd);
> +        return NULL;
> +    }
> +
> +    ret = fcntl(*fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);

I think you'd intended to use the 'flags' parameter there.

> +    if (ret < 0) {
> +        close(*fd);
> +        return NULL;
> +    }
> +
> +    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
> +    if (ptr == MAP_FAILED) {
> +        close(*fd);
> +        return NULL;
> +    }
> +
> +    return ptr;
> +}
> +#endif
> +
>  static bool
>  vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
>  {
> -    int fd;
> -    void *addr;
> +    int fd = -1;
> +    void *addr = NULL;
>      uint64_t mmap_size;
>      uint16_t num_queues, queue_size;
>  
> @@ -1637,9 +1669,13 @@ vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
>  
>      mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
>  
> -    addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
> -                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> -                            &fd, NULL);
> +#ifdef MFD_ALLOW_SEALING
> +    addr = memfd_alloc("vhost-inflight", mmap_size,
> +                       F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> +                       &fd);
> +#else
> +    vu_panic(dev, "Not implemented: memfd support is missing");

Should there be an ifdef somewhere on the declared features, so it
doesn't get this far because it wont negotiate the feature?

Dave

> +#endif
>  
>      if (!addr) {
>          vu_panic(dev, "Failed to alloc vhost inflight area");
> -- 
> 2.29.0
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage
  2020-11-24 11:54   ` Dr. David Alan Gilbert
@ 2020-11-24 13:32     ` Marc-André Lureau
  2020-11-24 13:35       ` Marc-André Lureau
  2020-11-24 14:39       ` Dr. David Alan Gilbert
  0 siblings, 2 replies; 9+ messages in thread
From: Marc-André Lureau @ 2020-11-24 13:32 UTC (permalink / raw)
  To: Dr. David Alan Gilbert; +Cc: qemu-devel, Stefan Hajnoczi, Armbruster, Markus

Hi

On Tue, Nov 24, 2020 at 3:54 PM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * marcandre.lureau@redhat.com (marcandre.lureau@redhat.com) wrote:
> > From: Marc-André Lureau <marcandre.lureau@redhat.com>
> >
> > Undo the damage from commit 5f9ff1eff3 ("libvhost-user: Support tracking
> > inflight I/O in shared memory") which introduced glib dependency through
> > osdep.h inclusion.
> >
> > libvhost-user.c tries to stay free from glib usage.
> >
> > Use glibc memfd_create directly when available (assumed so when
> > MFD_ALLOW_SEALING is defined).
> >
> > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > ---
> >  contrib/libvhost-user/libvhost-user.c | 50 +++++++++++++++++++++++----
> >  1 file changed, 43 insertions(+), 7 deletions(-)
> >
> > diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> > index 1c1cfbf1e7..805521859d 100644
> > --- a/contrib/libvhost-user/libvhost-user.c
> > +++ b/contrib/libvhost-user/libvhost-user.c
> > @@ -42,8 +42,6 @@
> >  #endif
> >
> >  #include "qemu/atomic.h"
> > -#include "qemu/osdep.h"
> > -#include "qemu/memfd.h"
> >
> >  #include "libvhost-user.h"
> >
> > @@ -1615,11 +1613,45 @@ vu_inflight_queue_size(uint16_t queue_size)
> >             sizeof(uint16_t), INFLIGHT_ALIGNMENT);
> >  }
> >
> > +#ifdef MFD_ALLOW_SEALING
> > +static void *
> > +memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
> > +{
> > +    void *ptr;
> > +    int ret;
> > +
> > +    *fd = memfd_create(name, MFD_ALLOW_SEALING);
> > +    if (*fd < 0) {
> > +        return NULL;
> > +    }
> > +
> > +    ret = ftruncate(*fd, size);
>
> Do you need to do any of the page alignment?

We don't do any in util/memfd.c, I don't see an explicit requirement
in memfd_create(). (however, util/memfd.c did check power of 2 for
hugetlb usage, but this isn't necessary here)

On mmap(), "if addr is NULL, then the kernel chooses the (page-aligned) address"

>
> > +    if (ret < 0) {
> > +        close(*fd);
> > +        return NULL;
> > +    }
> > +
> > +    ret = fcntl(*fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);
>
> I think you'd intended to use the 'flags' parameter there.

indeed, thanks

>
> > +    if (ret < 0) {
> > +        close(*fd);
> > +        return NULL;
> > +    }
> > +
> > +    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
> > +    if (ptr == MAP_FAILED) {
> > +        close(*fd);
> > +        return NULL;
> > +    }
> > +
> > +    return ptr;
> > +}
> > +#endif
> > +
> >  static bool
> >  vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> >  {
> > -    int fd;
> > -    void *addr;
> > +    int fd = -1;
> > +    void *addr = NULL;
> >      uint64_t mmap_size;
> >      uint16_t num_queues, queue_size;
> >
> > @@ -1637,9 +1669,13 @@ vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> >
> >      mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
> >
> > -    addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
> > -                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > -                            &fd, NULL);
> > +#ifdef MFD_ALLOW_SEALING
> > +    addr = memfd_alloc("vhost-inflight", mmap_size,
> > +                       F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > +                       &fd);
> > +#else
> > +    vu_panic(dev, "Not implemented: memfd support is missing");
>
> Should there be an ifdef somewhere on the declared features, so it
> doesn't get this far because it wont negotiate the feature?

Sealing grow/shrink came together with memfd, it was one of the main
selling point. I assume if MFD_ALLOW_SEALING is defined, we have
memfd_create and basic libc defines. But yes, if we want to handle
weird corner cases, we should add more ifdef-ry. I'd pass for now.

thanks

>
> Dave
>
> > +#endif
> >
> >      if (!addr) {
> >          vu_panic(dev, "Failed to alloc vhost inflight area");
> > --
> > 2.29.0
> >
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage
  2020-11-24 13:32     ` Marc-André Lureau
@ 2020-11-24 13:35       ` Marc-André Lureau
  2020-11-24 14:39       ` Dr. David Alan Gilbert
  1 sibling, 0 replies; 9+ messages in thread
From: Marc-André Lureau @ 2020-11-24 13:35 UTC (permalink / raw)
  To: Dr. David Alan Gilbert; +Cc: qemu-devel, Stefan Hajnoczi, Armbruster, Markus

Hi

On Tue, Nov 24, 2020 at 5:32 PM Marc-André Lureau
<marcandre.lureau@redhat.com> wrote:
>
> Hi
>
> On Tue, Nov 24, 2020 at 3:54 PM Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> >
> > * marcandre.lureau@redhat.com (marcandre.lureau@redhat.com) wrote:
> > > From: Marc-André Lureau <marcandre.lureau@redhat.com>
> > >
> > > Undo the damage from commit 5f9ff1eff3 ("libvhost-user: Support tracking
> > > inflight I/O in shared memory") which introduced glib dependency through
> > > osdep.h inclusion.
> > >
> > > libvhost-user.c tries to stay free from glib usage.
> > >
> > > Use glibc memfd_create directly when available (assumed so when
> > > MFD_ALLOW_SEALING is defined).
> > >
> > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > > ---
> > >  contrib/libvhost-user/libvhost-user.c | 50 +++++++++++++++++++++++----
> > >  1 file changed, 43 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> > > index 1c1cfbf1e7..805521859d 100644
> > > --- a/contrib/libvhost-user/libvhost-user.c
> > > +++ b/contrib/libvhost-user/libvhost-user.c
> > > @@ -42,8 +42,6 @@
> > >  #endif
> > >
> > >  #include "qemu/atomic.h"
> > > -#include "qemu/osdep.h"
> > > -#include "qemu/memfd.h"
> > >
> > >  #include "libvhost-user.h"
> > >
> > > @@ -1615,11 +1613,45 @@ vu_inflight_queue_size(uint16_t queue_size)
> > >             sizeof(uint16_t), INFLIGHT_ALIGNMENT);
> > >  }
> > >
> > > +#ifdef MFD_ALLOW_SEALING
> > > +static void *
> > > +memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
> > > +{
> > > +    void *ptr;
> > > +    int ret;
> > > +
> > > +    *fd = memfd_create(name, MFD_ALLOW_SEALING);
> > > +    if (*fd < 0) {
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ret = ftruncate(*fd, size);
> >
> > Do you need to do any of the page alignment?
>
> We don't do any in util/memfd.c, I don't see an explicit requirement
> in memfd_create(). (however, util/memfd.c did check power of 2 for
> hugetlb usage, but this isn't necessary here)
>
> On mmap(), "if addr is NULL, then the kernel chooses the (page-aligned) address"
>
> >
> > > +    if (ret < 0) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ret = fcntl(*fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);
> >
> > I think you'd intended to use the 'flags' parameter there.
>
> indeed, thanks
>
> >
> > > +    if (ret < 0) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
> > > +    if (ptr == MAP_FAILED) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    return ptr;
> > > +}
> > > +#endif
> > > +
> > >  static bool
> > >  vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> > >  {
> > > -    int fd;
> > > -    void *addr;
> > > +    int fd = -1;
> > > +    void *addr = NULL;
> > >      uint64_t mmap_size;
> > >      uint16_t num_queues, queue_size;
> > >
> > > @@ -1637,9 +1669,13 @@ vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> > >
> > >      mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
> > >
> > > -    addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
> > > -                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > > -                            &fd, NULL);
> > > +#ifdef MFD_ALLOW_SEALING
> > > +    addr = memfd_alloc("vhost-inflight", mmap_size,
> > > +                       F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > > +                       &fd);
> > > +#else
> > > +    vu_panic(dev, "Not implemented: memfd support is missing");
> >
> > Should there be an ifdef somewhere on the declared features, so it
> > doesn't get this far because it wont negotiate the feature?
>
> Sealing grow/shrink came together with memfd, it was one of the main
> selling point. I assume if MFD_ALLOW_SEALING is defined, we have
> memfd_create and basic libc defines. But yes, if we want to handle
> weird corner cases, we should add more ifdef-ry. I'd pass for now.


However, let's avoid compiling code that can panic. I am updating the
series with a standalone meson subproject.



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage
  2020-11-24 13:32     ` Marc-André Lureau
  2020-11-24 13:35       ` Marc-André Lureau
@ 2020-11-24 14:39       ` Dr. David Alan Gilbert
  1 sibling, 0 replies; 9+ messages in thread
From: Dr. David Alan Gilbert @ 2020-11-24 14:39 UTC (permalink / raw)
  To: Marc-André Lureau; +Cc: qemu-devel, Stefan Hajnoczi, Armbruster, Markus

* Marc-André Lureau (marcandre.lureau@redhat.com) wrote:
> Hi
> 
> On Tue, Nov 24, 2020 at 3:54 PM Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> >
> > * marcandre.lureau@redhat.com (marcandre.lureau@redhat.com) wrote:
> > > From: Marc-André Lureau <marcandre.lureau@redhat.com>
> > >
> > > Undo the damage from commit 5f9ff1eff3 ("libvhost-user: Support tracking
> > > inflight I/O in shared memory") which introduced glib dependency through
> > > osdep.h inclusion.
> > >
> > > libvhost-user.c tries to stay free from glib usage.
> > >
> > > Use glibc memfd_create directly when available (assumed so when
> > > MFD_ALLOW_SEALING is defined).
> > >
> > > Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> > > ---
> > >  contrib/libvhost-user/libvhost-user.c | 50 +++++++++++++++++++++++----
> > >  1 file changed, 43 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> > > index 1c1cfbf1e7..805521859d 100644
> > > --- a/contrib/libvhost-user/libvhost-user.c
> > > +++ b/contrib/libvhost-user/libvhost-user.c
> > > @@ -42,8 +42,6 @@
> > >  #endif
> > >
> > >  #include "qemu/atomic.h"
> > > -#include "qemu/osdep.h"
> > > -#include "qemu/memfd.h"
> > >
> > >  #include "libvhost-user.h"
> > >
> > > @@ -1615,11 +1613,45 @@ vu_inflight_queue_size(uint16_t queue_size)
> > >             sizeof(uint16_t), INFLIGHT_ALIGNMENT);
> > >  }
> > >
> > > +#ifdef MFD_ALLOW_SEALING
> > > +static void *
> > > +memfd_alloc(const char *name, size_t size, unsigned int flags, int *fd)
> > > +{
> > > +    void *ptr;
> > > +    int ret;
> > > +
> > > +    *fd = memfd_create(name, MFD_ALLOW_SEALING);
> > > +    if (*fd < 0) {
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ret = ftruncate(*fd, size);
> >
> > Do you need to do any of the page alignment?
> 
> We don't do any in util/memfd.c, I don't see an explicit requirement
> in memfd_create(). (however, util/memfd.c did check power of 2 for
> hugetlb usage, but this isn't necessary here)
> 
> On mmap(), "if addr is NULL, then the kernel chooses the (page-aligned) address"
> 
> >
> > > +    if (ret < 0) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ret = fcntl(*fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);
> >
> > I think you'd intended to use the 'flags' parameter there.
> 
> indeed, thanks
> 
> >
> > > +    if (ret < 0) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
> > > +    if (ptr == MAP_FAILED) {
> > > +        close(*fd);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    return ptr;
> > > +}
> > > +#endif
> > > +
> > >  static bool
> > >  vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> > >  {
> > > -    int fd;
> > > -    void *addr;
> > > +    int fd = -1;
> > > +    void *addr = NULL;
> > >      uint64_t mmap_size;
> > >      uint16_t num_queues, queue_size;
> > >
> > > @@ -1637,9 +1669,13 @@ vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
> > >
> > >      mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
> > >
> > > -    addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
> > > -                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > > -                            &fd, NULL);
> > > +#ifdef MFD_ALLOW_SEALING
> > > +    addr = memfd_alloc("vhost-inflight", mmap_size,
> > > +                       F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> > > +                       &fd);
> > > +#else
> > > +    vu_panic(dev, "Not implemented: memfd support is missing");
> >
> > Should there be an ifdef somewhere on the declared features, so it
> > doesn't get this far because it wont negotiate the feature?
> 
> Sealing grow/shrink came together with memfd, it was one of the main
> selling point. I assume if MFD_ALLOW_SEALING is defined, we have
> memfd_create and basic libc defines. But yes, if we want to handle
> weird corner cases, we should add more ifdef-ry. I'd pass for now.

I wasn't being that selective; I just ment if any of the memfd
wasn't available then we should just say the vu_inflight feature isn't
available.

Dave

> thanks
> 
> >
> > Dave
> >
> > > +#endif
> > >
> > >      if (!addr) {
> > >          vu_panic(dev, "Failed to alloc vhost inflight area");
> > > --
> > > 2.29.0
> > >
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> >
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK



^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-11-24 14:41 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-11-18  8:09 [PATCH 0/2] libvhost-user: lower dependency on QEMU headers marcandre.lureau
2020-11-18  8:09 ` [PATCH 1/2] libvhost-user: replace qemu/bswap.h with glibc endian.h marcandre.lureau
2020-11-24 11:31   ` Dr. David Alan Gilbert
2020-11-18  8:09 ` [PATCH 2/2] libvhost-user: replace qemu/memfd.h usage marcandre.lureau
2020-11-24 11:54   ` Dr. David Alan Gilbert
2020-11-24 13:32     ` Marc-André Lureau
2020-11-24 13:35       ` Marc-André Lureau
2020-11-24 14:39       ` Dr. David Alan Gilbert
2020-11-18  8:58 ` [PATCH 0/2] libvhost-user: lower dependency on QEMU headers Stefan Hajnoczi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.