qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com>
To: qemu-devel@nongnu.org, vgoyal@redhat.com, stefanha@redhat.com,
	virtio-fs@redhat.com, marcandre.lureau@redhat.com,
	mst@redhat.com
Subject: [PATCH 18/24] DAX/unmap virtiofsd: Parse unmappable elements
Date: Tue,  9 Feb 2021 19:02:18 +0000	[thread overview]
Message-ID: <20210209190224.62827-19-dgilbert@redhat.com> (raw)
In-Reply-To: <20210209190224.62827-1-dgilbert@redhat.com>

From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

For some read/writes the virtio queue elements are unmappable by
the daemon; these are cases where the data is to be read/written
from non-RAM.  In virtiofs's case this is typically a direct read/write
into an mmap'd DAX file also on virtiofs (possibly on another instance).

When we receive a virtio queue element, check that we have enough
mappable data to handle the headers.  Make a note of the number of
unmappable 'in' entries (i.e. for data read back to the VMM),
and flag the fuse_bufvec for 'out' entries with a new flag
FUSE_BUF_PHYS_ADDR.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
with fix by:
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
---
 tools/virtiofsd/buffer.c      |   4 +-
 tools/virtiofsd/fuse_common.h |   7 ++
 tools/virtiofsd/fuse_virtio.c | 191 ++++++++++++++++++++++++----------
 3 files changed, 145 insertions(+), 57 deletions(-)

diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c
index 874f01c488..1a050aa441 100644
--- a/tools/virtiofsd/buffer.c
+++ b/tools/virtiofsd/buffer.c
@@ -77,6 +77,7 @@ static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off,
     ssize_t res = 0;
     size_t copied = 0;
 
+    assert(!(src->flags & FUSE_BUF_PHYS_ADDR));
     while (len) {
         if (dst->flags & FUSE_BUF_FD_SEEK) {
             res = pwrite(dst->fd, (char *)src->mem + src_off, len,
@@ -272,7 +273,8 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv)
      * process
      */
     for (i = 0; i < srcv->count; i++) {
-        if (srcv->buf[i].flags & FUSE_BUF_IS_FD) {
+        if ((srcv->buf[i].flags & FUSE_BUF_PHYS_ADDR) ||
+            (srcv->buf[i].flags & FUSE_BUF_IS_FD)) {
             break;
         }
     }
diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
index a090040bb2..ed9280de91 100644
--- a/tools/virtiofsd/fuse_common.h
+++ b/tools/virtiofsd/fuse_common.h
@@ -611,6 +611,13 @@ enum fuse_buf_flags {
      * detected.
      */
     FUSE_BUF_FD_RETRY = (1 << 3),
+
+    /**
+     * The addresses in the iovec represent guest physical addresses
+     * that can't be mapped by the daemon process.
+     * IO must be bounced back to the VMM to do it.
+     */
+    FUSE_BUF_PHYS_ADDR = (1 << 4),
 };
 
 /**
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
index 8feb3c0261..8fa438525f 100644
--- a/tools/virtiofsd/fuse_virtio.c
+++ b/tools/virtiofsd/fuse_virtio.c
@@ -49,6 +49,10 @@ typedef struct {
     VuVirtqElement elem;
     struct fuse_chan ch;
 
+    /* Number of unmappable iovecs */
+    unsigned bad_in_num;
+    unsigned bad_out_num;
+
     /* Used to complete requests that involve no reply */
     bool reply_sent;
 } FVRequest;
@@ -291,8 +295,10 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
 
     /* The 'in' part of the elem is to qemu */
     unsigned int in_num = elem->in_num;
+    unsigned int bad_in_num = req->bad_in_num;
     struct iovec *in_sg = elem->in_sg;
     size_t in_len = iov_size(in_sg, in_num);
+    size_t in_len_writeable = iov_size(in_sg, in_num - bad_in_num);
     fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n",
              __func__, elem->index, in_num, in_len);
 
@@ -300,7 +306,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
      * The elem should have room for a 'fuse_out_header' (out from fuse)
      * plus the data based on the len in the header.
      */
-    if (in_len < sizeof(struct fuse_out_header)) {
+    if (in_len_writeable < sizeof(struct fuse_out_header)) {
         fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n",
                  __func__, elem->index);
         ret = E2BIG;
@@ -327,7 +333,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch,
     memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num);
     /* These get updated as we skip */
     struct iovec *in_sg_ptr = in_sg_cpy;
-    int in_sg_cpy_count = in_num;
+    int in_sg_cpy_count = in_num - bad_in_num;
 
     /* skip over parts of in_sg that contained the header iov */
     size_t skip_size = iov_len;
@@ -460,17 +466,21 @@ static void fv_queue_worker(gpointer data, gpointer user_data)
 
     /* The 'out' part of the elem is from qemu */
     unsigned int out_num = elem->out_num;
+    unsigned int out_num_readable = out_num - req->bad_out_num;
     struct iovec *out_sg = elem->out_sg;
     size_t out_len = iov_size(out_sg, out_num);
+    size_t out_len_readable = iov_size(out_sg, out_num_readable);
     fuse_log(FUSE_LOG_DEBUG,
-             "%s: elem %d: with %d out desc of length %zd\n",
-             __func__, elem->index, out_num, out_len);
+             "%s: elem %d: with %d out desc of length %zd"
+             " bad_in_num=%u bad_out_num=%u\n",
+             __func__, elem->index, out_num, out_len, req->bad_in_num,
+             req->bad_out_num);
 
     /*
      * The elem should contain a 'fuse_in_header' (in to fuse)
      * plus the data based on the len in the header.
      */
-    if (out_len < sizeof(struct fuse_in_header)) {
+    if (out_len_readable < sizeof(struct fuse_in_header)) {
         fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n",
                  __func__, elem->index);
         assert(0); /* TODO */
@@ -484,63 +494,129 @@ static void fv_queue_worker(gpointer data, gpointer user_data)
     copy_from_iov(&fbuf, 1, out_sg);
 
     pbufv = NULL; /* Compiler thinks an unitialised path */
-    if (out_num > 2 &&
-        out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
-        ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
-        out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
-        /*
-         * For a write we don't actually need to copy the
-         * data, we can just do it straight out of guest memory
-         * but we must still copy the headers in case the guest
-         * was nasty and changed them while we were using them.
-         */
-        fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__);
-
-        /* copy the fuse_write_in header afte rthe fuse_in_header */
-        fbuf.mem += out_sg->iov_len;
-        copy_from_iov(&fbuf, 1, out_sg + 1);
-        fbuf.mem -= out_sg->iov_len;
-        fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
-
-        /* Allocate the bufv, with space for the rest of the iov */
-        pbufv = malloc(sizeof(struct fuse_bufvec) +
-                       sizeof(struct fuse_buf) * (out_num - 2));
-        if (!pbufv) {
-            fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
-                    __func__);
-            goto out;
-        }
+    if (req->bad_in_num || req->bad_out_num) {
+        bool handled_unmappable = false;
+
+        if (out_num > 2 && out_num_readable >= 2 && !req->bad_in_num &&
+            out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
+            ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
+            out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
+            handled_unmappable = true;
+
+            /* copy the fuse_write_in header after fuse_in_header */
+            fbuf.mem += out_sg->iov_len;
+            copy_from_iov(&fbuf, 1, out_sg + 1);
+            fbuf.mem -= out_sg->iov_len;
+            fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
+
+            /* Allocate the bufv, with space for the rest of the iov */
+            pbufv = malloc(sizeof(struct fuse_bufvec) +
+                           sizeof(struct fuse_buf) * (out_num - 2));
+            if (!pbufv) {
+                fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
+                        __func__);
+                goto out;
+            }
 
-        allocated_bufv = true;
-        pbufv->count = 1;
-        pbufv->buf[0] = fbuf;
+            allocated_bufv = true;
+            pbufv->count = 1;
+            pbufv->buf[0] = fbuf;
+
+            size_t iovindex, pbufvindex;
+            iovindex = 2; /* 2 headers, separate iovs */
+            pbufvindex = 1; /* 2 headers, 1 fusebuf */
+
+            for (; iovindex < out_num; iovindex++, pbufvindex++) {
+                pbufv->count++;
+                pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
+                pbufv->buf[pbufvindex].flags =
+                    (iovindex < out_num_readable) ? 0 :
+                                                    FUSE_BUF_PHYS_ADDR;
+                pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
+                pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
+            }
+        }
 
-        size_t iovindex, pbufvindex;
-        iovindex = 2; /* 2 headers, separate iovs */
-        pbufvindex = 1; /* 2 headers, 1 fusebuf */
+        if (out_num == 2 && out_num_readable == 2 && req->bad_in_num &&
+            out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
+            ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_READ &&
+            out_sg[1].iov_len == sizeof(struct fuse_read_in)) {
+            fuse_log(FUSE_LOG_DEBUG,
+                     "Unmappable read case "
+                     "in_num=%d bad_in_num=%d\n",
+                     elem->in_num, req->bad_in_num);
+            handled_unmappable = true;
+        }
 
-        for (; iovindex < out_num; iovindex++, pbufvindex++) {
-            pbufv->count++;
-            pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
-            pbufv->buf[pbufvindex].flags = 0;
-            pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
-            pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
+        if (!handled_unmappable) {
+            fuse_log(FUSE_LOG_ERR,
+                     "Unhandled unmappable element: out: %d(b:%d) in: "
+                     "%d(b:%d)",
+                     out_num, req->bad_out_num, elem->in_num, req->bad_in_num);
+            fv_panic(dev, "Unhandled unmappable element");
         }
-    } else {
-        /* Normal (non fast write) path */
+    }
+
+    if (!req->bad_out_num) {
+        if (out_num > 2 &&
+            out_sg[0].iov_len == sizeof(struct fuse_in_header) &&
+            ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE &&
+            out_sg[1].iov_len == sizeof(struct fuse_write_in)) {
+            /*
+             * For a write we don't actually need to copy the
+             * data, we can just do it straight out of guest memory
+             * but we must still copy the headers in case the guest
+             * was nasty and changed them while we were using them.
+             */
+            fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n",
+                     __func__);
+
+            /* copy the fuse_write_in header after fuse_in_header */
+            fbuf.mem += out_sg->iov_len;
+            copy_from_iov(&fbuf, 1, out_sg + 1);
+            fbuf.mem -= out_sg->iov_len;
+            fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len;
+
+            /* Allocate the bufv, with space for the rest of the iov */
+            pbufv = malloc(sizeof(struct fuse_bufvec) +
+                           sizeof(struct fuse_buf) * (out_num - 2));
+            if (!pbufv) {
+                fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n",
+                        __func__);
+                goto out;
+            }
 
-        /* Copy the rest of the buffer */
-        fbuf.mem += out_sg->iov_len;
-        copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
-        fbuf.mem -= out_sg->iov_len;
-        fbuf.size = out_len;
+            allocated_bufv = true;
+            pbufv->count = 1;
+            pbufv->buf[0] = fbuf;
 
-        /* TODO! Endianness of header */
+            size_t iovindex, pbufvindex;
+            iovindex = 2; /* 2 headers, separate iovs */
+            pbufvindex = 1; /* 2 headers, 1 fusebuf */
 
-        /* TODO: Add checks for fuse_session_exited */
-        bufv.buf[0] = fbuf;
-        bufv.count = 1;
-        pbufv = &bufv;
+            for (; iovindex < out_num; iovindex++, pbufvindex++) {
+                pbufv->count++;
+                pbufv->buf[pbufvindex].pos = ~0; /* Dummy */
+                pbufv->buf[pbufvindex].flags = 0;
+                pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base;
+                pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len;
+            }
+        } else {
+            /* Normal (non fast write) path */
+
+            /* Copy the rest of the buffer */
+            fbuf.mem += out_sg->iov_len;
+            copy_from_iov(&fbuf, out_num - 1, out_sg + 1);
+            fbuf.mem -= out_sg->iov_len;
+            fbuf.size = out_len;
+
+            /* TODO! Endianness of header */
+
+            /* TODO: Add checks for fuse_session_exited */
+            bufv.buf[0] = fbuf;
+            bufv.count = 1;
+            pbufv = &bufv;
+        }
     }
     pbufv->idx = 0;
     pbufv->off = 0;
@@ -657,13 +733,16 @@ static void *fv_queue_thread(void *opaque)
                  __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes);
 
         while (1) {
+            unsigned int bad_in_num = 0, bad_out_num = 0;
             FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest),
-                                          NULL, NULL);
+                                          &bad_in_num, &bad_out_num);
             if (!req) {
                 break;
             }
 
             req->reply_sent = false;
+            req->bad_in_num = bad_in_num;
+            req->bad_out_num = bad_out_num;
 
             if (!se->thread_pool_size) {
                 req_list = g_list_prepend(req_list, req);
-- 
2.29.2



  parent reply	other threads:[~2021-02-09 19:31 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-09 19:02 [PATCH 00/24] virtiofs dax patches Dr. David Alan Gilbert (git)
2021-02-09 19:02 ` [PATCH 01/24] DAX: vhost-user: Rework slave return values Dr. David Alan Gilbert (git)
2021-02-11  9:59   ` Stefan Hajnoczi
2021-02-11 15:27     ` Vivek Goyal
2021-02-18 12:18     ` Dr. David Alan Gilbert
2021-02-09 19:02 ` [PATCH 02/24] DAX: libvhost-user: Route slave message payload Dr. David Alan Gilbert (git)
2021-02-11 10:05   ` Stefan Hajnoczi
2021-02-09 19:02 ` [PATCH 03/24] DAX: libvhost-user: Allow popping a queue element with bad pointers Dr. David Alan Gilbert (git)
2021-02-11 10:12   ` Stefan Hajnoczi
2021-02-09 19:02 ` [PATCH 04/24] DAX subprojects/libvhost-user: Add virtio-fs slave types Dr. David Alan Gilbert (git)
2021-02-11 10:16   ` Stefan Hajnoczi
2021-02-09 19:02 ` [PATCH 05/24] DAX: virtio: Add shared memory capability Dr. David Alan Gilbert (git)
2021-02-11 10:17   ` Stefan Hajnoczi
2021-02-09 19:02 ` [PATCH 06/24] DAX: virtio-fs: Add cache BAR Dr. David Alan Gilbert (git)
2021-02-11 10:25   ` Stefan Hajnoczi
2021-02-18 17:33     ` Dr. David Alan Gilbert
2021-02-09 19:02 ` [PATCH 07/24] DAX: virtio-fs: Add vhost-user slave commands for mapping Dr. David Alan Gilbert (git)
2021-02-11 10:32   ` Stefan Hajnoczi
2021-03-08 17:04     ` Dr. David Alan Gilbert
2021-02-15 10:35   ` [Virtio-fs] " Chirantan Ekbote
2021-02-15 13:25     ` Dr. David Alan Gilbert
2021-02-15 14:24     ` Vivek Goyal
2021-03-11 12:15     ` Dr. David Alan Gilbert
2021-03-11 13:50       ` Vivek Goyal
2021-03-11 18:52         ` Dr. David Alan Gilbert
2021-02-09 19:02 ` [PATCH 08/24] DAX: virtio-fs: Fill in " Dr. David Alan Gilbert (git)
2021-02-11 10:57   ` Stefan Hajnoczi
2021-02-18 10:59     ` Dr. David Alan Gilbert
2021-02-09 19:02 ` [PATCH 09/24] DAX: virtiofsd Add cache accessor functions Dr. David Alan Gilbert (git)
2021-02-11 12:31   ` Stefan Hajnoczi
2021-02-09 19:02 ` [PATCH 10/24] DAX: virtiofsd: Add setup/remove mappings fuse commands Dr. David Alan Gilbert (git)
2021-02-11 12:37   ` Stefan Hajnoczi
2021-02-11 16:39     ` Dr. David Alan Gilbert
2021-02-11 18:30       ` Vivek Goyal
2021-02-11 19:50         ` Dr. David Alan Gilbert
2021-02-11 20:15           ` Vivek Goyal
2021-02-09 19:02 ` [PATCH 11/24] DAX: virtiofsd: Add setup/remove mapping handlers to passthrough_ll Dr. David Alan Gilbert (git)
2021-02-11 12:37   ` Stefan Hajnoczi
2021-02-09 19:02 ` [PATCH 12/24] DAX: virtiofsd: Wire up passthrough_ll's lo_setupmapping Dr. David Alan Gilbert (git)
2021-02-11 12:41   ` Stefan Hajnoczi
2021-02-11 16:05   ` Vivek Goyal
2021-02-09 19:02 ` [PATCH 13/24] DAX: virtiofsd: Make lo_removemapping() work Dr. David Alan Gilbert (git)
2021-02-11 12:41   ` Stefan Hajnoczi
2021-02-09 19:02 ` [PATCH 14/24] DAX: virtiofsd: route se down to destroy method Dr. David Alan Gilbert (git)
2021-02-11 12:42   ` Stefan Hajnoczi
2021-02-09 19:02 ` [PATCH 15/24] DAX: virtiofsd: Perform an unmap on destroy Dr. David Alan Gilbert (git)
2021-02-11 12:42   ` Stefan Hajnoczi
2021-02-09 19:02 ` [PATCH 16/24] DAX/unmap: virtiofsd: Add VHOST_USER_SLAVE_FS_IO Dr. David Alan Gilbert (git)
2021-02-11 14:17   ` Stefan Hajnoczi
2021-03-16 19:59     ` Dr. David Alan Gilbert
2021-03-31 10:12       ` Stefan Hajnoczi
2021-02-09 19:02 ` [PATCH 17/24] DAX/unmap virtiofsd: Add wrappers for VHOST_USER_SLAVE_FS_IO Dr. David Alan Gilbert (git)
2021-02-11 14:18   ` Stefan Hajnoczi
2021-02-09 19:02 ` Dr. David Alan Gilbert (git) [this message]
2021-02-11 14:29   ` [PATCH 18/24] DAX/unmap virtiofsd: Parse unmappable elements Stefan Hajnoczi
2021-02-25 10:19     ` Dr. David Alan Gilbert
2021-03-31 10:14       ` Stefan Hajnoczi
2021-03-17 10:33     ` Dr. David Alan Gilbert
2021-02-09 19:02 ` [PATCH 19/24] DAX/unmap virtiofsd: Route unmappable reads Dr. David Alan Gilbert (git)
2021-02-09 19:02 ` [PATCH 20/24] DAX/unmap virtiofsd: route unmappable write to slave command Dr. David Alan Gilbert (git)
2021-02-09 19:02 ` [PATCH 21/24] DAX:virtiofsd: implement FUSE_INIT map_alignment field Dr. David Alan Gilbert (git)
2021-02-09 19:02 ` [PATCH 22/24] vhost-user-fs: Extend VhostUserFSSlaveMsg to pass additional info Dr. David Alan Gilbert (git)
2021-02-09 19:02 ` [PATCH 23/24] vhost-user-fs: Implement drop CAP_FSETID functionality Dr. David Alan Gilbert (git)
2021-02-11 14:35   ` Stefan Hajnoczi
2021-02-11 14:40     ` Vivek Goyal
2021-02-15 15:57       ` Stefan Hajnoczi
2021-02-16 15:57         ` Vivek Goyal
2021-02-22 16:53           ` Stefan Hajnoczi
2021-02-09 19:02 ` [PATCH 24/24] virtiofsd: Ask qemu to drop CAP_FSETID if client asked for it Dr. David Alan Gilbert (git)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210209190224.62827-19-dgilbert@redhat.com \
    --to=dgilbert@redhat.com \
    --cc=marcandre.lureau@redhat.com \
    --cc=mst@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=vgoyal@redhat.com \
    --cc=virtio-fs@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).