From: "Eugenio Pérez" <eperezma@redhat.com>
To: qemu-devel@nongnu.org
Cc: Cornelia Huck <cohuck@redhat.com>,
"Gonglei (Arei)" <arei.gonglei@huawei.com>,
Stefano Garzarella <sgarzare@redhat.com>,
Eli Cohen <eli@mellanox.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
Jason Wang <jasowang@redhat.com>,
Stefan Hajnoczi <stefanha@redhat.com>, Cindy Lu <lulu@redhat.com>,
Zhu Lingshan <lingshan.zhu@intel.com>,
Parav Pandit <parav@mellanox.com>,
Markus Armbruster <armbru@redhat.com>,
Liuxiangdong <liuxiangdong5@huawei.com>,
Laurent Vivier <lvivier@redhat.com>,
Gautam Dawar <gdawar@xilinx.com>, Eric Blake <eblake@redhat.com>,
Harpreet Singh Anand <hanand@xilinx.com>,
Paolo Bonzini <pbonzini@redhat.com>
Subject: [PATCH v3 16/19] vdpa: Buffer CVQ support on shadow virtqueue
Date: Fri, 15 Jul 2022 19:18:31 +0200 [thread overview]
Message-ID: <20220715171834.2666455-17-eperezma@redhat.com> (raw)
In-Reply-To: <20220715171834.2666455-1-eperezma@redhat.com>
Introduce the control virtqueue support for vDPA shadow virtqueue. This
is needed for advanced networking features like rx filtering.
Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
TOCTOU with the guest's or device's memory every time there is a device
model change. Otherwise, the guest could change the memory content in
the time between qemu and the device read it.
To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
implemented. If the virtio-net driver changes MAC the virtio-net device
model will be updated with the new one, and a rx filtering change event
will be raised.
More cvq commands could be added here straightforwardly but they have
not been tested.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
net/vhost-vdpa.c | 211 +++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 204 insertions(+), 7 deletions(-)
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 2e3b6b10d8..3915b148c4 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
NetClientState nc;
struct vhost_vdpa vhost_vdpa;
VHostNetState *vhost_net;
+
+ /* Control commands shadow buffers */
+ void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
bool started;
} VhostVDPAState;
@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
{
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+ qemu_vfree(s->cvq_cmd_out_buffer);
+ qemu_vfree(s->cvq_cmd_in_buffer);
if (s->vhost_net) {
vhost_net_cleanup(s->vhost_net);
g_free(s->vhost_net);
@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
.check_peer_type = vhost_vdpa_check_peer_type,
};
+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
+{
+ VhostIOVATree *tree = v->iova_tree;
+ DMAMap needle = {
+ /*
+ * No need to specify size or to look for more translations since
+ * this contiguous chunk was allocated by us.
+ */
+ .translated_addr = (hwaddr)(uintptr_t)addr,
+ };
+ const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
+ int r;
+
+ if (unlikely(!map)) {
+ error_report("Cannot locate expected map");
+ return;
+ }
+
+ r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
+ if (unlikely(r != 0)) {
+ error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
+ }
+
+ vhost_iova_tree_remove(tree, map);
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
+{
+ /*
+ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
+ * In buffer is always 1 byte, so it should fit here
+ */
+ return sizeof(struct virtio_net_ctrl_hdr) +
+ 2 * sizeof(struct virtio_net_ctrl_mac) +
+ MAC_TABLE_ENTRIES * ETH_ALEN;
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
+{
+ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
+}
+
+/** Copy and map a guest buffer. */
+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
+ const struct iovec *out_data,
+ size_t out_num, size_t data_len, void *buf,
+ size_t *written, bool write)
+{
+ DMAMap map = {};
+ int r;
+
+ if (unlikely(!data_len)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
+ __func__, write ? "in" : "out");
+ return false;
+ }
+
+ *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
+ map.translated_addr = (hwaddr)(uintptr_t)buf;
+ map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
+ map.perm = write ? IOMMU_RW : IOMMU_RO,
+ r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
+ if (unlikely(r != IOVA_OK)) {
+ error_report("Cannot map injected element");
+ return false;
+ }
+
+ r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
+ !write);
+ if (unlikely(r < 0)) {
+ goto dma_map_err;
+ }
+
+ return true;
+
+dma_map_err:
+ vhost_iova_tree_remove(v->iova_tree, &map);
+ return false;
+}
+
/**
- * Forward buffer for the moment.
+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC
+ *
+ * @iov: [0] is the out buffer, [1] is the in one
+ */
+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
+ VirtQueueElement *elem,
+ struct iovec *iov)
+{
+ size_t in_copied;
+ bool ok;
+
+ iov[0].iov_base = s->cvq_cmd_out_buffer;
+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
+ vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
+ &iov[0].iov_len, false);
+ if (unlikely(!ok)) {
+ return false;
+ }
+
+ iov[1].iov_base = s->cvq_cmd_in_buffer;
+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
+ sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
+ &in_copied, true);
+ if (unlikely(!ok)) {
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
+ return false;
+ }
+
+ iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
+ return true;
+}
+
+/**
+ * Do not forward commands not supported by SVQ. Otherwise, the device could
+ * accept it and qemu would not know how to update the device model.
+ */
+static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
+ size_t out_num)
+{
+ struct virtio_net_ctrl_hdr ctrl;
+ size_t n;
+
+ n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
+ if (unlikely(n < sizeof(ctrl))) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: invalid legnth of out buffer %zu\n", __func__, n);
+ return false;
+ }
+
+ switch (ctrl.class) {
+ case VIRTIO_NET_CTRL_MAC:
+ switch (ctrl.cmd) {
+ case VIRTIO_NET_CTRL_MAC_ADDR_SET:
+ return true;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
+ __func__, ctrl.cmd);
+ };
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
+ __func__, ctrl.class);
+ };
+
+ return false;
+}
+
+/**
+ * Validate and copy control virtqueue commands.
+ *
+ * Following QEMU guidelines, we offer a copy of the buffers to the device to
+ * prevent TOCTOU bugs.
*/
static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
VirtQueueElement *elem,
void *opaque)
{
- unsigned int n = elem->out_num + elem->in_num;
- g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
+ VhostVDPAState *s = opaque;
size_t in_len, dev_written;
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+ /* out and in buffers sent to the device */
+ struct iovec dev_buffers[2] = {
+ { .iov_base = s->cvq_cmd_out_buffer },
+ { .iov_base = s->cvq_cmd_in_buffer },
+ };
+ /* in buffer used for device model */
+ const struct iovec in = {
+ .iov_base = &status,
+ .iov_len = sizeof(status),
+ };
int r;
+ bool ok;
- memcpy(dev_buffers, elem->out_sg, elem->out_num);
- memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
+ ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
+ if (unlikely(!ok)) {
+ goto out;
+ }
- r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
- elem->in_num, elem);
+ ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
+ if (unlikely(!ok)) {
+ goto out;
+ }
+
+ r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
if (unlikely(r != 0)) {
if (unlikely(r == -ENOSPC)) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
@@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
dev_written = vhost_svq_poll(svq);
if (unlikely(dev_written < sizeof(status))) {
error_report("Insufficient written data (%zu)", dev_written);
+ goto out;
+ }
+
+ memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
+ if (status != VIRTIO_NET_OK) {
+ goto out;
+ }
+
+ status = VIRTIO_NET_ERR;
+ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
+ if (status != VIRTIO_NET_OK) {
+ error_report("Bad CVQ processing in model");
}
out:
@@ -234,6 +418,12 @@ out:
}
vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
g_free(elem);
+ if (dev_buffers[0].iov_base) {
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
+ }
+ if (dev_buffers[1].iov_base) {
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
+ }
return r;
}
@@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
s->vhost_vdpa.device_fd = vdpa_device_fd;
s->vhost_vdpa.index = queue_pair_index;
if (!is_datapath) {
+ s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
+ vhost_vdpa_net_cvq_cmd_page_len());
+ memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
+ s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(),
+ vhost_vdpa_net_cvq_cmd_page_len());
+ memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
+
s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
s->vhost_vdpa.shadow_vq_ops_opaque = s;
}
--
2.31.1
next prev parent reply other threads:[~2022-07-15 17:38 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-15 17:18 [PATCH v3 00/19] vdpa net devices Rx filter change notification with Shadow VQ Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 01/19] vhost: move descriptor translation to vhost_svq_vring_write_descs Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 02/19] virtio-net: Expose MAC_TABLE_ENTRIES Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 03/19] virtio-net: Expose ctrl virtqueue logic Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 04/19] vhost: Reorder vhost_svq_kick Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 05/19] vhost: Move vhost_svq_kick call to vhost_svq_add Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 06/19] vhost: Check for queue full at vhost_svq_add Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 07/19] vhost: Decouple vhost_svq_add from VirtQueueElement Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 08/19] vhost: Add SVQDescState Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 09/19] vhost: Track number of descs in SVQDescState Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 10/19] vhost: add vhost_svq_push_elem Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 11/19] vhost: Expose vhost_svq_add Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 12/19] vhost: add vhost_svq_poll Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 13/19] vhost: Add svq avail_handler callback Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 14/19] vdpa: Export vhost_vdpa_dma_map and unmap calls Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 15/19] vdpa: manual forward CVQ buffers Eugenio Pérez
2022-07-15 17:18 ` Eugenio Pérez [this message]
2022-07-15 17:18 ` [PATCH v3 17/19] vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 18/19] vdpa: Add device migration blocker Eugenio Pérez
2022-07-15 17:18 ` [PATCH v3 19/19] vdpa: Add x-svq to NetdevVhostVDPAOptions Eugenio Pérez
2022-07-18 3:32 ` [PATCH v3 00/19] vdpa net devices Rx filter change notification with Shadow VQ Jason Wang
2022-07-19 11:28 ` Michael S. Tsirkin
2022-07-19 11:28 ` Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220715171834.2666455-17-eperezma@redhat.com \
--to=eperezma@redhat.com \
--cc=arei.gonglei@huawei.com \
--cc=armbru@redhat.com \
--cc=cohuck@redhat.com \
--cc=eblake@redhat.com \
--cc=eli@mellanox.com \
--cc=gdawar@xilinx.com \
--cc=hanand@xilinx.com \
--cc=jasowang@redhat.com \
--cc=lingshan.zhu@intel.com \
--cc=liuxiangdong5@huawei.com \
--cc=lulu@redhat.com \
--cc=lvivier@redhat.com \
--cc=mst@redhat.com \
--cc=parav@mellanox.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=sgarzare@redhat.com \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).