From: JinYu <jin.yu@intel.com>
To: dev@dpdk.org
Cc: changpeng.liu@intel.com, tiwei.bie@intel.com,
zhihong.wang@intel.com, maxime.coquelin@redhat.com,
JinYu <jin.yu@intel.com>, Lin Li <lilin24@baidu.com>,
Xun Ni <nixun@baidu.com>, Yu Zhang <zhangyu31@baidu.com>
Subject: [dpdk-dev] [PATCH v5 1/2] vhost: support inflight share memory protocol feature
Date: Wed, 7 Aug 2019 02:24:59 +0800 [thread overview]
Message-ID: <20190806182500.22320-2-jin.yu@intel.com> (raw)
In-Reply-To: <20190806182500.22320-1-jin.yu@intel.com>
This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD
and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared
buffer between qemu and backend.
First, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the
shared buffer from the backend. Then qemu sends it back
through VHOST_USER_SET_INFLIGHT_FD each time vhost-user is started.
This shared buffer is used to track inflight I/O so that it can be
processed again when the backend reconnects.
Signed-off-by: Lin Li <lilin24@baidu.com>
Signed-off-by: Xun Ni <nixun@baidu.com>
Signed-off-by: Yu Zhang <zhangyu31@baidu.com>
Signed-off-by: JinYu <jin.yu@intel.com>
---
v1 - specify the APIs are split-ring only
v2 - fix APIs and judge split or packed
v3 - Add rte_vhost_ prefix and fix one issue.
v4 - add the packed ring support
v5 - revise get_vring_base func depend on Tiwei's suggestion
---
lib/librte_vhost/rte_vhost.h | 255 ++++++++++++++-
lib/librte_vhost/rte_vhost_version.map | 12 +
lib/librte_vhost/vhost.c | 396 +++++++++++++++++++++-
lib/librte_vhost/vhost.h | 61 ++--
lib/librte_vhost/vhost_user.c | 437 ++++++++++++++++++++++++-
lib/librte_vhost/vhost_user.h | 13 +-
6 files changed, 1128 insertions(+), 46 deletions(-)
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 0226b3eff..3f01429b1 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -11,6 +11,7 @@
*/
#include <stdint.h>
+#include <stdbool.h>
#include <sys/eventfd.h>
#include <rte_memory.h>
@@ -71,6 +72,10 @@ extern "C" {
#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
#endif
+#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
+#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
+#endif
+
/** Indicate whether protocol features negotiation is supported. */
#ifndef VHOST_USER_F_PROTOCOL_FEATURES
#define VHOST_USER_F_PROTOCOL_FEATURES 30
@@ -98,10 +103,92 @@ struct rte_vhost_memory {
struct rte_vhost_mem_region regions[];
};
+/*
+ * Inflight tracking entry for one packed-ring descriptor, stored in the
+ * shared inflight region.
+ */
+struct inflight_desc_packed {
+ uint8_t inflight; /* 1 while the entry is inflight, 0 once cleared */
+ uint8_t padding;
+ uint16_t next; /* index of the next entry in the entry list */
+ uint16_t last; /* on a chain head: index of the chain's last entry */
+ uint16_t num; /* on a chain head: number of entries in the chain */
+ uint64_t counter; /* submission counter value taken from the virtqueue */
+ uint16_t id; /* copied from the packed descriptor */
+ uint16_t flags; /* copied from the packed descriptor */
+ uint32_t len; /* copied from the packed descriptor */
+ uint64_t addr; /* copied from the packed descriptor */
+};
+
+/*
+ * Per-virtqueue header of the shared inflight region for a packed ring,
+ * followed by the inflight_desc_packed entries.
+ */
+struct inflight_info_packed {
+ uint64_t features;
+ uint16_t version;
+ uint16_t desc_num; /* set to the queue size on VHOST_USER_SET_INFLIGHT_FD */
+ uint16_t free_head; /* head of the free entry list */
+ uint16_t old_free_head; /* free_head snapshot taken when a request is cleared */
+ uint16_t used_idx; /* advanced by the chain length when a request completes */
+ uint16_t old_used_idx; /* used_idx snapshot taken when a request is cleared */
+ uint8_t used_wrap_counter; /* toggled when used_idx wraps past desc_num */
+ uint8_t old_used_wrap_counter; /* used_wrap_counter snapshot */
+ uint8_t padding[7];
+ struct inflight_desc_packed desc[0];
+};
+
+/* One entry of the list of inflight descriptors to resubmit. */
+struct rte_vhost_resubmit_desc {
+ uint16_t index; /* NOTE(review): presumably the inflight descriptor index - confirm */
+ uint64_t counter; /* counter value used to order the entries */
+};
+
+/* Resubmit bookkeeping of one virtqueue; released when the vq is cleaned up. */
+struct rte_vhost_resubmit_info {
+ struct rte_vhost_resubmit_desc *resubmit_list; /* heap-allocated array */
+ uint16_t resubmit_num; /* NOTE(review): presumably the entry count of resubmit_list - confirm */
+};
+
+/*
+ * Inflight state of one vring, as filled in by
+ * rte_vhost_get_vhost_ring_inflight().
+ */
+struct rte_vhost_ring_inflight {
+ /* Which member is valid depends on whether the device uses a packed ring. */
+ union {
+ struct inflight_info_split *inflight_split;
+ struct inflight_info_packed *inflight_packed;
+ };
+
+ /* Copied from the virtqueue as-is; may be NULL. */
+ struct rte_vhost_resubmit_info *resubmit_inflight;
+};
+
+/* Declare packed ring related bits for older kernels */
+#ifndef VIRTIO_F_RING_PACKED
+#define VIRTIO_F_RING_PACKED 34
+
+struct vring_packed_desc {
+	uint64_t addr;
+	uint32_t len;
+	uint16_t id;
+	uint16_t flags;
+};
+
+struct vring_packed_desc_event {
+	uint16_t off_wrap;
+	uint16_t flags;
+};
+#endif
+
+/*
+ * Declare below packed ring defines unconditionally
+ * as Kernel header might use different names.
+ * They must stay outside the #ifndef above: a kernel header that already
+ * provides VIRTIO_F_RING_PACKED would otherwise leave them undefined.
+ */
+#define VRING_DESC_F_AVAIL	(1ULL << 7)
+#define VRING_DESC_F_USED	(1ULL << 15)
+
+#define VRING_EVENT_F_ENABLE 0x0
+#define VRING_EVENT_F_DISABLE 0x1
+#define VRING_EVENT_F_DESC 0x2
+
struct rte_vhost_vring {
- struct vring_desc *desc;
- struct vring_avail *avail;
- struct vring_used *used;
+ union {
+ struct vring_desc *desc;
+ struct vring_packed_desc *desc_packed;
+ };
+ union {
+ struct vring_avail *avail;
+ struct vring_packed_desc_event *driver_event;
+ };
+ union {
+ struct vring_used *used;
+ struct vring_packed_desc_event *device_event;
+ };
+
uint64_t log_guest_addr;
/** Deprecated, use rte_vhost_vring_call() instead. */
@@ -603,6 +690,33 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
*/
int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
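+/**
+ * Check whether the vhost device uses the packed ring layout.
+ *
+ * @param vid
+ * vhost device ID
+ * @return
+ * non-zero if the device uses a packed ring, 0 if it uses a split ring,
+ * -1 if the device ID is invalid
+ */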
+int __rte_experimental
+rte_vhost_vq_is_packed(int vid);
+
+/**
+ * Get guest inflight vring info, including inflight ring and resubmit list.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param vring
+ * the structure to hold the requested inflight vring info
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
+ struct rte_vhost_ring_inflight *vring);
+
/**
* Get guest vring info, including the vring address, vring size, etc.
*
@@ -616,7 +730,7 @@ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
* 0 on success, -1 on failure
*/
int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
- struct rte_vhost_vring *vring);
+ struct rte_vhost_vring *vring);
/**
* Notify the guest that used descriptors have been added to the vring. This
@@ -631,6 +745,112 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
*/
int rte_vhost_vring_call(int vid, uint16_t vring_idx);
+/**
+ * Set split inflight descriptor.
+ *
+ * This function saves a descriptor that has been consumed from the
+ * available ring.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param idx
+ * inflight entry index
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
+ uint16_t idx);
+
+/**
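+ * Set packed inflight descriptors and get the corresponding inflight entry.
+ *
+ * This function saves the descriptors that have been consumed.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param head
+ * index of the first descriptor to save
+ * @param last
+ * index of the last descriptor to save
+ * @param inflight_entry
+ * output, index of the inflight entry heading the saved chain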
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
+ uint16_t head, uint16_t last, uint16_t *inflight_entry);
+
+/**
+ * Save the head of the list of the last batch of used descriptors.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param idx
+ * descriptor entry index
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_set_last_inflight_io_split(int vid,
+ uint16_t vring_idx, uint16_t idx);
+
+/**
+ * Update the inflight free_head, used_idx and used_wrap_counter.
+ *
+ * This function updates the inflight status first, before the
+ * descriptors are marked as used.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param head
+ * inflight entry index
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_set_last_inflight_io_packed(int vid,
+ uint16_t vring_idx, uint16_t head);
+
+/**
+ * clear the split inflight status.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param last_used_idx
+ * last used idx of used ring
+ * @param idx
+ * inflight entry index
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
+ uint16_t last_used_idx, uint16_t idx);
+
+/**
+ * clear the packed inflight status.
+ *
+ * @param vid
+ * vhost device ID
+ * @param vring_idx
+ * vring index
+ * @param head
+ * inflight entry index
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
+ uint16_t head);
+
/**
* Get vhost RX queue avail count.
*
@@ -656,7 +876,8 @@ uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
* 0 on success, -1 on failure
*/
int __rte_experimental
-rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size);
+rte_vhost_get_log_base(int vid, uint64_t *log_base,
+ uint64_t *log_size);
/**
* Get last_avail/used_idx of the vhost virtqueue
@@ -676,6 +897,28 @@ int __rte_experimental
rte_vhost_get_vring_base(int vid, uint16_t queue_id,
uint16_t *last_avail_idx, uint16_t *last_used_idx);
+/**
+ * Get last_avail/last_used of the vhost virtqueue
+ *
+ * This function is designed for reconnection and is specific to the
+ * packed ring, as both values can be read from the inflight queue
+ * region.
+ *
+ * @param vid
+ * vhost device ID
+ * @param queue_id
+ * vhost queue index
+ * @param last_avail_idx
+ * vhost last_avail_idx to get
+ * @param last_used_idx
+ * vhost last_used_idx to get
+ * @return
+ * 0 on success, -1 on failure
+ */
+int __rte_experimental
+rte_vhost_get_vring_base_from_inflight(int vid,
+ uint16_t queue_id, uint16_t *last_avail_idx, uint16_t *last_used_idx);
+
/**
* Set last_avail/used_idx of the vhost virtqueue
*
@@ -692,7 +935,7 @@ rte_vhost_get_vring_base(int vid, uint16_t queue_id,
*/
int __rte_experimental
rte_vhost_set_vring_base(int vid, uint16_t queue_id,
- uint16_t last_avail_idx, uint16_t last_used_idx);
+ uint16_t last_avail_idx, uint16_t last_used_idx);
/**
* Register external message handling callbacks
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index 5f1d4a75c..99f1134ea 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -87,4 +87,16 @@ EXPERIMENTAL {
rte_vdpa_relay_vring_used;
rte_vhost_extern_callback_register;
rte_vhost_driver_set_protocol_features;
+ rte_vhost_set_inflight_desc_split;
+ rte_vhost_clr_inflight_desc_split;
+ rte_vhost_set_last_inflight_io_split;
+ rte_vhost_get_vhost_ring_inflight;
+ rte_vhost_vq_is_packed;
+ rte_vhost_set_inflight_desc_packed;
+ rte_vhost_clr_inflight_desc_packed;
+ rte_vhost_set_last_inflight_io_packed;
+ rte_vhost_get_vring_base_from_inflight;
};
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 981837b5d..751817d30 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -242,6 +242,31 @@ cleanup_vq(struct vhost_virtqueue *vq, int destroy)
close(vq->kickfd);
}
+/*
+ * Drop the virtqueue's reference to the shared inflight region and release
+ * its resubmit bookkeeping. Does nothing unless the inflight shmfd protocol
+ * feature was negotiated. The shared region itself is not unmapped here.
+ */
+void
+cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	struct rte_vhost_resubmit_info *resubmit;
+
+	if ((dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) == 0)
+		return;
+
+	if (vq_is_packed(dev))
+		vq->inflight_packed = NULL;
+	else
+		vq->inflight_split = NULL;
+
+	resubmit = vq->resubmit_inflight;
+	if (resubmit == NULL)
+		return;
+
+	/* free(NULL) is a no-op, so the list needs no separate guard */
+	free(resubmit->resubmit_list);
+	free(resubmit);
+	vq->resubmit_inflight = NULL;
+}
+
/*
* Unmap any memory, close any file descriptors and
* free any memory owned by a device.
@@ -253,8 +278,10 @@ cleanup_device(struct virtio_net *dev, int destroy)
vhost_backend_cleanup(dev);
- for (i = 0; i < dev->nr_vring; i++)
+ for (i = 0; i < dev->nr_vring; i++) {
cleanup_vq(dev->virtqueue[i], destroy);
+ cleanup_vq_inflight(dev, dev->virtqueue[i]);
+ }
}
void
@@ -726,6 +753,41 @@ rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
return 0;
}
+/*
+ * Copy the inflight region pointer (split or packed, depending on the
+ * device's ring layout) and the resubmit info pointer of one vring into
+ * @vring. Returns 0 on success, -1 if the device, the output pointer, the
+ * vring index, the virtqueue or its inflight region is missing/invalid.
+ */
+int
+rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
+		struct rte_vhost_ring_inflight *vring)
+{
+	struct virtio_net *dev = get_device(vid);
+	struct vhost_virtqueue *vq;
+
+	if (dev == NULL || vring == NULL || vring_idx >= VHOST_MAX_VRING)
+		return -1;
+
+	vq = dev->virtqueue[vring_idx];
+	if (unlikely(vq == NULL))
+		return -1;
+
+	if (vq_is_packed(dev)) {
+		if (unlikely(vq->inflight_packed == NULL))
+			return -1;
+		vring->inflight_packed = vq->inflight_packed;
+	} else {
+		if (unlikely(vq->inflight_split == NULL))
+			return -1;
+		vring->inflight_split = vq->inflight_split;
+	}
+
+	vring->resubmit_inflight = vq->resubmit_inflight;
+	return 0;
+}
+
int
rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
struct rte_vhost_vring *vring)
@@ -744,11 +806,17 @@ rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
if (!vq)
return -1;
- vring->desc = vq->desc;
- vring->avail = vq->avail;
- vring->used = vq->used;
- vring->log_guest_addr = vq->log_guest_addr;
+ if (vq_is_packed(dev)) {
+ vring->desc_packed = vq->desc_packed;
+ vring->driver_event = vq->driver_event;
+ vring->device_event = vq->device_event;
+ } else {
+ vring->desc = vq->desc;
+ vring->avail = vq->avail;
+ vring->used = vq->used;
+ }
+ vring->log_guest_addr = vq->log_guest_addr;
vring->callfd = vq->callfd;
vring->kickfd = vq->kickfd;
vring->size = vq->size;
@@ -781,6 +849,269 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
return 0;
}
+/*
+ * Report whether device @vid uses the packed ring layout.
+ * Returns the result of vq_is_packed(), or -1 for an unknown device.
+ */
+int
+rte_vhost_vq_is_packed(int vid)
+{
+	struct virtio_net *dev = get_device(vid);
+
+	if (unlikely(dev == NULL))
+		return -1;
+
+	return vq_is_packed(dev);
+}
+
+/*
+ * Mark split-ring inflight entry @idx as inflight and stamp it with the
+ * virtqueue's running submission counter.
+ *
+ * Returns 0 on success or when the inflight shmfd feature is not
+ * negotiated; -1 on an invalid device, a packed-ring device, an invalid
+ * vring, a missing inflight region or an out-of-range @idx.
+ */
+int
+rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx, uint16_t idx)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+
+	dev = get_device(vid);
+	if (unlikely(!dev))
+		return -1;
+
+	if (unlikely(!(dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
+		return 0;
+
+	if (unlikely(vq_is_packed(dev)))
+		return -1;
+
+	if (unlikely(vring_idx >= VHOST_MAX_VRING))
+		return -1;
+
+	vq = dev->virtqueue[vring_idx];
+	if (unlikely(!vq))
+		return -1;
+
+	if (unlikely(!vq->inflight_split))
+		return -1;
+
+	/* desc[] holds desc_num entries; never write past the shared region */
+	if (unlikely(idx >= vq->inflight_split->desc_num))
+		return -1;
+
+	vq->inflight_split->desc[idx].counter = vq->global_counter++;
+	vq->inflight_split->desc[idx].inflight = 1;
+	return 0;
+}
+
+/*
+ * Save the packed descriptors [head, last] (wrapping at the ring size) into
+ * free inflight entries and mark the chain as inflight. The index of the
+ * entry heading the chain is returned through @inflight_entry.
+ *
+ * Returns 0 on success or when the inflight shmfd feature is not
+ * negotiated; -1 on an invalid device, a split-ring device, an invalid
+ * vring, a missing inflight region, a NULL @inflight_entry or an
+ * out-of-range index.
+ */
+int
+rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
+		uint16_t head, uint16_t last, uint16_t *inflight_entry)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+	struct inflight_info_packed *inflight_info;
+	struct vring_packed_desc *desc;
+	uint16_t old_free_head, free_head;
+
+	dev = get_device(vid);
+	if (unlikely(!dev))
+		return -1;
+
+	if (unlikely(!(dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
+		return 0;
+
+	if (unlikely(!vq_is_packed(dev)))
+		return -1;
+
+	if (unlikely(vring_idx >= VHOST_MAX_VRING))
+		return -1;
+
+	vq = dev->virtqueue[vring_idx];
+	if (unlikely(!vq))
+		return -1;
+
+	inflight_info = vq->inflight_packed;
+	if (unlikely(!inflight_info))
+		return -1;
+
+	if (unlikely(!inflight_entry))
+		return -1;
+
+	/*
+	 * head and last index the descriptor ring. Checking them against
+	 * vq->size also rules out a modulo by zero in the loop below.
+	 */
+	if (unlikely(head >= vq->size || last >= vq->size))
+		return -1;
+
+	desc = vq->desc_packed;
+	old_free_head = inflight_info->old_free_head;
+	/* the chain head must be a valid slot of the inflight desc[] array */
+	if (unlikely(old_free_head >= inflight_info->desc_num))
+		return -1;
+	free_head = old_free_head;
+
+	/* init header descriptor */
+	inflight_info->desc[old_free_head].num = 0;
+	inflight_info->desc[old_free_head].counter = vq->global_counter++;
+	inflight_info->desc[old_free_head].inflight = 1;
+
+	/* save desc entry in flight entry */
+	while (head != ((last + 1) % vq->size)) {
+		inflight_info->desc[old_free_head].num++;
+		inflight_info->desc[free_head].addr = desc[head].addr;
+		inflight_info->desc[free_head].len = desc[head].len;
+		inflight_info->desc[free_head].flags = desc[head].flags;
+		inflight_info->desc[free_head].id = desc[head].id;
+
+		inflight_info->desc[old_free_head].last = free_head;
+		free_head = inflight_info->desc[free_head].next;
+		inflight_info->free_head = free_head;
+		head = (head + 1) % vq->size;
+	}
+
+	inflight_info->old_free_head = free_head;
+	*inflight_entry = old_free_head;
+
+	return 0;
+}
+
+/*
+ * Clear the inflight flag of split-ring inflight entry @idx and then record
+ * @last_used_idx as the inflight region's used index.
+ *
+ * Returns 0 on success or when the inflight shmfd feature is not
+ * negotiated; -1 on an invalid device, a packed-ring device, an invalid
+ * vring, a missing inflight region or an out-of-range @idx.
+ */
+int
+rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
+		uint16_t last_used_idx, uint16_t idx)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+
+	dev = get_device(vid);
+	if (unlikely(!dev))
+		return -1;
+
+	if (unlikely(!(dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
+		return 0;
+
+	if (unlikely(vq_is_packed(dev)))
+		return -1;
+
+	if (unlikely(vring_idx >= VHOST_MAX_VRING))
+		return -1;
+
+	vq = dev->virtqueue[vring_idx];
+	if (unlikely(!vq))
+		return -1;
+
+	if (unlikely(!vq->inflight_split))
+		return -1;
+
+	/* desc[] holds desc_num entries; never write past the shared region */
+	if (unlikely(idx >= vq->inflight_split->desc_num))
+		return -1;
+
+	rte_compiler_barrier();
+
+	vq->inflight_split->desc[idx].inflight = 0;
+
+	/* keep the flag store ahead of the used_idx store in program order */
+	rte_compiler_barrier();
+
+	vq->inflight_split->used_idx = last_used_idx;
+	return 0;
+}
+
+/*
+ * Clear the inflight flag of packed-ring inflight entry @head and then
+ * snapshot free_head, used_idx and used_wrap_counter into their old_*
+ * counterparts.
+ *
+ * Returns 0 on success or when the inflight shmfd feature is not
+ * negotiated; -1 on an invalid device, a split-ring device, an invalid
+ * vring, a missing inflight region or an out-of-range @head.
+ */
+int
+rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
+		uint16_t head)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+	struct inflight_info_packed *inflight_info;
+
+	dev = get_device(vid);
+	if (unlikely(!dev))
+		return -1;
+
+	if (unlikely(!(dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
+		return 0;
+
+	if (unlikely(!vq_is_packed(dev)))
+		return -1;
+
+	if (unlikely(vring_idx >= VHOST_MAX_VRING))
+		return -1;
+
+	vq = dev->virtqueue[vring_idx];
+	if (unlikely(!vq))
+		return -1;
+
+	inflight_info = vq->inflight_packed;
+	if (unlikely(!inflight_info))
+		return -1;
+
+	/* desc[] holds desc_num entries; never write past the shared region */
+	if (unlikely(head >= inflight_info->desc_num))
+		return -1;
+
+	rte_compiler_barrier();
+
+	inflight_info->desc[head].inflight = 0;
+
+	/* keep the flag store ahead of the snapshot stores in program order */
+	rte_compiler_barrier();
+
+	inflight_info->old_free_head = inflight_info->free_head;
+	inflight_info->old_used_idx = inflight_info->used_idx;
+	inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;
+
+	return 0;
+}
+
+/*
+ * Record @idx as the last inflight I/O of a split-ring virtqueue.
+ *
+ * Returns 0 on success or when the inflight shmfd feature is not
+ * negotiated; -1 on an invalid device, a packed-ring device, an invalid
+ * vring, a missing inflight region or an out-of-range @idx.
+ */
+int
+rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
+		uint16_t idx)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+
+	dev = get_device(vid);
+	if (unlikely(!dev))
+		return -1;
+
+	if (unlikely(!(dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
+		return 0;
+
+	if (unlikely(vq_is_packed(dev)))
+		return -1;
+
+	if (unlikely(vring_idx >= VHOST_MAX_VRING))
+		return -1;
+
+	vq = dev->virtqueue[vring_idx];
+	if (unlikely(!vq))
+		return -1;
+
+	if (unlikely(!vq->inflight_split))
+		return -1;
+
+	/*
+	 * The stored value is used as an index into desc[] when the inflight
+	 * region is checked later, so refuse anything outside the array.
+	 */
+	if (unlikely(idx >= vq->inflight_split->desc_num))
+		return -1;
+
+	vq->inflight_split->last_inflight_io = idx;
+	return 0;
+}
+
+/*
+ * Return the inflight chain headed by @head to the free list and advance
+ * the inflight used_idx by the chain length, toggling used_wrap_counter
+ * when used_idx wraps past desc_num.
+ *
+ * Returns 0 on success or when the inflight shmfd feature is not
+ * negotiated; -1 on an invalid device, a split-ring device, an invalid
+ * vring, a missing inflight region or an out-of-range entry index.
+ */
+int
+rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
+		uint16_t head)
+{
+	struct virtio_net *dev;
+	struct vhost_virtqueue *vq;
+	struct inflight_info_packed *inflight_info;
+	uint16_t last;
+
+	dev = get_device(vid);
+	if (unlikely(!dev))
+		return -1;
+
+	if (unlikely(!(dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
+		return 0;
+
+	if (unlikely(!vq_is_packed(dev)))
+		return -1;
+
+	if (unlikely(vring_idx >= VHOST_MAX_VRING))
+		return -1;
+
+	vq = dev->virtqueue[vring_idx];
+	if (unlikely(!vq))
+		return -1;
+
+	inflight_info = vq->inflight_packed;
+	if (unlikely(!inflight_info))
+		return -1;
+
+	/* both head and the chain tail read from it must lie inside desc[] */
+	if (unlikely(head >= inflight_info->desc_num))
+		return -1;
+
+	last = inflight_info->desc[head].last;
+	if (unlikely(last >= inflight_info->desc_num))
+		return -1;
+
+	inflight_info->desc[last].next = inflight_info->free_head;
+	inflight_info->free_head = head;
+	inflight_info->used_idx += inflight_info->desc[head].num;
+	if (inflight_info->used_idx >= inflight_info->desc_num) {
+		inflight_info->used_idx -= inflight_info->desc_num;
+		inflight_info->used_wrap_counter =
+			!inflight_info->used_wrap_counter;
+	}
+
+	return 0;
+}
+
uint16_t
rte_vhost_avail_entries(int vid, uint16_t queue_id)
{
@@ -939,13 +1270,48 @@ int rte_vhost_get_log_base(int vid, uint64_t *log_base,
int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
uint16_t *last_avail_idx, uint16_t *last_used_idx)
{
+ struct vhost_virtqueue *vq;
struct virtio_net *dev = get_device(vid);
if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
return -1;
- *last_avail_idx = dev->virtqueue[queue_id]->last_avail_idx;
- *last_used_idx = dev->virtqueue[queue_id]->last_used_idx;
+ vq = dev->virtqueue[queue_id];
+ if (!vq)
+ return -1;
+
+ if (vq_is_packed(dev)) {
+ *last_avail_idx = (vq->avail_wrap_counter << 15) |
+ vq->last_avail_idx;
+ *last_used_idx = (vq->used_wrap_counter << 15) |
+ vq->last_used_idx;
+ } else {
+ *last_avail_idx = vq->last_avail_idx;
+ *last_used_idx = vq->last_used_idx;
+ }
+
+ return 0;
+}
+
+/*
+ * Read the ring base of a packed virtqueue from its inflight region: both
+ * outputs are set to old_used_idx with old_used_wrap_counter in bit 15.
+ * Returns 0 on success, -1 on invalid arguments, a split-ring device, a
+ * missing virtqueue or a missing inflight region.
+ */
+int rte_vhost_get_vring_base_from_inflight(int vid,
+		uint16_t queue_id, uint16_t *last_avail_idx, uint16_t *last_used_idx)
+{
+	struct inflight_info_packed *inflight_info;
+	struct vhost_virtqueue *vq;
+	struct virtio_net *dev = get_device(vid);
+
+	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
+		return -1;
+
+	/* queue_id indexes dev->virtqueue[]; bound it like the other vring APIs */
+	if (queue_id >= VHOST_MAX_VRING)
+		return -1;
+
+	if (!vq_is_packed(dev))
+		return -1;
+
+	/* the slot may not hold a virtqueue; do not dereference a NULL one */
+	vq = dev->virtqueue[queue_id];
+	if (!vq)
+		return -1;
+
+	inflight_info = vq->inflight_packed;
+	if (!inflight_info)
+		return -1;
+
+	*last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
+			inflight_info->old_used_idx;
+	*last_used_idx = *last_avail_idx;
return 0;
}
@@ -953,13 +1319,25 @@ int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
uint16_t last_avail_idx, uint16_t last_used_idx)
{
+ struct vhost_virtqueue *vq;
struct virtio_net *dev = get_device(vid);
if (!dev)
return -1;
- dev->virtqueue[queue_id]->last_avail_idx = last_avail_idx;
- dev->virtqueue[queue_id]->last_used_idx = last_used_idx;
+ vq = dev->virtqueue[queue_id];
+ if (!vq)
+ return -1;
+
+ if (vq_is_packed(dev)) {
+ vq->last_avail_idx = last_avail_idx & 0x7fff;
+ vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
+ vq->last_used_idx = last_used_idx & 0x7fff;
+ vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
+ } else {
+ vq->last_avail_idx = last_avail_idx;
+ vq->last_used_idx = last_used_idx;
+ }
return 0;
}
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 884befa85..e9d0b983d 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -88,6 +88,22 @@ struct vring_used_elem_packed {
uint32_t count;
};
+/*
+ * Inflight tracking entry for one split-ring descriptor, stored in the
+ * shared inflight region.
+ */
+struct inflight_desc_split {
+ uint8_t inflight; /* 1 while the descriptor is inflight, 0 once cleared */
+ uint8_t padding[5];
+ uint16_t next;
+ uint64_t counter; /* submission counter value taken from the virtqueue */
+};
+
+/*
+ * Per-virtqueue header of the shared inflight region for a split ring,
+ * followed by the inflight_desc_split entries.
+ */
+struct inflight_info_split {
+ uint64_t features;
+ uint16_t version; /* 0 until the region is first checked, then INFLIGHT_VERSION */
+ uint16_t desc_num; /* set to the queue size on VHOST_USER_SET_INFLIGHT_FD */
+ uint16_t last_inflight_io; /* set by rte_vhost_set_last_inflight_io_split() */
+ uint16_t used_idx; /* used index recorded when an entry is cleared */
+ struct inflight_desc_split desc[0];
+};
+
/**
* Structure contains variables relevant to RX/TX virtqueues.
*/
@@ -128,6 +144,14 @@ struct vhost_virtqueue {
/* Physical address of used ring, for logging */
uint64_t log_guest_addr;
+ /* inflight share memory info */
+ union {
+ struct inflight_info_split *inflight_split;
+ struct inflight_info_packed *inflight_packed;
+ };
+ struct rte_vhost_resubmit_info *resubmit_inflight;
+ uint64_t global_counter;
+
uint16_t nr_zmbuf;
uint16_t zmbuf_size;
uint16_t last_zmbuf_idx;
@@ -215,35 +239,6 @@ struct vhost_msg {
#define VIRTIO_F_VERSION_1 32
#endif
-/* Declare packed ring related bits for older kernels */
-#ifndef VIRTIO_F_RING_PACKED
-
-#define VIRTIO_F_RING_PACKED 34
-
-struct vring_packed_desc {
- uint64_t addr;
- uint32_t len;
- uint16_t id;
- uint16_t flags;
-};
-
-struct vring_packed_desc_event {
- uint16_t off_wrap;
- uint16_t flags;
-};
-#endif
-
-/*
- * Declare below packed ring defines unconditionally
- * as Kernel header might use different names.
- */
-#define VRING_DESC_F_AVAIL (1ULL << 7)
-#define VRING_DESC_F_USED (1ULL << 15)
-
-#define VRING_EVENT_F_ENABLE 0x0
-#define VRING_EVENT_F_DISABLE 0x1
-#define VRING_EVENT_F_DESC 0x2
-
/*
* Available and used descs are in same order
*/
@@ -286,6 +281,12 @@ struct guest_page {
uint64_t size;
};
+/* Mapping of the shared inflight region owned by a device. */
+struct inflight_mem_info {
+ int fd; /* file descriptor backing the region */
+ void *addr; /* address the region is mapped at, NULL when unmapped */
+ uint64_t size; /* length of the mapping in bytes */
+};
+
/**
* Device structure contains all configuration information relating
* to the device.
@@ -303,6 +304,7 @@ struct virtio_net {
uint32_t nr_vring;
int dequeue_zero_copy;
struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
+ struct inflight_mem_info inflight_info;
#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
char ifname[IF_NAME_SZ];
uint64_t log_size;
@@ -467,6 +469,7 @@ void vhost_destroy_device(int);
void vhost_destroy_device_notify(struct virtio_net *dev);
void cleanup_vq(struct vhost_virtqueue *vq, int destroy);
+void cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq);
void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq);
int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index c9e29ece8..4984dd874 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -31,6 +31,8 @@
#include <sys/stat.h>
#include <sys/syscall.h>
#include <assert.h>
+#include <sys/syscall.h>
+#include <asm/unistd.h>
#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif
@@ -49,6 +51,15 @@
#define VIRTIO_MIN_MTU 68
#define VIRTIO_MAX_MTU 65535
+/* Per-virtqueue inflight area sizes are rounded up to this many bytes. */
+#define INFLIGHT_ALIGNMENT 64
+/* Written into the version field of a split inflight region on first check. */
+#define INFLIGHT_VERSION 0xabcd
+/* Used to bound the counter distance in resubmit_desc_compare(). */
+#define VIRTQUEUE_MAX_SIZE 1024
+
+/*
+ * Flags value passed to memfd_create through mem_create().
+ * NOTE(review): presumably mirrors MFD_CLOEXEC - confirm.
+ */
+#define CLOEXEC 0x0001U
+
+/* Round n down / up to a multiple of m; both evaluate m more than once. */
+#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
+#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
+
static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_NONE] = "VHOST_USER_NONE",
[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
@@ -78,6 +89,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE",
[VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN",
[VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END",
+ [VHOST_USER_GET_INFLIGHT_FD] = "VHOST_USER_GET_INFLIGHT_FD",
+ [VHOST_USER_SET_INFLIGHT_FD] = "VHOST_USER_SET_INFLIGHT_FD",
};
static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg);
@@ -160,6 +173,16 @@ vhost_backend_cleanup(struct virtio_net *dev)
dev->log_addr = 0;
}
+ if (dev->inflight_info.addr) {
+ munmap(dev->inflight_info.addr, dev->inflight_info.size);
+ dev->inflight_info.addr = NULL;
+ }
+
+ if (dev->inflight_info.fd > 0) {
+ close(dev->inflight_info.fd);
+ dev->inflight_info.fd = -1;
+ }
+
if (dev->slave_req_fd >= 0) {
close(dev->slave_req_fd);
dev->slave_req_fd = -1;
@@ -306,6 +329,7 @@ vhost_user_set_features(struct virtio_net **pdev, struct VhostUserMsg *msg,
dev->virtqueue[dev->nr_vring] = NULL;
cleanup_vq(vq, 1);
+ cleanup_vq_inflight(dev, vq);
free_vq(dev, vq);
}
}
@@ -616,7 +640,6 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
dev->vid);
return dev;
}
-
return dev;
}
@@ -1165,6 +1188,219 @@ virtio_is_ready(struct virtio_net *dev)
return 1;
}
+/*
+ * Thin wrapper around the memfd_create system call.
+ * Returns the new file descriptor, or -1 when the call fails or the
+ * syscall number is not known at build time.
+ */
+static int mem_create(const char *name, unsigned int flags)
+{
+	int memfd = -1;
+
+#ifdef __NR_memfd_create
+	memfd = syscall(__NR_memfd_create, name, flags);
+#else
+	(void)name;
+	(void)flags;
+#endif
+	return memfd;
+}
+
+/*
+ * Allocate a shared memory area of @size bytes backed by a file descriptor.
+ *
+ * A memfd named @name is tried first. If that is not available, an
+ * already-unlinked temporary file under /tmp is used instead.
+ *
+ * On success the mapping is returned and *fd holds the backing descriptor,
+ * which the caller owns. On failure NULL is returned and *fd is -1.
+ */
+void *inflight_mem_alloc(const char *name, size_t size, int *fd)
+{
+	void *ptr;
+	int mfd;
+	char fname[20] = "/tmp/memfd-XXXXXX";
+
+	*fd = -1;
+	mfd = mem_create(name, CLOEXEC);
+	if (mfd == -1) {
+		mfd = mkstemp(fname);
+		if (mfd == -1) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"mkstemp fail for alloc inflight buffer\n");
+			return NULL;
+		}
+		/* only unlink once mkstemp() has really created the file */
+		unlink(fname);
+	}
+
+	if (ftruncate(mfd, size) == -1) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"ftruncate fail for alloc inflight buffer\n");
+		close(mfd);
+		return NULL;
+	}
+
+	ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
+	if (ptr == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"mmap fail for alloc inflight buffer\n");
+		close(mfd);
+		return NULL;
+	}
+
+	*fd = mfd;
+	return ptr;
+}
+
+/*
+ * Size in bytes of one split-ring virtqueue's inflight area: the
+ * inflight_info_split header plus queue_size descriptor entries, rounded
+ * up to INFLIGHT_ALIGNMENT. The header size comes from the struct itself
+ * so it cannot drift from the layout.
+ */
+static uint32_t get_pervq_shm_size_split(uint16_t queue_size)
+{
+	return ALIGN_UP(sizeof(struct inflight_desc_split) * queue_size +
+		sizeof(struct inflight_info_split), INFLIGHT_ALIGNMENT);
+}
+
+/*
+ * Size in bytes of one packed-ring virtqueue's inflight area: the
+ * inflight_info_packed header plus queue_size descriptor entries, rounded
+ * up to INFLIGHT_ALIGNMENT. sizeof() is used for the header because the
+ * descriptor array starts after the struct's alignment padding, which a
+ * hand-summed field count misses.
+ */
+static uint32_t get_pervq_shm_size_packed(uint16_t queue_size)
+{
+	return ALIGN_UP(sizeof(struct inflight_desc_packed) * queue_size +
+		sizeof(struct inflight_info_packed), INFLIGHT_ALIGNMENT);
+}
+
+/*
+ * Handle VHOST_USER_GET_INFLIGHT_FD.
+ *
+ * Allocates a zeroed shared area large enough for num_queues virtqueues of
+ * queue_size descriptors, remembers it in dev->inflight_info and returns
+ * its fd, size and offset in the reply. For packed rings each queue's wrap
+ * counters and entry list links are pre-initialized.
+ *
+ * Returns RTE_VHOST_MSG_RESULT_REPLY on success and
+ * RTE_VHOST_MSG_RESULT_ERR on a malformed message or allocation failure.
+ */
+static int
+vhost_user_get_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
+		int main_fd __rte_unused)
+{
+	int fd, i, j;
+	uint32_t k;
+	uint64_t pervq_inflight_size, mmap_size;
+	void *addr;
+	uint16_t num_queues, queue_size;
+	struct virtio_net *dev = *pdev;
+	struct inflight_info_packed *inflight_packed = NULL;
+
+	if (msg->size != sizeof(msg->payload.inflight)) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Invalid get_inflight_fd message size is %d\n",
+			msg->size);
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+
+	num_queues = msg->payload.inflight.num_queues;
+	queue_size = msg->payload.inflight.queue_size;
+
+	RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd num_queues: %u\n",
+		num_queues);
+	RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd queue_size: %u\n",
+		queue_size);
+
+	if (vq_is_packed(dev))
+		pervq_inflight_size = get_pervq_shm_size_packed(queue_size);
+	else
+		pervq_inflight_size = get_pervq_shm_size_split(queue_size);
+
+	mmap_size = num_queues * pervq_inflight_size;
+	addr = inflight_mem_alloc("vhost-inflight", mmap_size, &fd);
+	if (!addr) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Failed to alloc vhost inflight area\n");
+		msg->payload.inflight.mmap_size = 0;
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+	memset(addr, 0, mmap_size);
+
+	/*
+	 * A repeated request must not leak the previous area. Virtqueues may
+	 * still point into it, so detach them before it is unmapped.
+	 */
+	if (dev->inflight_info.addr) {
+		for (k = 0; k < dev->nr_vring; k++) {
+			if (dev->virtqueue[k])
+				dev->virtqueue[k]->inflight_split = NULL;
+		}
+		munmap(dev->inflight_info.addr, dev->inflight_info.size);
+	}
+	/* same "fd > 0 means open" convention as vhost_backend_cleanup() */
+	if (dev->inflight_info.fd > 0)
+		close(dev->inflight_info.fd);
+
+	dev->inflight_info.addr = addr;
+	dev->inflight_info.size = msg->payload.inflight.mmap_size = mmap_size;
+	dev->inflight_info.fd = msg->fds[0] = fd;
+	msg->payload.inflight.mmap_offset = 0;
+	msg->fd_num = 1;
+
+	if (vq_is_packed(dev)) {
+		for (i = 0; i < num_queues; i++) {
+			inflight_packed = (struct inflight_info_packed *)addr;
+			inflight_packed->used_wrap_counter = 1;
+			inflight_packed->old_used_wrap_counter = 1;
+			/* chain every entry to its successor */
+			for (j = 0; j < queue_size; j++)
+				inflight_packed->desc[j].next = j + 1;
+			addr = (void *)((char *)addr + pervq_inflight_size);
+		}
+	}
+
+	/* cast explicitly: uint64_t is not unsigned long on every target */
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"send inflight mmap_size: %llu\n",
+		(unsigned long long)msg->payload.inflight.mmap_size);
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"send inflight mmap_offset: %llu\n",
+		(unsigned long long)msg->payload.inflight.mmap_offset);
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"send inflight fd: %d\n", msg->fds[0]);
+
+	return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+/*
+ * Handle VHOST_USER_SET_INFLIGHT_FD.
+ *
+ * Maps the shared inflight area passed by the front-end, replaces any
+ * previously held area, and points each of the first num_queues
+ * virtqueues at its slice of the area, recording queue_size as desc_num.
+ *
+ * Returns RTE_VHOST_MSG_RESULT_OK on success and RTE_VHOST_MSG_RESULT_ERR
+ * on a malformed message, an inconsistent layout or an mmap failure. On
+ * error the previously held area is left untouched.
+ */
+static int
+vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
+		int main_fd __rte_unused)
+{
+	int fd, i;
+	uint64_t mmap_size, mmap_offset;
+	uint16_t num_queues, queue_size;
+	uint32_t pervq_inflight_size;
+	void *addr;
+	struct vhost_virtqueue *vq;
+	struct virtio_net *dev = *pdev;
+	const size_t max_vrings =
+		sizeof(dev->virtqueue) / sizeof(dev->virtqueue[0]);
+
+	fd = msg->fds[0];
+	if (msg->size != sizeof(msg->payload.inflight) || fd < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"Invalid set_inflight_fd message size is %d,fd is %d\n",
+			msg->size, fd);
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+
+	mmap_size = msg->payload.inflight.mmap_size;
+	mmap_offset = msg->payload.inflight.mmap_offset;
+	num_queues = msg->payload.inflight.num_queues;
+	queue_size = msg->payload.inflight.queue_size;
+
+	if (vq_is_packed(dev))
+		pervq_inflight_size = get_pervq_shm_size_packed(queue_size);
+	else
+		pervq_inflight_size = get_pervq_shm_size_split(queue_size);
+
+	/* cast explicitly: uint64_t is not unsigned long on every target */
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"set_inflight_fd mmap_size: %llu\n",
+		(unsigned long long)mmap_size);
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"set_inflight_fd mmap_offset: %llu\n",
+		(unsigned long long)mmap_offset);
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"set_inflight_fd num_queues: %u\n", num_queues);
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"set_inflight_fd queue_size: %u\n", queue_size);
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"set_inflight_fd fd: %d\n", fd);
+	RTE_LOG(INFO, VHOST_CONFIG,
+		"set_inflight_fd pervq_inflight_size: %u\n",
+		pervq_inflight_size);
+
+	/*
+	 * Every queue slice is written below, so the slices must fit both the
+	 * virtqueue array and the area that is about to be mapped.
+	 */
+	if (num_queues > max_vrings ||
+	    mmap_size < (uint64_t)num_queues * pervq_inflight_size) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"invalid set_inflight_fd queue layout.\n");
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+
+	/* map the new area first so that a failure keeps the old one valid */
+	addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+		    fd, mmap_offset);
+	if (addr == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG, "failed to mmap share memory.\n");
+		return RTE_VHOST_MSG_RESULT_ERR;
+	}
+
+	if (dev->inflight_info.addr)
+		munmap(dev->inflight_info.addr, dev->inflight_info.size);
+
+	/* same "fd > 0 means open" convention as vhost_backend_cleanup() */
+	if (dev->inflight_info.fd > 0)
+		close(dev->inflight_info.fd);
+
+	dev->inflight_info.fd = fd;
+	dev->inflight_info.addr = addr;
+	dev->inflight_info.size = mmap_size;
+
+	for (i = 0; i < num_queues; i++) {
+		vq = dev->virtqueue[i];
+		/* a queue that is not allocated still owns its slice */
+		if (vq) {
+			if (vq_is_packed(dev)) {
+				vq->inflight_packed = addr;
+				vq->inflight_packed->desc_num = queue_size;
+			} else {
+				vq->inflight_split = addr;
+				vq->inflight_split->desc_num = queue_size;
+			}
+		}
+		addr = (void *)((char *)addr + pervq_inflight_size);
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+}
+
static int
vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg,
int main_fd __rte_unused)
@@ -1201,6 +1437,189 @@ static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused,
return RTE_VHOST_MSG_RESULT_OK;
}
+/*
+ * qsort() comparator for struct rte_vhost_resubmit_desc entries.
+ *
+ * Returns 1 (a sorts after b) when b's counter is larger than a's by less
+ * than VIRTQUEUE_MAX_SIZE * 2, 0 when both counters are equal, and -1
+ * otherwise, so entries with larger counters end up first in the array.
+ * NOTE(review): the distance bound looks like a guard against counter
+ * wrap-around -- confirm with the code that increments the counter.
+ */
+static int
+resubmit_desc_compare(const void *a, const void *b)
+{
+	const struct rte_vhost_resubmit_desc *desc0 = a;
+	const struct rte_vhost_resubmit_desc *desc1 = b;
+
+	/*
+	 * Equal keys must compare equal: qsort() requires the comparator to
+	 * give consistent answers for (a, b) and (b, a).
+	 */
+	if (desc1->counter == desc0->counter)
+		return 0;
+
+	if (desc1->counter > desc0->counter &&
+	    (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2)
+		return 1;
+
+	return -1;
+}
+
+/*
+ * Rebuild the resubmit list of a split virtqueue from its inflight region.
+ *
+ * Does nothing (returns OK) when VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD has
+ * not been negotiated. Returns an error when the queue has no inflight
+ * region attached. A region whose version field is still zero is stamped
+ * with INFLIGHT_VERSION and left otherwise untouched.
+ *
+ * Otherwise every descriptor flagged inflight is collected into a newly
+ * allocated rte_vhost_resubmit_info, sorted with resubmit_desc_compare()
+ * and stored in vq->resubmit_inflight; vq->last_avail_idx is advanced by
+ * the number of such descriptors and vq->global_counter is set to the
+ * counter of the first sorted entry plus one.
+ *
+ * @param dev
+ *  Device owning the queue; only its protocol_features are read.
+ * @param vq
+ *  Virtqueue to inspect and update.
+ * @return
+ *  RTE_VHOST_MSG_RESULT_OK on success, RTE_VHOST_MSG_RESULT_ERR when the
+ *  inflight region is missing or a memory allocation fails.
+ */
+static int
+vhost_check_queue_inflights_split(struct virtio_net *dev,
+		struct vhost_virtqueue *vq)
+{
+	uint16_t i = 0;
+	uint16_t resubmit_num = 0, last_io, num;
+	struct vring_used *used = vq->used;
+	struct rte_vhost_resubmit_info *resubmit = NULL;
+	struct inflight_info_split *inflight_split;
+
+	if (!(dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
+		return RTE_VHOST_MSG_RESULT_OK;
+
+	if (!vq->inflight_split)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	inflight_split = vq->inflight_split;
+
+	/* NOTE(review): version == 0 presumably marks a fresh region. */
+	if (!inflight_split->version) {
+		inflight_split->version = INFLIGHT_VERSION;
+		return RTE_VHOST_MSG_RESULT_OK;
+	}
+
+	vq->resubmit_inflight = NULL;
+	vq->global_counter = 0;
+	last_io = inflight_split->last_inflight_io;
+
+	/*
+	 * The recorded used index lags behind the ring: clear the inflight
+	 * flag of the last recorded I/O before publishing the ring's index.
+	 */
+	if (inflight_split->used_idx != used->idx) {
+		inflight_split->desc[last_io].inflight = 0;
+		rte_compiler_barrier();
+		inflight_split->used_idx = used->idx;
+	}
+
+	for (i = 0; i < inflight_split->desc_num; i++) {
+		if (inflight_split->desc[i].inflight == 1)
+			resubmit_num++;
+	}
+
+	vq->last_avail_idx += resubmit_num;
+
+	if (resubmit_num) {
+		resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+		if (!resubmit) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"Failed to allocate memory for resubmit info.\n");
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+
+		resubmit->resubmit_list = calloc(resubmit_num,
+			sizeof(struct rte_vhost_resubmit_desc));
+		if (!resubmit->resubmit_list) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"Failed to allocate memory for inflight desc.\n");
+			/* Do not leak the container allocated just above. */
+			free(resubmit);
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+
+		num = 0;
+		for (i = 0; i < inflight_split->desc_num; i++) {
+			if (inflight_split->desc[i].inflight == 1) {
+				resubmit->resubmit_list[num].index = i;
+				resubmit->resubmit_list[num].counter =
+					inflight_split->desc[i].counter;
+				num++;
+			}
+		}
+		resubmit->resubmit_num = num;
+
+		if (resubmit->resubmit_num > 1)
+			qsort(resubmit->resubmit_list, resubmit->resubmit_num,
+				sizeof(struct rte_vhost_resubmit_desc),
+				resubmit_desc_compare);
+
+		vq->global_counter = resubmit->resubmit_list[0].counter + 1;
+		vq->resubmit_inflight = resubmit;
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+}
+
+/*
+ * Rebuild the resubmit list of a packed virtqueue from its inflight region.
+ *
+ * Does nothing (returns OK) when VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD has
+ * not been negotiated. Returns an error when the queue has no inflight
+ * region attached. A region whose version field is still zero is stamped
+ * with INFLIGHT_VERSION and left otherwise untouched.
+ *
+ * Otherwise the used_idx / used_wrap_counter / free_head fields are
+ * reconciled with their old_* copies, every descriptor flagged inflight is
+ * collected into a newly allocated rte_vhost_resubmit_info, sorted with
+ * resubmit_desc_compare() and stored in vq->resubmit_inflight;
+ * vq->global_counter is set to the counter of the first sorted entry plus
+ * one.
+ *
+ * @param dev
+ *  Device owning the queue; only its protocol_features are read.
+ * @param vq
+ *  Virtqueue to inspect and update.
+ * @return
+ *  RTE_VHOST_MSG_RESULT_OK on success, RTE_VHOST_MSG_RESULT_ERR when the
+ *  inflight region is missing or a memory allocation fails.
+ */
+static int
+vhost_check_queue_inflights_packed(struct virtio_net *dev,
+		struct vhost_virtqueue *vq)
+{
+	uint16_t i = 0;
+	uint16_t resubmit_num = 0, old_used_idx, num;
+	struct rte_vhost_resubmit_info *resubmit = NULL;
+	struct inflight_info_packed *inflight_packed;
+
+	if (!(dev->protocol_features &
+	    (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
+		return RTE_VHOST_MSG_RESULT_OK;
+
+	/* Must be tested before the region is dereferenced below. */
+	if (!vq->inflight_packed)
+		return RTE_VHOST_MSG_RESULT_ERR;
+
+	inflight_packed = vq->inflight_packed;
+
+	/* NOTE(review): version == 0 presumably marks a fresh region. */
+	if (!inflight_packed->version) {
+		inflight_packed->version = INFLIGHT_VERSION;
+		return RTE_VHOST_MSG_RESULT_OK;
+	}
+
+	vq->resubmit_inflight = NULL;
+	vq->global_counter = 0;
+	old_used_idx = inflight_packed->old_used_idx;
+
+	if (inflight_packed->used_idx != old_used_idx) {
+		if (inflight_packed->desc[old_used_idx].inflight == 0) {
+			/* Entry at the old index is done: adopt current state. */
+			inflight_packed->old_used_idx =
+				inflight_packed->used_idx;
+			inflight_packed->old_used_wrap_counter =
+				inflight_packed->used_wrap_counter;
+			inflight_packed->old_free_head =
+				inflight_packed->free_head;
+		} else {
+			/* Entry is still inflight: roll back to the old state. */
+			inflight_packed->used_idx =
+				inflight_packed->old_used_idx;
+			inflight_packed->used_wrap_counter =
+				inflight_packed->old_used_wrap_counter;
+			inflight_packed->free_head =
+				inflight_packed->old_free_head;
+		}
+	}
+
+	for (i = 0; i < inflight_packed->desc_num; i++) {
+		if (inflight_packed->desc[i].inflight == 1)
+			resubmit_num++;
+	}
+
+	if (resubmit_num) {
+		resubmit = calloc(1, sizeof(struct rte_vhost_resubmit_info));
+		if (resubmit == NULL) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"Failed to allocate memory for resubmit info.\n");
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+
+		resubmit->resubmit_list = calloc(resubmit_num,
+			sizeof(struct rte_vhost_resubmit_desc));
+		if (resubmit->resubmit_list == NULL) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"Failed to allocate memory for resubmit desc.\n");
+			/* Do not leak the container allocated just above. */
+			free(resubmit);
+			return RTE_VHOST_MSG_RESULT_ERR;
+		}
+
+		num = 0;
+		for (i = 0; i < inflight_packed->desc_num; i++) {
+			if (inflight_packed->desc[i].inflight == 1) {
+				resubmit->resubmit_list[num].index = i;
+				resubmit->resubmit_list[num].counter =
+					inflight_packed->desc[i].counter;
+				num++;
+			}
+		}
+		resubmit->resubmit_num = num;
+
+		if (resubmit->resubmit_num > 1)
+			qsort(resubmit->resubmit_list, resubmit->resubmit_num,
+				sizeof(struct rte_vhost_resubmit_desc),
+				resubmit_desc_compare);
+
+		vq->global_counter = resubmit->resubmit_list[0].counter + 1;
+		vq->resubmit_inflight = resubmit;
+	}
+
+	return RTE_VHOST_MSG_RESULT_OK;
+}
+
+
static int
vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg,
int main_fd __rte_unused)
@@ -1242,6 +1661,20 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg,
close(vq->kickfd);
vq->kickfd = file.fd;
+ if (vq_is_packed(dev)) {
+ if (vhost_check_queue_inflights_packed(dev, vq)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to inflights for vq: %d\n", file.index);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ } else {
+ if (vhost_check_queue_inflights_split(dev, vq)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to inflights for vq: %d\n", file.index);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ }
+
return RTE_VHOST_MSG_RESULT_OK;
}
@@ -1762,6 +2195,8 @@ static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = {
[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
+ [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd,
+ [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd,
};
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 2a650fe4b..99a773910 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -54,7 +54,9 @@ typedef enum VhostUserRequest {
VHOST_USER_POSTCOPY_ADVISE = 28,
VHOST_USER_POSTCOPY_LISTEN = 29,
VHOST_USER_POSTCOPY_END = 30,
- VHOST_USER_MAX = 31
+ VHOST_USER_GET_INFLIGHT_FD = 31,
+ VHOST_USER_SET_INFLIGHT_FD = 32,
+ VHOST_USER_MAX = 33
} VhostUserRequest;
typedef enum VhostUserSlaveRequest {
@@ -112,6 +114,13 @@ typedef struct VhostUserVringArea {
uint64_t offset;
} VhostUserVringArea;
+typedef struct VhostUserInflight { /* inflight payload of VhostUserMsg (payload.inflight) */
+ uint64_t mmap_size; /* length of the shared region; used as the mmap() length when handling set_inflight_fd */
+ uint64_t mmap_offset; /* offset into the fd; used as the mmap() offset when handling set_inflight_fd */
+ uint16_t num_queues; /* number of consecutive per-queue areas walked when attaching the region to virtqueues */
+ uint16_t queue_size; /* sizes each per-queue area and is stored as that area's desc_num */
+} VhostUserInflight;
+
typedef struct VhostUserMsg {
union {
uint32_t master; /* a VhostUserRequest value */
@@ -131,6 +140,7 @@ typedef struct VhostUserMsg {
struct vhost_vring_addr addr;
VhostUserMemory memory;
VhostUserLog log;
+ VhostUserInflight inflight;
struct vhost_iotlb_msg iotlb;
VhostUserCryptoSessionParam crypto_session;
VhostUserVringArea area;
@@ -148,6 +158,7 @@ typedef struct VhostUserMsg {
/* vhost_user.c */
int vhost_user_msg_handler(int vid, int fd);
int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm);
+void *inflight_mem_alloc(const char *name, size_t size, int *fd);
/* socket.c */
int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds,
--
2.17.2
next prev parent reply other threads:[~2019-08-06 10:41 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20190731204050.40633>
2019-08-06 18:24 ` [dpdk-dev] [PATCH v5 0/2] vhost: support inflight share memory protocol feature JinYu
2019-08-06 18:24 ` JinYu [this message]
2019-08-26 8:28 ` [dpdk-dev] [PATCH v5 1/2] " Tiwei Bie
2019-08-06 18:25 ` [dpdk-dev] [PATCH v5 2/2] vhost: add vhost-user-blk example which support inflight JinYu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190806182500.22320-2-jin.yu@intel.com \
--to=jin.yu@intel.com \
--cc=changpeng.liu@intel.com \
--cc=dev@dpdk.org \
--cc=lilin24@baidu.com \
--cc=maxime.coquelin@redhat.com \
--cc=nixun@baidu.com \
--cc=tiwei.bie@intel.com \
--cc=zhangyu31@baidu.com \
--cc=zhihong.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.