From: patrick.fu@intel.com
To: dev@dpdk.org, maxime.coquelin@redhat.com, chenbo.xia@intel.com,
zhihong.wang@intel.com, xiaolong.ye@intel.com
Cc: patrick.fu@intel.com, cheng1.jiang@intel.com, cunming.liang@intel.com
Subject: [dpdk-dev] [PATCH v1 1/2] vhost: introduce async data path registration API
Date: Thu, 11 Jun 2020 18:02:04 +0800 [thread overview]
Message-ID: <1591869725-13331-2-git-send-email-patrick.fu@intel.com> (raw)
In-Reply-To: <1591869725-13331-1-git-send-email-patrick.fu@intel.com>
From: Patrick <patrick.fu@intel.com>
This patch introduces registration/un-registration APIs
for async data path together with all required data
structures and DMA callback function prototypes.
Signed-off-by: Patrick <patrick.fu@intel.com>
---
lib/librte_vhost/Makefile | 3 +-
lib/librte_vhost/rte_vhost.h | 1 +
lib/librte_vhost/rte_vhost_async.h | 134 +++++++++++++++++++++++++++++++++++++
lib/librte_vhost/socket.c | 20 ++++++
lib/librte_vhost/vhost.c | 74 +++++++++++++++++++-
lib/librte_vhost/vhost.h | 30 ++++++++-
lib/librte_vhost/vhost_user.c | 28 ++++++--
7 files changed, 283 insertions(+), 7 deletions(-)
create mode 100644 lib/librte_vhost/rte_vhost_async.h
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index e592795..3aed094 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -41,7 +41,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
vhost_user.c virtio_net.c vdpa.c
# install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h \
+ rte_vhost_async.h
# only compile vhost crypto when cryptodev is enabled
ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index d43669f..cec4d07 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -35,6 +35,7 @@
#define RTE_VHOST_USER_EXTBUF_SUPPORT (1ULL << 5)
/* support only linear buffers (no chained mbufs) */
#define RTE_VHOST_USER_LINEARBUF_SUPPORT (1ULL << 6)
+#define RTE_VHOST_USER_ASYNC_COPY (1ULL << 7)
/** Protocol features. */
#ifndef VHOST_USER_PROTOCOL_F_MQ
diff --git a/lib/librte_vhost/rte_vhost_async.h b/lib/librte_vhost/rte_vhost_async.h
new file mode 100644
index 0000000..82f2ebe
--- /dev/null
+++ b/lib/librte_vhost/rte_vhost_async.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_VHOST_ASYNC_H_
+#define _RTE_VHOST_ASYNC_H_
+
+#include "rte_vhost.h"
+
+/**
+ * iovec iterator
+ *
+ * Describes a run of data laid out over an array of iovec segments.
+ * NOTE(review): struct iovec is not declared by this header itself;
+ * presumably it arrives via rte_vhost.h / <sys/uio.h> - confirm.
+ */
+struct iov_it {
+ /** offset to the first byte of interesting data */
+ size_t offset;
+ /** total bytes of data in this iterator */
+ size_t count;
+ /** pointer to the iovec array */
+ struct iovec *iov;
+ /** number of iovec in this iterator */
+ unsigned long nr_segs;
+};
+
+/**
+ * dma transfer descriptor pair
+ *
+ * Couples one source iov_it with one destination iov_it; an array of
+ * these is what the transfer_data callback receives as "descs".
+ */
+struct dma_trans_desc {
+ /** source memory iov_it */
+ struct iov_it *src;
+ /** destination memory iov_it */
+ struct iov_it *dst;
+};
+
+/**
+ * dma transfer status
+ *
+ * Carries the opaque data pair exchanged with the DMA callbacks.
+ * NOTE(review): the required length of the two arrays is not stated
+ * here - presumably one entry per descriptor/packet; confirm.
+ */
+struct dma_trans_status {
+ /** An array of application specific data for source memory */
+ uintptr_t *src_opaque_data;
+ /** An array of application specific data for destination memory */
+ uintptr_t *dst_opaque_data;
+};
+
+/**
+ * dma operation callbacks to be implemented by applications
+ *
+ * Both callbacks must be non-NULL for a channel to be registered.
+ */
+struct rte_vhost_async_channel_ops {
+ /**
+ * instruct a DMA channel to perform copies for a batch of packets
+ *
+ * @param vid
+ * id of vhost device to perform data copies
+ * @param queue_id
+ * queue id to perform data copies
+ * @param descs
+ * an array of DMA transfer memory descriptors
+ * @param opaque_data
+ * opaque data pair sending to DMA engine
+ * @param count
+ * number of elements in the "descs" array
+ * @return
+ * -1 on failure, number of descs processed on success
+ */
+ int (*transfer_data)(int vid, uint16_t queue_id,
+ struct dma_trans_desc *descs,
+ struct dma_trans_status *opaque_data,
+ uint16_t count);
+ /**
+ * check copy-completed packets from a DMA channel
+ * @param vid
+ * id of vhost device to check copy completion
+ * @param queue_id
+ * queue id to check copy completion
+ * @param opaque_data
+ * buffer to receive the opaque data pair from DMA engine
+ * @param max_packets
+ * max number of packets could be completed
+ * @return
+ * -1 on failure, number of iov segments completed on success
+ *
+ * NOTE(review): the limit is expressed in packets while the return
+ * value is documented in iov segments - confirm which unit the
+ * caller is expected to account in.
+ */
+ int (*check_completed_copies)(int vid, uint16_t queue_id,
+ struct dma_trans_status *opaque_data,
+ uint16_t max_packets);
+};
+
+/**
+ * dma channel feature bit definition
+ *
+ * Overlay used to decode the 32-bit "features" word passed to
+ * rte_vhost_async_channel_register(). Bit positions below follow the
+ * layout described in that function's documentation.
+ */
+struct dma_channel_features {
+ union {
+ /** all feature bits viewed as a single 32-bit word */
+ uint32_t intval;
+ struct {
+ /** b0: DMA supports inorder data transfer */
+ uint32_t inorder:1;
+ /** b1 - b15: reserved */
+ uint32_t resvd0115:15;
+ /** b16 - b27: packet length threshold for DMA transfer */
+ uint32_t threshold:12;
+ /** b28 - b31: reserved */
+ uint32_t resvd2831:4;
+ };
+ };
+};
+
+/**
+ * register a dma channel for vhost
+ *
+ * Registration is rejected when the device or virtqueue does not exist,
+ * when either callback in "ops" is NULL, when the ring is packed, or
+ * when the inorder feature bit (b0) is not set.
+ *
+ * @param vid
+ * vhost device id DMA channel to be attached to
+ * @param queue_id
+ * vhost queue id DMA channel to be attached to
+ * @param features
+ * DMA channel feature bit
+ * b0 : DMA supports inorder data transfer
+ * b1 - b15: reserved
+ * b16 - b27: Packet length threshold for DMA transfer
+ * b28 - b31: reserved
+ * @param ops
+ * DMA operation callbacks
+ * @return
+ * 0 on success, -1 on failures
+ */
+int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
+ uint32_t features, struct rte_vhost_async_channel_ops *ops);
+
+/**
+ * unregister a dma channel for vhost
+ *
+ * Clears the callbacks previously installed on the virtqueue and marks
+ * the queue as no longer async-registered.
+ *
+ * @param vid
+ * vhost device id DMA channel to be detached
+ * @param queue_id
+ * vhost queue id DMA channel to be detached
+ * @return
+ * 0 on success, -1 on failures
+ */
+int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id);
+
+#endif /* _RTE_VHOST_ASYNC_H_ */
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 0a66ef9..f817783 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -42,6 +42,7 @@ struct vhost_user_socket {
bool use_builtin_virtio_net;
bool extbuf;
bool linearbuf;
+ bool async_copy;
/*
* The "supported_features" indicates the feature bits the
@@ -210,6 +211,7 @@ struct vhost_user {
size_t size;
struct vhost_user_connection *conn;
int ret;
+ struct virtio_net *dev;
if (vsocket == NULL)
return;
@@ -241,6 +243,13 @@ struct vhost_user {
if (vsocket->linearbuf)
vhost_enable_linearbuf(vid);
+ if (vsocket->async_copy) {
+ dev = get_device(vid);
+
+ if (dev)
+ dev->async_copy = 1;
+ }
+
VHOST_LOG_CONFIG(INFO, "new device, handle is %d\n", vid);
if (vsocket->notify_ops->new_connection) {
@@ -891,6 +900,17 @@ struct vhost_user_reconnect_list {
goto out_mutex;
}
+ vsocket->async_copy = flags & RTE_VHOST_USER_ASYNC_COPY;
+
+ if (vsocket->async_copy &&
+ (flags & (RTE_VHOST_USER_IOMMU_SUPPORT |
+ RTE_VHOST_USER_POSTCOPY_SUPPORT))) {
+ VHOST_LOG_CONFIG(ERR, "error: enabling async copy and IOMMU "
+ "or post-copy feature simultaneously is not "
+ "supported\n");
+ goto out_mutex;
+ }
+
/*
* Set the supported features correctly for the builtin vhost-user
* net driver.
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 0266318..e6b688a 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -332,8 +332,13 @@
{
if (vq_is_packed(dev))
rte_free(vq->shadow_used_packed);
- else
+ else {
rte_free(vq->shadow_used_split);
+ if (vq->async_pkts_pending)
+ rte_free(vq->async_pkts_pending);
+ if (vq->async_pending_info)
+ rte_free(vq->async_pending_info);
+ }
rte_free(vq->batch_copy_elems);
rte_mempool_free(vq->iotlb_pool);
rte_free(vq);
@@ -1527,3 +1532,70 @@ int rte_vhost_extern_callback_register(int vid,
if (vhost_data_log_level >= 0)
rte_log_set_level(vhost_data_log_level, RTE_LOG_WARNING);
}
+
+/*
+ * Attach application-provided async (DMA) copy callbacks to one virtqueue.
+ *
+ * "features" is decoded through struct dma_channel_features. Returns 0 on
+ * success and -1 when the device, queue id, virtqueue or callbacks are
+ * invalid, when the ring is packed, or when the channel does not advertise
+ * in-order completion.
+ *
+ * NOTE(review): dev->async_copy is not checked here; presumably a channel
+ * should only be registered on a device created with
+ * RTE_VHOST_USER_ASYNC_COPY - confirm and reject otherwise.
+ */
+int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
+					uint32_t features,
+					struct rte_vhost_async_channel_ops *ops)
+{
+	struct vhost_virtqueue *vq;
+	struct virtio_net *dev = get_device(vid);
+	struct dma_channel_features f;
+
+	if (dev == NULL || ops == NULL)
+		return -1;
+
+	/* dev->virtqueue[] holds VHOST_MAX_QUEUE_PAIRS * 2 entries */
+	if (queue_id >= VHOST_MAX_QUEUE_PAIRS * 2)
+		return -1;
+
+	f.intval = features;
+
+	vq = dev->virtqueue[queue_id];
+
+	if (vq == NULL)
+		return -1;
+
+	/* packed rings and out-of-order DMA channels are not supported */
+	if (vq_is_packed(dev) || !f.inorder)
+		return -1;
+
+	if (ops->check_completed_copies == NULL ||
+		ops->transfer_data == NULL)
+		return -1;
+
+	/* publish callbacks and features atomically w.r.t. the data path */
+	rte_spinlock_lock(&vq->access_lock);
+
+	vq->async_ops.check_completed_copies = ops->check_completed_copies;
+	vq->async_ops.transfer_data = ops->transfer_data;
+
+	vq->async_inorder = f.inorder;
+	vq->async_threshold = f.threshold;
+
+	vq->async_registered = true;
+
+	rte_spinlock_unlock(&vq->access_lock);
+
+	return 0;
+}
+
+/*
+ * Detach the async (DMA) copy callbacks from one virtqueue.
+ *
+ * Returns 0 on success and -1 when the device, queue id or virtqueue is
+ * invalid.
+ */
+int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
+{
+	struct vhost_virtqueue *vq;
+	struct virtio_net *dev = get_device(vid);
+
+	if (dev == NULL)
+		return -1;
+
+	/* dev->virtqueue[] holds VHOST_MAX_QUEUE_PAIRS * 2 entries */
+	if (queue_id >= VHOST_MAX_QUEUE_PAIRS * 2)
+		return -1;
+
+	vq = dev->virtqueue[queue_id];
+
+	if (vq == NULL)
+		return -1;
+
+	/* clear callbacks under the lock so the data path never sees a mix */
+	rte_spinlock_lock(&vq->access_lock);
+
+	vq->async_ops.transfer_data = NULL;
+	vq->async_ops.check_completed_copies = NULL;
+
+	vq->async_registered = false;
+
+	rte_spinlock_unlock(&vq->access_lock);
+
+	return 0;
+}
+
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index df98d15..a7fbe23 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -23,6 +23,8 @@
#include "rte_vhost.h"
#include "rte_vdpa.h"
+#include "rte_vhost_async.h"
+
/* Used to indicate that the device is running on a data core */
#define VIRTIO_DEV_RUNNING 1
/* Used to indicate that the device is ready to operate */
@@ -39,6 +41,11 @@
#define VHOST_LOG_CACHE_NR 32
+#define MAX_PKT_BURST 32
+
+#define VHOST_MAX_ASYNC_IT (MAX_PKT_BURST * 2)
+#define VHOST_MAX_ASYNC_VEC (BUF_VECTOR_MAX * 2)
+
#define PACKED_DESC_ENQUEUE_USED_FLAG(w) \
((w) ? (VRING_DESC_F_AVAIL | VRING_DESC_F_USED | VRING_DESC_F_WRITE) : \
VRING_DESC_F_WRITE)
@@ -200,6 +207,25 @@ struct vhost_virtqueue {
TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
int iotlb_cache_nr;
TAILQ_HEAD(, vhost_iotlb_entry) iotlb_pending_list;
+
+ /* operation callbacks for async dma */
+ struct rte_vhost_async_channel_ops async_ops;
+
+ struct iov_it it_pool[VHOST_MAX_ASYNC_IT];
+ struct iovec vec_pool[VHOST_MAX_ASYNC_VEC];
+
+ /* async data transfer status */
+ uintptr_t **async_pkts_pending;
+ #define ASYNC_PENDING_INFO_N_MSK 0xFFFF
+ #define ASYNC_PENDING_INFO_N_SFT 16
+ uint64_t *async_pending_info;
+ uint16_t async_pkts_idx;
+ uint16_t async_pkts_inflight_n;
+
+ /* vq async features */
+ bool async_inorder;
+ bool async_registered;
+ uint16_t async_threshold;
} __rte_cache_aligned;
/* Old kernels have no such macros defined */
@@ -353,6 +379,7 @@ struct virtio_net {
int16_t broadcast_rarp;
uint32_t nr_vring;
int dequeue_zero_copy;
+ int async_copy;
int extbuf;
int linearbuf;
struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
@@ -702,7 +729,8 @@ uint64_t translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
/* Don't kick guest if we don't reach index specified by guest. */
if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
uint16_t old = vq->signalled_used;
- uint16_t new = vq->last_used_idx;
+ uint16_t new = vq->async_pkts_inflight_n ?
+ vq->used->idx:vq->last_used_idx;
bool signalled_used_valid = vq->signalled_used_valid;
vq->signalled_used = new;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 84bebad..d7600bf 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -464,12 +464,25 @@
} else {
if (vq->shadow_used_split)
rte_free(vq->shadow_used_split);
+ if (vq->async_pkts_pending)
+ rte_free(vq->async_pkts_pending);
+ if (vq->async_pending_info)
+ rte_free(vq->async_pending_info);
+
vq->shadow_used_split = rte_malloc(NULL,
vq->size * sizeof(struct vring_used_elem),
RTE_CACHE_LINE_SIZE);
- if (!vq->shadow_used_split) {
+ vq->async_pkts_pending = rte_malloc(NULL,
+ vq->size * sizeof(uintptr_t),
+ RTE_CACHE_LINE_SIZE);
+ vq->async_pending_info = rte_malloc(NULL,
+ vq->size * sizeof(uint64_t),
+ RTE_CACHE_LINE_SIZE);
+ if (!vq->shadow_used_split ||
+ !vq->async_pkts_pending ||
+ !vq->async_pending_info) {
VHOST_LOG_CONFIG(ERR,
- "failed to allocate memory for shadow used ring.\n");
+ "failed to allocate memory for vq internal data.\n");
return RTE_VHOST_MSG_RESULT_ERR;
}
}
@@ -1147,7 +1160,8 @@
goto err_mmap;
}
- populate = (dev->dequeue_zero_copy) ? MAP_POPULATE : 0;
+ populate = (dev->dequeue_zero_copy || dev->async_copy) ?
+ MAP_POPULATE : 0;
mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_SHARED | populate, fd, 0);
@@ -1162,7 +1176,7 @@
reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
mmap_offset;
- if (dev->dequeue_zero_copy)
+ if (dev->dequeue_zero_copy || dev->async_copy)
if (add_guest_pages(dev, reg, alignment) < 0) {
VHOST_LOG_CONFIG(ERR,
"adding guest pages to region %u failed.\n",
@@ -1945,6 +1959,12 @@ static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused,
} else {
rte_free(vq->shadow_used_split);
vq->shadow_used_split = NULL;
+ if (vq->async_pkts_pending)
+ rte_free(vq->async_pkts_pending);
+ if (vq->async_pending_info)
+ rte_free(vq->async_pending_info);
+ vq->async_pkts_pending = NULL;
+ vq->async_pending_info = NULL;
}
rte_free(vq->batch_copy_elems);
--
1.8.3.1
next prev parent reply other threads:[~2020-06-11 10:14 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-06-11 10:02 [dpdk-dev] [PATCH v1 0/2] introduce asynchronous data path for vhost patrick.fu
2020-06-11 10:02 ` patrick.fu [this message]
2020-06-18 5:50 ` [dpdk-dev] [PATCH v1 1/2] vhost: introduce async data path registration API Liu, Yong
2020-06-18 9:08 ` Fu, Patrick
2020-06-19 0:40 ` Liu, Yong
2020-06-25 13:42 ` Maxime Coquelin
2020-06-26 14:28 ` Maxime Coquelin
2020-06-29 1:15 ` Fu, Patrick
2020-06-26 14:44 ` Maxime Coquelin
2020-06-11 10:02 ` [dpdk-dev] [PATCH v1 2/2] vhost: introduce async enqueue for split ring patrick.fu
2020-06-18 6:56 ` Liu, Yong
2020-06-18 11:36 ` Fu, Patrick
2020-06-26 14:39 ` Maxime Coquelin
2020-06-26 14:46 ` Maxime Coquelin
2020-06-29 1:25 ` Fu, Patrick
2020-06-26 14:42 ` [dpdk-dev] [PATCH v1 0/2] introduce asynchronous data path for vhost Maxime Coquelin
2020-07-03 10:27 ` [dpdk-dev] [PATCH v3 " patrick.fu
2020-07-03 10:27 ` [dpdk-dev] [PATCH v3 1/2] vhost: introduce async enqueue registration API patrick.fu
2020-07-03 10:27 ` [dpdk-dev] [PATCH v3 2/2] vhost: introduce async enqueue for split ring patrick.fu
2020-07-03 12:21 ` [dpdk-dev] [PATCH v4 0/2] introduce asynchronous data path for vhost patrick.fu
2020-07-03 12:21 ` [dpdk-dev] [PATCH v4 1/2] vhost: introduce async enqueue registration API patrick.fu
2020-07-06 3:05 ` Liu, Yong
2020-07-06 9:08 ` Fu, Patrick
2020-07-03 12:21 ` [dpdk-dev] [PATCH v4 2/2] vhost: introduce async enqueue for split ring patrick.fu
2020-07-06 11:53 ` [dpdk-dev] [PATCH v5 0/2] introduce asynchronous data path for vhost patrick.fu
2020-07-06 11:53 ` [dpdk-dev] [PATCH v5 1/2] vhost: introduce async enqueue registration API patrick.fu
2020-07-06 11:53 ` [dpdk-dev] [PATCH v5 2/2] vhost: introduce async enqueue for split ring patrick.fu
2020-07-07 5:07 ` [dpdk-dev] [PATCH v6 0/2] introduce asynchronous data path for vhost patrick.fu
2020-07-07 5:07 ` [dpdk-dev] [PATCH v6 1/2] vhost: introduce async enqueue registration API patrick.fu
2020-07-07 8:22 ` Xia, Chenbo
2020-07-07 5:07 ` [dpdk-dev] [PATCH v6 2/2] vhost: introduce async enqueue for split ring patrick.fu
2020-07-07 8:22 ` Xia, Chenbo
2020-07-07 16:45 ` [dpdk-dev] [PATCH v6 0/2] introduce asynchronous data path for vhost Ferruh Yigit
2020-07-20 13:26 ` Maxime Coquelin
2020-07-21 2:28 ` Fu, Patrick
2020-07-21 8:28 ` Maxime Coquelin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1591869725-13331-2-git-send-email-patrick.fu@intel.com \
--to=patrick.fu@intel.com \
--cc=chenbo.xia@intel.com \
--cc=cheng1.jiang@intel.com \
--cc=cunming.liang@intel.com \
--cc=dev@dpdk.org \
--cc=maxime.coquelin@redhat.com \
--cc=xiaolong.ye@intel.com \
--cc=zhihong.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.