From: JinYu <jin.yu@intel.com>
To: dev@dpdk.org
Cc: changpeng.liu@intel.com, maxime.coquelin@redhat.com,
tiwei.bie@intel.com, zhihong.wang@intel.com,
JinYu <jin.yu@intel.com>, Lin Li <lilin24@baidu.com>,
Xun Ni <nixun@baidu.com>, Yu Zhang <zhangyu31@baidu.com>
Subject: [dpdk-dev] [PATCH v7 04/11] vhost: add two new messages to support a shared buffer
Date: Tue, 17 Sep 2019 22:52:27 +0800 [thread overview]
Message-ID: <20190917145234.16951-5-jin.yu@intel.com> (raw)
In-Reply-To: <20190917145234.16951-1-jin.yu@intel.com>
This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD
and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared
buffer between qemu and backend.
Signed-off-by: Lin Li <lilin24@baidu.com>
Signed-off-by: Xun Ni <nixun@baidu.com>
Signed-off-by: Yu Zhang <zhangyu31@baidu.com>
Signed-off-by: Jin Yu <jin.yu@intel.com>
---
lib/librte_vhost/vhost.h | 7 +
lib/librte_vhost/vhost_user.c | 270 +++++++++++++++++++++++++++++++++-
lib/librte_vhost/vhost_user.h | 4 +-
3 files changed, 279 insertions(+), 2 deletions(-)
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 884befa85..d67ba849a 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -286,6 +286,12 @@ struct guest_page {
uint64_t size;
};
+struct inflight_mem_info {
+ int fd;
+ void *addr;
+ uint64_t size;
+};
+
/**
* Device structure contains all configuration information relating
* to the device.
@@ -303,6 +309,7 @@ struct virtio_net {
uint32_t nr_vring;
int dequeue_zero_copy;
struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2];
+ struct inflight_mem_info *inflight_info;
#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
char ifname[IF_NAME_SZ];
uint64_t log_size;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index c9e29ece8..ad2d36215 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -37,6 +37,10 @@
#ifdef RTE_LIBRTE_VHOST_POSTCOPY
#include <linux/userfaultfd.h>
#endif
+#ifdef F_ADD_SEALS /* if file sealing is supported, so is memfd */
+#include <linux/memfd.h>
+#define MEMFD_SUPPORTED
+#endif
#include <rte_common.h>
#include <rte_malloc.h>
@@ -49,6 +53,15 @@
#define VIRTIO_MIN_MTU 68
#define VIRTIO_MAX_MTU 65535
+#define INFLIGHT_ALIGNMENT 64
+#define INFLIGHT_VERSION 0xabcd
+#define VIRTQUEUE_MAX_SIZE 1024
+
+#define CLOEXEC 0x0001U
+
+#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
+#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
+
static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_NONE] = "VHOST_USER_NONE",
[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
@@ -78,6 +91,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE",
[VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN",
[VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END",
+ [VHOST_USER_GET_INFLIGHT_FD] = "VHOST_USER_GET_INFLIGHT_FD",
+ [VHOST_USER_SET_INFLIGHT_FD] = "VHOST_USER_SET_INFLIGHT_FD",
};
static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg);
@@ -160,6 +175,21 @@ vhost_backend_cleanup(struct virtio_net *dev)
dev->log_addr = 0;
}
+ if (dev->inflight_info->addr) {
+ munmap(dev->inflight_info->addr, dev->inflight_info->size);
+ dev->inflight_info->addr = NULL;
+ }
+
+ if (dev->inflight_info->fd > 0) {
+ close(dev->inflight_info->fd);
+ dev->inflight_info->fd = -1;
+ }
+
+ if (dev->inflight_info) {
+ free(dev->inflight_info);
+ dev->inflight_info = NULL;
+ }
+
if (dev->slave_req_fd >= 0) {
close(dev->slave_req_fd);
dev->slave_req_fd = -1;
@@ -1165,6 +1195,243 @@ virtio_is_ready(struct virtio_net *dev)
return 1;
}
+static int
+mem_create(const char *name, unsigned int flags)
+{
+#ifdef MEMFD_SUPPORTED
+ return memfd_create(name, flags);
+#else
+ return -1;
+#endif
+}
+
+static void *
+inflight_mem_alloc(const char *name, size_t size, int *fd)
+{
+ void *ptr;
+ int mfd = -1;
+ char fname[20] = "/tmp/memfd-XXXXXX";
+
+ *fd = -1;
+ mfd = mem_create(name, CLOEXEC);
+ if (mfd != -1) {
+ if (ftruncate(mfd, size) == -1) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "ftruncate fail for alloc inflight buffer\n");
+ close(mfd);
+ return NULL;
+ }
+ } else {
+ mfd = mkstemp(fname);
+ unlink(fname);
+
+ if (mfd == -1) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "mkstemp fail for alloc inflight buffer\n");
+ return NULL;
+ }
+
+ if (ftruncate(mfd, size) == -1) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "ftruncate fail for alloc inflight buffer\n");
+ close(mfd);
+ return NULL;
+ }
+ }
+
+ ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
+ if (ptr == MAP_FAILED) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "mmap fail for alloc inflight buffer\n");
+ close(mfd);
+ return NULL;
+ }
+
+ *fd = mfd;
+ return ptr;
+}
+
+static uint32_t
+get_pervq_shm_size_split(uint16_t queue_size)
+{
+ return ALIGN_UP(sizeof(struct rte_vhost_inflight_desc_split) *
+ queue_size + sizeof(uint64_t) + sizeof(uint16_t) * 4,
+ INFLIGHT_ALIGNMENT);
+}
+
+static uint32_t
+get_pervq_shm_size_packed(uint16_t queue_size)
+{
+ return ALIGN_UP(sizeof(struct rte_vhost_inflight_desc_packed) *
+ queue_size + sizeof(uint64_t) + sizeof(uint16_t) * 6 +
+ sizeof(uint8_t) * 9, INFLIGHT_ALIGNMENT);
+}
+
+static int
+vhost_user_get_inflight_fd(struct virtio_net **pdev,
+ VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ int fd, i, j;
+ uint64_t pervq_inflight_size, mmap_size;
+ void *addr;
+ uint16_t num_queues, queue_size;
+ struct virtio_net *dev = *pdev;
+ struct rte_vhost_inflight_info_packed *inflight_packed = NULL;
+
+ if (msg->size != sizeof(msg->payload.inflight)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Invalid get_inflight_fd message size is %d",
+ msg->size);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ dev->inflight_info = calloc(1, sizeof(struct inflight_mem_info));
+ if (!dev->inflight_info) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to alloc dev inflight area");
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ num_queues = msg->payload.inflight.num_queues;
+ queue_size = msg->payload.inflight.queue_size;
+
+ RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd num_queues: %u\n",
+ msg->payload.inflight.num_queues);
+ RTE_LOG(INFO, VHOST_CONFIG, "get_inflight_fd queue_size: %u\n",
+ msg->payload.inflight.queue_size);
+
+ if (vq_is_packed(dev))
+ pervq_inflight_size = get_pervq_shm_size_packed(queue_size);
+ else
+ pervq_inflight_size = get_pervq_shm_size_split(queue_size);
+
+ mmap_size = num_queues * pervq_inflight_size;
+ addr = inflight_mem_alloc("vhost-inflight", mmap_size, &fd);
+ if (!addr) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to alloc vhost inflight area");
+ msg->payload.inflight.mmap_size = 0;
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ memset(addr, 0, mmap_size);
+
+ dev->inflight_info->addr = addr;
+ dev->inflight_info->size = msg->payload.inflight.mmap_size = mmap_size;
+ dev->inflight_info->fd = msg->fds[0] = fd;
+ msg->payload.inflight.mmap_offset = 0;
+ msg->fd_num = 1;
+
+ if (vq_is_packed(dev)) {
+ for (i = 0; i < num_queues; i++) {
+ inflight_packed =
+ (struct rte_vhost_inflight_info_packed *)addr;
+ inflight_packed->used_wrap_counter = 1;
+ inflight_packed->old_used_wrap_counter = 1;
+ for (j = 0; j < queue_size; j++)
+ inflight_packed->desc[j].next = j + 1;
+ addr = (void *)((char *)addr + pervq_inflight_size);
+ }
+ }
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "send inflight mmap_size: %lu\n",
+ msg->payload.inflight.mmap_size);
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "send inflight mmap_offset: %lu\n",
+ msg->payload.inflight.mmap_offset);
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "send inflight fd: %d\n", msg->fds[0]);
+
+ return RTE_VHOST_MSG_RESULT_REPLY;
+}
+
+static int
+vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
+ int main_fd __rte_unused)
+{
+ int fd, i;
+ uint64_t mmap_size, mmap_offset;
+ uint16_t num_queues, queue_size;
+ uint32_t pervq_inflight_size;
+ void *addr;
+ struct vhost_virtqueue *vq;
+ struct virtio_net *dev = *pdev;
+
+ fd = msg->fds[0];
+ if (msg->size != sizeof(msg->payload.inflight) || fd < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Invalid set_inflight_fd message size is %d,fd is %d\n",
+ msg->size, fd);
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ mmap_size = msg->payload.inflight.mmap_size;
+ mmap_offset = msg->payload.inflight.mmap_offset;
+ num_queues = msg->payload.inflight.num_queues;
+ queue_size = msg->payload.inflight.queue_size;
+
+ if (vq_is_packed(dev))
+ pervq_inflight_size = get_pervq_shm_size_packed(queue_size);
+ else
+ pervq_inflight_size = get_pervq_shm_size_split(queue_size);
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "set_inflight_fd mmap_size: %lu\n", mmap_size);
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "set_inflight_fd mmap_offset: %lu\n", mmap_offset);
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "set_inflight_fd num_queues: %u\n", num_queues);
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "set_inflight_fd queue_size: %u\n", queue_size);
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "set_inflight_fd fd: %d\n", fd);
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "set_inflight_fd pervq_inflight_size: %d\n",
+ pervq_inflight_size);
+
+ if (!dev->inflight_info) {
+ dev->inflight_info = calloc(1,
+ sizeof(struct inflight_mem_info));
+ if (dev->inflight_info == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to alloc dev inflight area");
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+ }
+
+ if (dev->inflight_info->addr)
+ munmap(dev->inflight_info->addr, dev->inflight_info->size);
+
+ addr = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd, mmap_offset);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, VHOST_CONFIG, "failed to mmap share memory.\n");
+ return RTE_VHOST_MSG_RESULT_ERR;
+ }
+
+ if (dev->inflight_info->fd)
+ close(dev->inflight_info->fd);
+
+ dev->inflight_info->fd = fd;
+ dev->inflight_info->addr = addr;
+ dev->inflight_info->size = mmap_size;
+
+ for (i = 0; i < num_queues; i++) {
+ vq = dev->virtqueue[i];
+ if (vq_is_packed(dev)) {
+ vq->inflight_packed = addr;
+ vq->inflight_packed->desc_num = queue_size;
+ } else {
+ vq->inflight_split = addr;
+ vq->inflight_split->desc_num = queue_size;
+ }
+ addr = (void *)((char *)addr + pervq_inflight_size);
+ }
+
+ return RTE_VHOST_MSG_RESULT_OK;
+}
+
static int
vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg,
int main_fd __rte_unused)
@@ -1762,9 +2029,10 @@ static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = {
[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
+ [VHOST_USER_GET_INFLIGHT_FD] = vhost_user_get_inflight_fd,
+ [VHOST_USER_SET_INFLIGHT_FD] = vhost_user_set_inflight_fd,
};
-
/* return bytes# of read on success or negative val on failure. */
static int
read_vhost_message(int sockfd, struct VhostUserMsg *msg)
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 17a1d7bca..6563f7315 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -54,7 +54,9 @@ typedef enum VhostUserRequest {
VHOST_USER_POSTCOPY_ADVISE = 28,
VHOST_USER_POSTCOPY_LISTEN = 29,
VHOST_USER_POSTCOPY_END = 30,
- VHOST_USER_MAX = 31
+ VHOST_USER_GET_INFLIGHT_FD = 31,
+ VHOST_USER_SET_INFLIGHT_FD = 32,
+ VHOST_USER_MAX = 33
} VhostUserRequest;
typedef enum VhostUserSlaveRequest {
--
2.17.2
next prev parent reply other threads:[~2019-09-17 7:09 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20190829141224.49700>
2019-09-17 14:52 ` [dpdk-dev] [PATCH v7 00/11] vhost: support inflight share memory protocol feature JinYu
2019-09-17 14:52 ` [dpdk-dev] [PATCH v7 01/11] vhost: add the inflight description JinYu
2019-09-18 1:29 ` Tiwei Bie
2019-09-18 1:37 ` Yu, Jin
2019-09-17 14:52 ` [dpdk-dev] [PATCH v7 02/11] vhost: add packed ring JinYu
2019-09-17 14:52 ` [dpdk-dev] [PATCH v7 03/11] vhost: add the inflight structure JinYu
2019-09-17 14:52 ` JinYu [this message]
2019-09-17 12:55 ` [dpdk-dev] [PATCH v7 04/11] vhost: add two new messages to support a shared buffer Aaron Conole
2019-09-17 14:52 ` [dpdk-dev] [PATCH v7 05/11] vhost: checkout and cleanup the resubmit inflight information JinYu
2019-09-17 14:52 ` [dpdk-dev] [PATCH v7 06/11] vhost: add the APIs to operate inflight ring JinYu
2019-09-17 14:52 ` [dpdk-dev] [PATCH v7 07/11] vhost: add APIs for user getting " JinYu
2019-09-17 14:52 ` [dpdk-dev] [PATCH v7 08/11] vhost: fix vring functions to support packed ring JinYu
2019-09-18 5:47 ` Tiwei Bie
2019-09-17 14:52 ` [dpdk-dev] [PATCH v7 09/11] vhost: add an API for judging vq format JinYu
2019-09-17 14:52 ` [dpdk-dev] [PATCH v7 10/11] vhost: add APIs to rte_vhost version map JinYu
2019-09-18 6:05 ` Tiwei Bie
2019-09-18 6:31 ` Yu, Jin
2019-09-17 14:52 ` [dpdk-dev] [PATCH v7 11/11] vhost: add vhost-user-blk example which support inflight JinYu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190917145234.16951-5-jin.yu@intel.com \
--to=jin.yu@intel.com \
--cc=changpeng.liu@intel.com \
--cc=dev@dpdk.org \
--cc=lilin24@baidu.com \
--cc=maxime.coquelin@redhat.com \
--cc=nixun@baidu.com \
--cc=tiwei.bie@intel.com \
--cc=zhangyu31@baidu.com \
--cc=zhihong.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.