* [RFC PATCH v2 01/14] enable VIRTIO_NET_F_CTRL_RX VIRTIO_NET_F_CTRL_RX is dependant on VIRTIO_NET_F_CTRL_VQ. Observed that virtio-net driver in guest would crash with only CTRL_RX enabled.
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2015-01-26 3:20 ` Huawei Xie
[not found] ` <1422242440-28948-2-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2015-01-26 3:20 ` [RFC PATCH v2 02/14] create vhost_cuse sub-directory create vhost_cuse directory move vhost-net-cdev.c to vhost_cuse directory Huawei Xie
` (13 subsequent siblings)
14 siblings, 1 reply; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
The guest virtio-net driver asserts the following in virtnet_send_command():
/* Caller should know better */
BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ||
(out + in > VIRTNET_SEND_COMMAND_SG_MAX));
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/virtio-net.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index b041849..52b4957 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -73,7 +73,8 @@ static struct virtio_net_config_ll *ll_root;
/* Features supported by this lib. */
#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
- (1ULL << VIRTIO_NET_F_CTRL_RX))
+ (1ULL << VIRTIO_NET_F_CTRL_VQ) | \
+ (1ULL << VIRTIO_NET_F_CTRL_RX))
static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
/* Line size for reading maps file. */
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread* [RFC PATCH v2 02/14] create vhost_cuse sub-directory create vhost_cuse directory move vhost-net-cdev.c to vhost_cuse directory
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2015-01-26 3:20 ` [RFC PATCH v2 01/14] enable VIRTIO_NET_F_CTRL_RX VIRTIO_NET_F_CTRL_RX is dependant on VIRTIO_NET_F_CTRL_VQ. Observed that virtio-net driver in guest would crash with only CTRL_RX enabled Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 03/14] rename vhost-net-cdev.h to vhost-net.h Huawei Xie
` (12 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
vhost-cuse functionality will be divided into two parts:
cuse driver specific logic and common logic.
The cuse driver specific logic will live in the vhost_cuse directory.
vhost ioctl messages are pre-processed there and then sent to the virtio-net
module if necessary.
virtio-net provides common message handling for both vhost-cuse and vhost-user.
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/Makefile | 4 +-
lib/librte_vhost/vhost-net-cdev.c | 389 ---------------------------
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 389 +++++++++++++++++++++++++++
3 files changed, 391 insertions(+), 391 deletions(-)
delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
create mode 100644 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index c008d64..0b2f08f 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -34,10 +34,10 @@ include $(RTE_SDK)/mk/rte.vars.mk
# library name
LIB = librte_vhost.a
-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
LDFLAGS += -lfuse
# all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c virtio-net.c vhost_rxtx.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/vhost-net-cdev.c b/lib/librte_vhost/vhost-net-cdev.c
deleted file mode 100644
index 57c76cb..0000000
--- a/lib/librte_vhost/vhost-net-cdev.c
+++ /dev/null
@@ -1,389 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <errno.h>
-#include <fuse/cuse_lowlevel.h>
-#include <linux/limits.h>
-#include <linux/vhost.h>
-#include <stdint.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <rte_ethdev.h>
-#include <rte_log.h>
-#include <rte_string_fns.h>
-#include <rte_virtio_net.h>
-
-#include "vhost-net-cdev.h"
-
-#define FUSE_OPT_DUMMY "\0\0"
-#define FUSE_OPT_FORE "-f\0\0"
-#define FUSE_OPT_NOMULTI "-s\0\0"
-
-static const uint32_t default_major = 231;
-static const uint32_t default_minor = 1;
-static const char cuse_device_name[] = "/dev/cuse";
-static const char default_cdev[] = "vhost-net";
-
-static struct fuse_session *session;
-static struct vhost_net_device_ops const *ops;
-
-/*
- * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
- * when the device is added to the device linked list.
- */
-static struct vhost_device_ctx
-fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
-{
- struct vhost_device_ctx ctx;
- struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
-
- ctx.pid = req_ctx->pid;
- ctx.fh = fi->fh;
-
- return ctx;
-}
-
-/*
- * When the device is created in QEMU it gets initialised here and
- * added to the device linked list.
- */
-static void
-vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
-{
- struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
- int err = 0;
-
- err = ops->new_device(ctx);
- if (err == -1) {
- fuse_reply_err(req, EPERM);
- return;
- }
-
- fi->fh = err;
-
- RTE_LOG(INFO, VHOST_CONFIG,
- "(%"PRIu64") Device configuration started\n", fi->fh);
- fuse_reply_open(req, fi);
-}
-
-/*
- * When QEMU is shutdown or killed the device gets released.
- */
-static void
-vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
-{
- int err = 0;
- struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
-
- ops->destroy_device(ctx);
- RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
- fuse_reply_err(req, err);
-}
-
-/*
- * Boilerplate code for CUSE IOCTL
- * Implicit arguments: ctx, req, result.
- */
-#define VHOST_IOCTL(func) do { \
- result = (func)(ctx); \
- fuse_reply_ioctl(req, result, NULL, 0); \
-} while (0)
-
-/*
- * Boilerplate IOCTL RETRY
- * Implicit arguments: req.
- */
-#define VHOST_IOCTL_RETRY(size_r, size_w) do { \
- struct iovec iov_r = { arg, (size_r) }; \
- struct iovec iov_w = { arg, (size_w) }; \
- fuse_reply_ioctl_retry(req, &iov_r, \
- (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
-} while (0)
-
-/*
- * Boilerplate code for CUSE Read IOCTL
- * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
- */
-#define VHOST_IOCTL_R(type, var, func) do { \
- if (!in_bufsz) { \
- VHOST_IOCTL_RETRY(sizeof(type), 0);\
- } else { \
- (var) = *(const type*)in_buf; \
- result = func(ctx, &(var)); \
- fuse_reply_ioctl(req, result, NULL, 0);\
- } \
-} while (0)
-
-/*
- * Boilerplate code for CUSE Write IOCTL
- * Implicit arguments: ctx, req, result, out_bufsz.
- */
-#define VHOST_IOCTL_W(type, var, func) do { \
- if (!out_bufsz) { \
- VHOST_IOCTL_RETRY(0, sizeof(type));\
- } else { \
- result = (func)(ctx, &(var));\
- fuse_reply_ioctl(req, result, &(var), sizeof(type));\
- } \
-} while (0)
-
-/*
- * Boilerplate code for CUSE Read/Write IOCTL
- * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
- */
-#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
- if (!in_bufsz) { \
- VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
- } else { \
- (var1) = *(const type1*) (in_buf); \
- result = (func)(ctx, (var1), &(var2)); \
- fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
- } \
-} while (0)
-
-/*
- * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
- * of IOCTL a buffer is requested to read or to write. This request is handled
- * by FUSE and the buffer is then given to CUSE.
- */
-static void
-vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
- struct fuse_file_info *fi, __rte_unused unsigned flags,
- const void *in_buf, size_t in_bufsz, size_t out_bufsz)
-{
- struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
- struct vhost_vring_file file;
- struct vhost_vring_state state;
- struct vhost_vring_addr addr;
- uint64_t features;
- uint32_t index;
- int result = 0;
-
- switch (cmd) {
- case VHOST_NET_SET_BACKEND:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
- break;
-
- case VHOST_GET_FEATURES:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
- VHOST_IOCTL_W(uint64_t, features, ops->get_features);
- break;
-
- case VHOST_SET_FEATURES:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
- VHOST_IOCTL_R(uint64_t, features, ops->set_features);
- break;
-
- case VHOST_RESET_OWNER:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
- VHOST_IOCTL(ops->reset_owner);
- break;
-
- case VHOST_SET_OWNER:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
- VHOST_IOCTL(ops->set_owner);
- break;
-
- case VHOST_SET_MEM_TABLE:
- /*TODO fix race condition.*/
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
- static struct vhost_memory mem_temp;
-
- switch (in_bufsz) {
- case 0:
- VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
- break;
-
- case sizeof(struct vhost_memory):
- mem_temp = *(const struct vhost_memory *) in_buf;
-
- if (mem_temp.nregions > 0) {
- VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
- (sizeof(struct vhost_memory_region) *
- mem_temp.nregions), 0);
- } else {
- result = -1;
- fuse_reply_ioctl(req, result, NULL, 0);
- }
- break;
-
- default:
- result = ops->set_mem_table(ctx,
- in_buf, mem_temp.nregions);
- if (result)
- fuse_reply_err(req, EINVAL);
- else
- fuse_reply_ioctl(req, result, NULL, 0);
- }
- break;
-
- case VHOST_SET_VRING_NUM:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_state, state,
- ops->set_vring_num);
- break;
-
- case VHOST_SET_VRING_BASE:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_state, state,
- ops->set_vring_base);
- break;
-
- case VHOST_GET_VRING_BASE:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
- VHOST_IOCTL_RW(uint32_t, index,
- struct vhost_vring_state, state, ops->get_vring_base);
- break;
-
- case VHOST_SET_VRING_ADDR:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_addr, addr,
- ops->set_vring_addr);
- break;
-
- case VHOST_SET_VRING_KICK:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_file, file,
- ops->set_vring_kick);
- break;
-
- case VHOST_SET_VRING_CALL:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_file, file,
- ops->set_vring_call);
- break;
-
- default:
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
- result = -1;
- fuse_reply_ioctl(req, result, NULL, 0);
- }
-
- if (result < 0)
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
- else
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
-}
-
-/*
- * Structure handling open, release and ioctl function pointers is populated.
- */
-static const struct cuse_lowlevel_ops vhost_net_ops = {
- .open = vhost_net_open,
- .release = vhost_net_release,
- .ioctl = vhost_net_ioctl,
-};
-
-/*
- * cuse_info is populated and used to register the cuse device.
- * vhost_net_device_ops are also passed when the device is registered in app.
- */
-int
-rte_vhost_driver_register(const char *dev_name)
-{
- struct cuse_info cuse_info;
- char device_name[PATH_MAX] = "";
- char char_device_name[PATH_MAX] = "";
- const char *device_argv[] = { device_name };
-
- char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
- char fuse_opt_fore[] = FUSE_OPT_FORE;
- char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
- char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
-
- if (access(cuse_device_name, R_OK | W_OK) < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "char device %s can't be accessed, maybe not exist\n",
- cuse_device_name);
- return -1;
- }
-
- /*
- * The device name is created. This is passed to QEMU so that it can
- * register the device with our application.
- */
- snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
- snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
-
- /* Check if device already exists. */
- if (access(char_device_name, F_OK) != -1) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "char device %s already exists\n", char_device_name);
- return -1;
- }
-
- memset(&cuse_info, 0, sizeof(cuse_info));
- cuse_info.dev_major = default_major;
- cuse_info.dev_minor = default_minor;
- cuse_info.dev_info_argc = 1;
- cuse_info.dev_info_argv = device_argv;
- cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
-
- ops = get_virtio_net_callbacks();
-
- session = cuse_lowlevel_setup(3, fuse_argv,
- &cuse_info, &vhost_net_ops, 0, NULL);
- if (session == NULL)
- return -1;
-
- return 0;
-}
-
-/**
- * The CUSE session is launched allowing the application to receive open,
- * release and ioctl calls.
- */
-int
-rte_vhost_driver_session_start(void)
-{
- fuse_session_loop(session);
-
- return 0;
-}
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
new file mode 100644
index 0000000..57c76cb
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -0,0 +1,389 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <fuse/cuse_lowlevel.h>
+#include <linux/limits.h>
+#include <linux/vhost.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_virtio_net.h>
+
+#include "vhost-net-cdev.h"
+
+#define FUSE_OPT_DUMMY "\0\0"
+#define FUSE_OPT_FORE "-f\0\0"
+#define FUSE_OPT_NOMULTI "-s\0\0"
+
+static const uint32_t default_major = 231;
+static const uint32_t default_minor = 1;
+static const char cuse_device_name[] = "/dev/cuse";
+static const char default_cdev[] = "vhost-net";
+
+static struct fuse_session *session;
+static struct vhost_net_device_ops const *ops;
+
+/*
+ * Returns vhost_device_ctx from given fuse_req_t. The index is populated later
+ * when the device is added to the device linked list.
+ */
+static struct vhost_device_ctx
+fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi)
+{
+ struct vhost_device_ctx ctx;
+ struct fuse_ctx const *const req_ctx = fuse_req_ctx(req);
+
+ ctx.pid = req_ctx->pid;
+ ctx.fh = fi->fh;
+
+ return ctx;
+}
+
+/*
+ * When the device is created in QEMU it gets initialised here and
+ * added to the device linked list.
+ */
+static void
+vhost_net_open(fuse_req_t req, struct fuse_file_info *fi)
+{
+ struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+ int err = 0;
+
+ err = ops->new_device(ctx);
+ if (err == -1) {
+ fuse_reply_err(req, EPERM);
+ return;
+ }
+
+ fi->fh = err;
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "(%"PRIu64") Device configuration started\n", fi->fh);
+ fuse_reply_open(req, fi);
+}
+
+/*
+ * When QEMU is shutdown or killed the device gets released.
+ */
+static void
+vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
+{
+ int err = 0;
+ struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+
+ ops->destroy_device(ctx);
+ RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh);
+ fuse_reply_err(req, err);
+}
+
+/*
+ * Boilerplate code for CUSE IOCTL
+ * Implicit arguments: ctx, req, result.
+ */
+#define VHOST_IOCTL(func) do { \
+ result = (func)(ctx); \
+ fuse_reply_ioctl(req, result, NULL, 0); \
+} while (0)
+
+/*
+ * Boilerplate IOCTL RETRY
+ * Implicit arguments: req.
+ */
+#define VHOST_IOCTL_RETRY(size_r, size_w) do { \
+ struct iovec iov_r = { arg, (size_r) }; \
+ struct iovec iov_w = { arg, (size_w) }; \
+ fuse_reply_ioctl_retry(req, &iov_r, \
+ (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Read IOCTL
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ */
+#define VHOST_IOCTL_R(type, var, func) do { \
+ if (!in_bufsz) { \
+ VHOST_IOCTL_RETRY(sizeof(type), 0);\
+ } else { \
+ (var) = *(const type*)in_buf; \
+ result = func(ctx, &(var)); \
+ fuse_reply_ioctl(req, result, NULL, 0);\
+ } \
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Write IOCTL
+ * Implicit arguments: ctx, req, result, out_bufsz.
+ */
+#define VHOST_IOCTL_W(type, var, func) do { \
+ if (!out_bufsz) { \
+ VHOST_IOCTL_RETRY(0, sizeof(type));\
+ } else { \
+ result = (func)(ctx, &(var));\
+ fuse_reply_ioctl(req, result, &(var), sizeof(type));\
+ } \
+} while (0)
+
+/*
+ * Boilerplate code for CUSE Read/Write IOCTL
+ * Implicit arguments: ctx, req, result, in_bufsz, in_buf.
+ */
+#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \
+ if (!in_bufsz) { \
+ VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\
+ } else { \
+ (var1) = *(const type1*) (in_buf); \
+ result = (func)(ctx, (var1), &(var2)); \
+ fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\
+ } \
+} while (0)
+
+/*
+ * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
+ * of IOCTL a buffer is requested to read or to write. This request is handled
+ * by FUSE and the buffer is then given to CUSE.
+ */
+static void
+vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
+ struct fuse_file_info *fi, __rte_unused unsigned flags,
+ const void *in_buf, size_t in_bufsz, size_t out_bufsz)
+{
+ struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi);
+ struct vhost_vring_file file;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ uint64_t features;
+ uint32_t index;
+ int result = 0;
+
+ switch (cmd) {
+ case VHOST_NET_SET_BACKEND:
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
+ VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
+ break;
+
+ case VHOST_GET_FEATURES:
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh);
+ VHOST_IOCTL_W(uint64_t, features, ops->get_features);
+ break;
+
+ case VHOST_SET_FEATURES:
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh);
+ VHOST_IOCTL_R(uint64_t, features, ops->set_features);
+ break;
+
+ case VHOST_RESET_OWNER:
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh);
+ VHOST_IOCTL(ops->reset_owner);
+ break;
+
+ case VHOST_SET_OWNER:
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh);
+ VHOST_IOCTL(ops->set_owner);
+ break;
+
+ case VHOST_SET_MEM_TABLE:
+ /*TODO fix race condition.*/
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh);
+ static struct vhost_memory mem_temp;
+
+ switch (in_bufsz) {
+ case 0:
+ VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0);
+ break;
+
+ case sizeof(struct vhost_memory):
+ mem_temp = *(const struct vhost_memory *) in_buf;
+
+ if (mem_temp.nregions > 0) {
+ VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) +
+ (sizeof(struct vhost_memory_region) *
+ mem_temp.nregions), 0);
+ } else {
+ result = -1;
+ fuse_reply_ioctl(req, result, NULL, 0);
+ }
+ break;
+
+ default:
+ result = ops->set_mem_table(ctx,
+ in_buf, mem_temp.nregions);
+ if (result)
+ fuse_reply_err(req, EINVAL);
+ else
+ fuse_reply_ioctl(req, result, NULL, 0);
+ }
+ break;
+
+ case VHOST_SET_VRING_NUM:
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh);
+ VHOST_IOCTL_R(struct vhost_vring_state, state,
+ ops->set_vring_num);
+ break;
+
+ case VHOST_SET_VRING_BASE:
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh);
+ VHOST_IOCTL_R(struct vhost_vring_state, state,
+ ops->set_vring_base);
+ break;
+
+ case VHOST_GET_VRING_BASE:
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh);
+ VHOST_IOCTL_RW(uint32_t, index,
+ struct vhost_vring_state, state, ops->get_vring_base);
+ break;
+
+ case VHOST_SET_VRING_ADDR:
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh);
+ VHOST_IOCTL_R(struct vhost_vring_addr, addr,
+ ops->set_vring_addr);
+ break;
+
+ case VHOST_SET_VRING_KICK:
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", ctx.fh);
+ VHOST_IOCTL_R(struct vhost_vring_file, file,
+ ops->set_vring_kick);
+ break;
+
+ case VHOST_SET_VRING_CALL:
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", ctx.fh);
+ VHOST_IOCTL_R(struct vhost_vring_file, file,
+ ops->set_vring_call);
+ break;
+
+ default:
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh);
+ result = -1;
+ fuse_reply_ioctl(req, result, NULL, 0);
+ }
+
+ if (result < 0)
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: FAIL\n", ctx.fh);
+ else
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh);
+}
+
+/*
+ * Structure handling open, release and ioctl function pointers is populated.
+ */
+static const struct cuse_lowlevel_ops vhost_net_ops = {
+ .open = vhost_net_open,
+ .release = vhost_net_release,
+ .ioctl = vhost_net_ioctl,
+};
+
+/*
+ * cuse_info is populated and used to register the cuse device.
+ * vhost_net_device_ops are also passed when the device is registered in app.
+ */
+int
+rte_vhost_driver_register(const char *dev_name)
+{
+ struct cuse_info cuse_info;
+ char device_name[PATH_MAX] = "";
+ char char_device_name[PATH_MAX] = "";
+ const char *device_argv[] = { device_name };
+
+ char fuse_opt_dummy[] = FUSE_OPT_DUMMY;
+ char fuse_opt_fore[] = FUSE_OPT_FORE;
+ char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI;
+ char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti};
+
+ if (access(cuse_device_name, R_OK | W_OK) < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "char device %s can't be accessed, maybe not exist\n",
+ cuse_device_name);
+ return -1;
+ }
+
+ /*
+ * The device name is created. This is passed to QEMU so that it can
+ * register the device with our application.
+ */
+ snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name);
+ snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name);
+
+ /* Check if device already exists. */
+ if (access(char_device_name, F_OK) != -1) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "char device %s already exists\n", char_device_name);
+ return -1;
+ }
+
+ memset(&cuse_info, 0, sizeof(cuse_info));
+ cuse_info.dev_major = default_major;
+ cuse_info.dev_minor = default_minor;
+ cuse_info.dev_info_argc = 1;
+ cuse_info.dev_info_argv = device_argv;
+ cuse_info.flags = CUSE_UNRESTRICTED_IOCTL;
+
+ ops = get_virtio_net_callbacks();
+
+ session = cuse_lowlevel_setup(3, fuse_argv,
+ &cuse_info, &vhost_net_ops, 0, NULL);
+ if (session == NULL)
+ return -1;
+
+ return 0;
+}
+
+/**
+ * The CUSE session is launched allowing the application to receive open,
+ * release and ioctl calls.
+ */
+int
+rte_vhost_driver_session_start(void)
+{
+ fuse_session_loop(session);
+
+ return 0;
+}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread* [RFC PATCH v2 03/14] rename vhost-net-cdev.h to vhost-net.h
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2015-01-26 3:20 ` [RFC PATCH v2 01/14] enable VIRTIO_NET_F_CTRL_RX VIRTIO_NET_F_CTRL_RX is dependant on VIRTIO_NET_F_CTRL_VQ. Observed that virtio-net driver in guest would crash with only CTRL_RX enabled Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 02/14] create vhost_cuse sub-directory create vhost_cuse directory move vhost-net-cdev.c to vhost_cuse directory Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 04/14] consistent print style Huawei Xie
` (11 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
This header defines the common operations provided by virtio-net.c.
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/vhost-net-cdev.h | 113 ---------------------------
lib/librte_vhost/vhost-net.h | 113 +++++++++++++++++++++++++++
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 2 +-
lib/librte_vhost/vhost_rxtx.c | 2 +-
lib/librte_vhost/virtio-net.c | 2 +-
5 files changed, 116 insertions(+), 116 deletions(-)
delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
create mode 100644 lib/librte_vhost/vhost-net.h
diff --git a/lib/librte_vhost/vhost-net-cdev.h b/lib/librte_vhost/vhost-net-cdev.h
deleted file mode 100644
index 03a5c57..0000000
--- a/lib/librte_vhost/vhost-net-cdev.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _VHOST_NET_CDEV_H_
-#define _VHOST_NET_CDEV_H_
-#include <stdint.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <linux/vhost.h>
-
-#include <rte_log.h>
-
-/* Macros for printing using RTE_LOG */
-#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
-#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
-
-#ifdef RTE_LIBRTE_VHOST_DEBUG
-#define VHOST_MAX_PRINT_BUFF 6072
-#define LOG_LEVEL RTE_LOG_DEBUG
-#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
-#define PRINT_PACKET(device, addr, size, header) do { \
- char *pkt_addr = (char *)(addr); \
- unsigned int index; \
- char packet[VHOST_MAX_PRINT_BUFF]; \
- \
- if ((header)) \
- snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
- else \
- snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
- for (index = 0; index < (size); index++) { \
- snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
- "%02hhx ", pkt_addr[index]); \
- } \
- snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
- \
- LOG_DEBUG(VHOST_DATA, "%s", packet); \
-} while (0)
-#else
-#define LOG_LEVEL RTE_LOG_INFO
-#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
-#define PRINT_PACKET(device, addr, size, header) do {} while (0)
-#endif
-
-
-/*
- * Structure used to identify device context.
- */
-struct vhost_device_ctx {
- pid_t pid; /* PID of process calling the IOCTL. */
- uint64_t fh; /* Populated with fi->fh to track the device index. */
-};
-
-/*
- * Structure contains function pointers to be defined in virtio-net.c. These
- * functions are called in CUSE context and are used to configure devices.
- */
-struct vhost_net_device_ops {
- int (*new_device)(struct vhost_device_ctx);
- void (*destroy_device)(struct vhost_device_ctx);
-
- int (*get_features)(struct vhost_device_ctx, uint64_t *);
- int (*set_features)(struct vhost_device_ctx, uint64_t *);
-
- int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
-
- int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *);
- int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr *);
- int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state *);
- int (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct vhost_vring_state *);
-
- int (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file *);
- int (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file *);
-
- int (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);
-
- int (*set_owner)(struct vhost_device_ctx);
- int (*reset_owner)(struct vhost_device_ctx);
-};
-
-
-struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
-#endif /* _VHOST_NET_CDEV_H_ */
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
new file mode 100644
index 0000000..03a5c57
--- /dev/null
+++ b/lib/librte_vhost/vhost-net.h
@@ -0,0 +1,113 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_NET_CDEV_H_
+#define _VHOST_NET_CDEV_H_
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <linux/vhost.h>
+
+#include <rte_log.h>
+
+/* Macros for printing using RTE_LOG */
+#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
+#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
+
+#ifdef RTE_LIBRTE_VHOST_DEBUG
+#define VHOST_MAX_PRINT_BUFF 6072
+#define LOG_LEVEL RTE_LOG_DEBUG
+#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
+#define PRINT_PACKET(device, addr, size, header) do { \
+ char *pkt_addr = (char *)(addr); \
+ unsigned int index; \
+ char packet[VHOST_MAX_PRINT_BUFF]; \
+ \
+ if ((header)) \
+ snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
+ else \
+ snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
+ for (index = 0; index < (size); index++) { \
+ snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \
+ "%02hhx ", pkt_addr[index]); \
+ } \
+ snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \
+ \
+ LOG_DEBUG(VHOST_DATA, "%s", packet); \
+} while (0)
+#else
+#define LOG_LEVEL RTE_LOG_INFO
+#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
+#define PRINT_PACKET(device, addr, size, header) do {} while (0)
+#endif
+
+
+/*
+ * Structure used to identify device context.
+ */
+struct vhost_device_ctx {
+ pid_t pid; /* PID of process calling the IOCTL. */
+ uint64_t fh; /* Populated with fi->fh to track the device index. */
+};
+
+/*
+ * Structure contains function pointers to be defined in virtio-net.c. These
+ * functions are called in CUSE context and are used to configure devices.
+ */
+struct vhost_net_device_ops {
+ int (*new_device)(struct vhost_device_ctx);
+ void (*destroy_device)(struct vhost_device_ctx);
+
+ int (*get_features)(struct vhost_device_ctx, uint64_t *);
+ int (*set_features)(struct vhost_device_ctx, uint64_t *);
+
+ int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
+
+ int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *);
+ int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr *);
+ int (*set_vring_base)(struct vhost_device_ctx, struct vhost_vring_state *);
+ int (*get_vring_base)(struct vhost_device_ctx, uint32_t, struct vhost_vring_state *);
+
+ int (*set_vring_kick)(struct vhost_device_ctx, struct vhost_vring_file *);
+ int (*set_vring_call)(struct vhost_device_ctx, struct vhost_vring_file *);
+
+ int (*set_backend)(struct vhost_device_ctx, struct vhost_vring_file *);
+
+ int (*set_owner)(struct vhost_device_ctx);
+ int (*reset_owner)(struct vhost_device_ctx);
+};
+
+
+struct vhost_net_device_ops const *get_virtio_net_callbacks(void);
+#endif /* _VHOST_NET_CDEV_H_ */
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index 57c76cb..2bb07af 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -44,7 +44,7 @@
#include <rte_string_fns.h>
#include <rte_virtio_net.h>
-#include "vhost-net-cdev.h"
+#include "vhost-net.h"
#define FUSE_OPT_DUMMY "\0\0"
#define FUSE_OPT_FORE "-f\0\0"
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index ccfd82f..c7c9550 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -38,7 +38,7 @@
#include <rte_memcpy.h>
#include <rte_virtio_net.h>
-#include "vhost-net-cdev.h"
+#include "vhost-net.h"
#define MAX_PKT_BURST 32
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 52b4957..6bc9d51 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -53,7 +53,7 @@
#include <rte_memory.h>
#include <rte_virtio_net.h>
-#include "vhost-net-cdev.h"
+#include "vhost-net.h"
#include "eventfd_link/eventfd_link.h"
/*
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [RFC PATCH v2 04/14] consistent print style
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (2 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 03/14] rename vhost-net-cdev.h to vhost-net.h Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 05/14] implement eventfd copying(from fd in qemu process to fd in vhost process) in vhost-net-cdev.c Huawei Xie
` (10 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/vhost_rxtx.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index c7c9550..be4f6a5 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -549,8 +549,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
if (vq->last_used_idx == avail_idx)
return 0;
- LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__,
- dev->device_fh);
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") %s\n", dev->device_fh, __func__);
/* Prefetch available ring to retrieve head indexes. */
rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [RFC PATCH v2 05/14] implement eventfd copying(from fd in qemu process to fd in vhost process) in vhost-net-cdev.c
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (3 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 04/14] consistent print style Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 06/14] copy host_memory_map from virtio-net.c to a new file virtio-net-cdev.c Huawei Xie
` (9 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 85 ++++++++++++++++++++++++----
lib/librte_vhost/virtio-net.c | 57 +------------------
2 files changed, 77 insertions(+), 65 deletions(-)
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index 2bb07af..802c7dc 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -38,6 +38,8 @@
#include <stdint.h>
#include <string.h>
#include <unistd.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
#include <rte_ethdev.h>
#include <rte_log.h>
@@ -45,6 +47,7 @@
#include <rte_virtio_net.h>
#include "vhost-net.h"
+#include "eventfd_link/eventfd_link.h"
#define FUSE_OPT_DUMMY "\0\0"
#define FUSE_OPT_FORE "-f\0\0"
@@ -54,6 +57,7 @@ static const uint32_t default_major = 231;
static const uint32_t default_minor = 1;
static const char cuse_device_name[] = "/dev/cuse";
static const char default_cdev[] = "vhost-net";
+static const char eventfd_cdev[] = "/dev/eventfd-link";
static struct fuse_session *session;
static struct vhost_net_device_ops const *ops;
@@ -173,6 +177,47 @@ vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
} while (0)
/*
+ * This function uses the eventfd_link kernel module to copy an eventfd file
+ * descriptor provided by QEMU in to our process space.
+ */
+static int
+eventfd_copy(int target_fd, int target_pid)
+{
+	struct eventfd_copy request;
+	int link_fd;
+	int rc, local_fd;
+
+	/* Local eventfd handed to the kernel module as source_fd. */
+	local_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+	if (local_fd == -1)
+		return -1;
+
+	/* Open the character device to the kernel module. */
+	/* TODO: check this earlier rather than fail until VM boots! */
+	link_fd = open(eventfd_cdev, O_RDWR);
+	if (link_fd < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"eventfd_link module is not loaded\n");
+		goto fail;
+	}
+
+	request.source_fd = local_fd;
+	request.target_fd = target_fd;
+	request.target_pid = target_pid;
+	/* Call the IOCTL to copy the eventfd. */
+	rc = ioctl(link_fd, EVENTFD_COPY, &request);
+	close(link_fd);
+	if (rc >= 0)
+		return local_fd;
+
+	RTE_LOG(ERR, VHOST_CONFIG,
+		"EVENTFD_COPY ioctl failed\n");
+fail:
+	close(local_fd);
+	return -1;
+}
+
+/*
* The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
* of IOCTL a buffer is requested to read or to write. This request is handled
* by FUSE and the buffer is then given to CUSE.
@@ -284,17 +329,37 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
break;
case VHOST_SET_VRING_KICK:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_file, file,
- ops->set_vring_kick);
- break;
-
case VHOST_SET_VRING_CALL:
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_file, file,
- ops->set_vring_call);
+ if (cmd == VHOST_SET_VRING_KICK)
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n",
+ ctx.fh);
+ else
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n",
+ ctx.fh);
+ if (!in_buf)
+ VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0);
+ else {
+ int fd;
+ file = *(const struct vhost_vring_file *)in_buf;
+ LOG_DEBUG(VHOST_CONFIG,
+ "idx:%d fd:%d\n", file.index, file.fd);
+ fd = eventfd_copy(file.fd, ctx.pid);
+ if (fd < 0) {
+ fuse_reply_ioctl(req, -1, NULL, 0);
+ result = -1;
+ break;
+ }
+ file.fd = fd;
+ if (cmd == VHOST_SET_VRING_KICK) {
+ result = ops->set_vring_kick(ctx, &file);
+ fuse_reply_ioctl(req, result, NULL, 0);
+ } else {
+ result = ops->set_vring_call(ctx, &file);
+ fuse_reply_ioctl(req, result, NULL, 0);
+ }
+ }
break;
default:
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 6bc9d51..da9e3a6 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -38,8 +38,6 @@
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
-#include <sys/eventfd.h>
-#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
@@ -54,7 +52,6 @@
#include <rte_virtio_net.h>
#include "vhost-net.h"
-#include "eventfd_link/eventfd_link.h"
/*
* Device linked list structure for configuration.
@@ -64,8 +61,6 @@ struct virtio_net_config_ll {
struct virtio_net_config_ll *next; /* Next dev on linked list.*/
};
-const char eventfd_cdev[] = "/dev/eventfd-link";
-
/* device ops to add/remove device to/from data core. */
static struct virtio_net_device_ops const *notify_ops;
/* root address of the linked list of managed virtio devices */
@@ -904,37 +899,6 @@ get_vring_base(struct vhost_device_ctx ctx, uint32_t index,
return 0;
}
-/*
- * This function uses the eventfd_link kernel module to copy an eventfd file
- * descriptor provided by QEMU in to our process space.
- */
-static int
-eventfd_copy(struct virtio_net *dev, struct eventfd_copy *eventfd_copy)
-{
- int eventfd_link, ret;
-
- /* Open the character device to the kernel module. */
- eventfd_link = open(eventfd_cdev, O_RDWR);
- if (eventfd_link < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") eventfd_link module is not loaded\n",
- dev->device_fh);
- return -1;
- }
-
- /* Call the IOCTL to copy the eventfd. */
- ret = ioctl(eventfd_link, EVENTFD_COPY, eventfd_copy);
- close(eventfd_link);
-
- if (ret < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") EVENTFD_COPY ioctl failed\n",
- dev->device_fh);
- return -1;
- }
-
- return 0;
-}
/*
* Called from CUSE IOCTL: VHOST_SET_VRING_CALL
@@ -945,7 +909,6 @@ static int
set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
{
struct virtio_net *dev;
- struct eventfd_copy eventfd_kick;
struct vhost_virtqueue *vq;
dev = get_device(ctx);
@@ -958,14 +921,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
if (vq->kickfd)
close((int)vq->kickfd);
- /* Populate the eventfd_copy structure and call eventfd_copy. */
- vq->kickfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
- eventfd_kick.source_fd = vq->kickfd;
- eventfd_kick.target_fd = file->fd;
- eventfd_kick.target_pid = ctx.pid;
-
- if (eventfd_copy(dev, &eventfd_kick))
- return -1;
+ vq->kickfd = file->fd;
return 0;
}
@@ -979,7 +935,6 @@ static int
set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
{
struct virtio_net *dev;
- struct eventfd_copy eventfd_call;
struct vhost_virtqueue *vq;
dev = get_device(ctx);
@@ -991,15 +946,7 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
if (vq->callfd)
close((int)vq->callfd);
-
- /* Populate the eventfd_copy structure and call eventfd_copy. */
- vq->callfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
- eventfd_call.source_fd = vq->callfd;
- eventfd_call.target_fd = file->fd;
- eventfd_call.target_pid = ctx.pid;
-
- if (eventfd_copy(dev, &eventfd_call))
- return -1;
+ vq->callfd = file->fd;
return 0;
}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [RFC PATCH v2 06/14] copy host_memory_map from virtio-net.c to a new file virtio-net-cdev.c
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (4 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 05/14] implement eventfd copying(from fd in qemu process to fd in vhost process) in vhost-net-cdev.c Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 07/14] host_memory_map Huawei Xie
` (8 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 258 ++++++++++++++++++++++++++
1 file changed, 258 insertions(+)
create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
new file mode 100644
index 0000000..fbfc403
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -0,0 +1,258 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <dirent.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <fuse/cuse_lowlevel.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <rte_log.h>
+
+#include "vhost-net.h"
+
+/* Line size for reading maps file. */
+static const uint32_t BUFSIZE = PATH_MAX;
+
+/* Size of prot char array in procmap. */
+#define PROT_SZ 5
+
+/* Number of elements in procmap struct. */
+#define PROCMAP_SZ 8
+
+/* One parsed line of a process maps file, as filled in by host_memory_map(). */
+struct procmap {
+ uint64_t va_start; /* Start virtual address; compared with the wanted address. */
+ uint64_t len; /* Parsed end address, turned into a size on a match. */
+ uint64_t pgoff; /* Parsed, otherwise not used. */
+ uint32_t maj; /* Parsed, otherwise not used. */
+ uint32_t min; /* Parsed, otherwise not used. */
+ uint32_t ino; /* Parsed, otherwise not used. */
+ char prot[PROT_SZ]; /* Copied, otherwise not used. */
+ char fname[PATH_MAX]; /* File name; matched against resolved fd paths. */
+};
+
+/*
+ * Parse one hexadecimal field of a maps line into *val.
+ * Returns 0 on success, -1 when the field is empty, is followed by
+ * unparsed characters, or strtoull() reported an error through errno.
+ */
+static int
+parse_hex_field(const char *field, uint64_t *val)
+{
+	char *end = NULL;
+
+	errno = 0;
+	*val = strtoull(field, &end, 16);
+	if ((*field == '\0') || (end == NULL) || (*end != '\0') ||
+		(errno != 0))
+		return -1;
+	return 0;
+}
+
+/*
+ * Locate the file containing QEMU's memory space and
+ * map it to our address space.
+ *
+ * dev is read only for dev->device_fh, which prefixes the log messages.
+ * pid selects the /proc/<pid>/maps and /proc/<pid>/fd entries to scan;
+ * addr is the start address the chosen maps entry must have.
+ * On success the mapping is stored in mem and 0 is returned, else -1.
+ */
+static int
+host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
+	pid_t pid, uint64_t addr)
+{
+	struct dirent *dptr = NULL;
+	struct procmap procmap;
+	DIR *dp = NULL;
+	int fd;
+	int i;
+	char memfile[PATH_MAX];
+	char mapfile[PATH_MAX];
+	char procdir[PATH_MAX];
+	char resolved_path[PATH_MAX] = "";
+	char *path = NULL;
+	FILE *fmap;
+	void *map;
+	uint8_t found = 0;
+	char line[BUFSIZE];
+	/*
+	 * strtok_r() is handed &dlm[i] for field i, so this delimiter string
+	 * must hold at least PROCMAP_SZ characters.
+	 */
+	char dlm[] = "-   :   ";
+	char *str, *sp, *in[PROCMAP_SZ];
+	uint64_t dev_maj, dev_min, inode;
+
+	/* Path where mem files are located. */
+	snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
+	/* Maps file used to locate mem file. */
+	snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
+
+	fmap = fopen(mapfile, "r");
+	if (fmap == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to open maps file for pid %d\n",
+			dev->device_fh, pid);
+		return -1;
+	}
+
+	/* Read through maps file until we find our base address. */
+	while (fgets(line, BUFSIZE, fmap) != 0) {
+		str = line;
+		errno = 0;
+		/* Split line into fields. */
+		for (i = 0; i < PROCMAP_SZ; i++) {
+			in[i] = strtok_r(str, &dlm[i], &sp);
+			if ((in[i] == NULL) || (errno != 0)) {
+				fclose(fmap);
+				return -1;
+			}
+			str = NULL;
+		}
+
+		/* Convert/Copy each field as needed. */
+		if ((parse_hex_field(in[0], &procmap.va_start) != 0) ||
+			(parse_hex_field(in[1], &procmap.len) != 0) ||
+			(parse_hex_field(in[3], &procmap.pgoff) != 0) ||
+			(parse_hex_field(in[4], &dev_maj) != 0) ||
+			(parse_hex_field(in[5], &dev_min) != 0) ||
+			(parse_hex_field(in[6], &inode) != 0)) {
+			fclose(fmap);
+			return -1;
+		}
+		procmap.maj = (uint32_t)dev_maj;
+		procmap.min = (uint32_t)dev_min;
+		procmap.ino = (uint32_t)inode;
+
+		/* Bounded copies; the tokens point into line[]. */
+		snprintf(procmap.prot, PROT_SZ, "%s", in[2]);
+		snprintf(procmap.fname, PATH_MAX, "%s", in[7]);
+
+		if (procmap.va_start == addr) {
+			/* len held the end address so far. */
+			procmap.len = procmap.len - procmap.va_start;
+			found = 1;
+			break;
+		}
+	}
+	fclose(fmap);
+
+	if (!found) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to find memory file in pid %d maps file\n",
+			dev->device_fh, pid);
+		return -1;
+	}
+
+	/* Find the guest memory file among the process fds. */
+	dp = opendir(procdir);
+	if (dp == NULL) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Cannot open pid %d process directory\n",
+			dev->device_fh, pid);
+		return -1;
+	}
+
+	found = 0;
+
+	/* Read the fd directory contents. */
+	while (NULL != (dptr = readdir(dp))) {
+		snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
+				pid, dptr->d_name);
+		path = realpath(memfile, resolved_path);
+		if ((path == NULL) && (strlen(resolved_path) == 0)) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"(%"PRIu64") Failed to resolve fd directory\n",
+				dev->device_fh);
+			closedir(dp);
+			return -1;
+		}
+		if (strncmp(resolved_path, procmap.fname,
+			strnlen(procmap.fname, PATH_MAX)) == 0) {
+			found = 1;
+			break;
+		}
+	}
+
+	closedir(dp);
+
+	if (found == 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to find memory file for pid %d\n",
+			dev->device_fh, pid);
+		return -1;
+	}
+	/* Open the shared memory file and map the memory into this process. */
+	fd = open(memfile, O_RDWR);
+
+	if (fd == -1) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Failed to open %s for pid %d\n",
+			dev->device_fh, memfile, pid);
+		return -1;
+	}
+
+	map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
+		MAP_POPULATE|MAP_SHARED, fd, 0);
+	close(fd);
+
+	if (map == MAP_FAILED) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"(%"PRIu64") Error mapping the file %s for pid %d\n",
+			dev->device_fh, memfile, pid);
+		return -1;
+	}
+
+	/* Store the memory address and size in the device data structure */
+	mem->mapped_address = (uint64_t)(uintptr_t)map;
+	mem->mapped_size = procmap.len;
+
+	LOG_DEBUG(VHOST_CONFIG,
+		"(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
+		dev->device_fh,
+		memfile, resolved_path,
+		(unsigned long long)mem->mapped_size, map);
+
+	return 0;
+}
+
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [RFC PATCH v2 07/14] host_memory_map
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (5 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 06/14] copy host_memory_map from virtio-net.c to a new file virtio-net-cdev.c Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 08/14] split set_memory_table into two parts Huawei Xie
` (7 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 43 +++++++++++++--------------
1 file changed, 20 insertions(+), 23 deletions(-)
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index fbfc403..58ac3dd 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -75,8 +75,8 @@ struct procmap {
* map it to our address space.
*/
static int
-host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
- pid_t pid, uint64_t addr)
+host_memory_map(pid_t pid, uint64_t addr,
+ uint64_t *mapped_address, uint64_t *mapped_size)
{
struct dirent *dptr = NULL;
struct procmap procmap;
@@ -104,8 +104,8 @@ host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
fmap = fopen(mapfile, "r");
if (fmap == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to open maps file for pid %d\n",
- dev->device_fh, pid);
+ "Failed to open maps file for pid %d\n",
+ pid);
return -1;
}
@@ -179,8 +179,8 @@ host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
if (!found) {
RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to find memory file in pid %d maps file\n",
- dev->device_fh, pid);
+ "Failed to find memory file in pid %d maps file\n",
+ pid);
return -1;
}
@@ -188,8 +188,8 @@ host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
dp = opendir(procdir);
if (dp == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Cannot open pid %d process directory\n",
- dev->device_fh, pid);
+ "Cannot open pid %d process directory\n",
+ pid);
return -1;
}
@@ -202,8 +202,7 @@ host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
path = realpath(memfile, resolved_path);
if ((path == NULL) && (strlen(resolved_path) == 0)) {
RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to resolve fd directory\n",
- dev->device_fh);
+ "Failed to resolve fd directory\n");
closedir(dp);
return -1;
}
@@ -218,8 +217,8 @@ host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
if (found == 0) {
RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to find memory file for pid %d\n",
- dev->device_fh, pid);
+ "Failed to find memory file for pid %d\n",
+ pid);
return -1;
}
/* Open the shared memory file and map the memory into this process. */
@@ -227,32 +226,30 @@ host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
if (fd == -1) {
RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to open %s for pid %d\n",
- dev->device_fh, memfile, pid);
+ "Failed to open %s for pid %d\n",
+ memfile, pid);
return -1;
}
map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
- MAP_POPULATE|MAP_SHARED, fd, 0);
+ MAP_POPULATE|MAP_SHARED, fd, 0);
close(fd);
if (map == MAP_FAILED) {
RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Error mapping the file %s for pid %d\n",
- dev->device_fh, memfile, pid);
+ "Error mapping the file %s for pid %d\n",
+ memfile, pid);
return -1;
}
/* Store the memory address and size in the device data structure */
- mem->mapped_address = (uint64_t)(uintptr_t)map;
- mem->mapped_size = procmap.len;
+ *mapped_address = (uint64_t)(uintptr_t)map;
+ *mapped_size = procmap.len;
LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
- dev->device_fh,
+ "Mem File: %s->%s - Size: %llu - VA: %p\n",
memfile, resolved_path,
- (unsigned long long)mem->mapped_size, map);
+ (unsigned long long)*mapped_size, map);
return 0;
}
-
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [RFC PATCH v2 08/14] split set_memory_table into two parts
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (6 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 07/14] host_memory_map Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 09/14] add select based event driven fd management logic Huawei Xie
` (6 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/Makefile | 2 +-
lib/librte_vhost/vhost-net.h | 5 +-
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 7 +-
lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 85 +++++++
lib/librte_vhost/vhost_cuse/virtio-net-cdev.h | 45 ++++
lib/librte_vhost/virtio-net.c | 306 +-------------------------
6 files changed, 145 insertions(+), 305 deletions(-)
create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 0b2f08f..e0d0ef6 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -37,7 +37,7 @@ LIB = librte_vhost.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
LDFLAGS += -lfuse
# all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c virtio-net.c vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index 03a5c57..11737cc 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -41,6 +41,8 @@
#include <rte_log.h>
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
@@ -92,7 +94,8 @@ struct vhost_net_device_ops {
int (*get_features)(struct vhost_device_ctx, uint64_t *);
int (*set_features)(struct vhost_device_ctx, uint64_t *);
- int (*set_mem_table)(struct vhost_device_ctx, const void *, uint32_t);
+ int (*set_mem_table)(struct vhost_device_ctx,
+ const struct virtio_memory_regions *, uint32_t nregions);
int (*set_vring_num)(struct vhost_device_ctx, struct vhost_vring_state *);
int (*set_vring_addr)(struct vhost_device_ctx, struct vhost_vring_addr *);
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index 802c7dc..2ddd6e0 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -46,6 +46,7 @@
#include <rte_string_fns.h>
#include <rte_virtio_net.h>
+#include "virtio-net-cdev.h"
#include "vhost-net.h"
#include "eventfd_link/eventfd_link.h"
@@ -60,7 +61,7 @@ static const char default_cdev[] = "vhost-net";
static const char eventfd_cdev[] = "/dev/eventfd-link";
static struct fuse_session *session;
-static struct vhost_net_device_ops const *ops;
+struct vhost_net_device_ops const *ops;
/*
* Returns vhost_device_ctx from given fuse_req_t. The index is populated later
@@ -291,8 +292,8 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
break;
default:
- result = ops->set_mem_table(ctx,
- in_buf, mem_temp.nregions);
+ result = cuse_set_mem_table(ctx, in_buf,
+ mem_temp.nregions);
if (result)
fuse_reply_err(req, EINVAL);
else
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index 58ac3dd..edcbc10 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -47,7 +47,11 @@
#include <rte_log.h>
+#include "rte_virtio_net.h"
#include "vhost-net.h"
+#include "virtio-net-cdev.h"
+
+extern struct vhost_net_device_ops const *ops;
/* Line size for reading maps file. */
static const uint32_t BUFSIZE = PATH_MAX;
@@ -253,3 +257,84 @@ host_memory_map(pid_t pid, uint64_t addr,
return 0;
}
+
+int
+cuse_set_mem_table(struct vhost_device_ctx ctx,
+ const struct vhost_memory *mem_regions_addr, uint32_t nregions)
+{
+ uint64_t size = offsetof(struct vhost_memory, regions);
+ uint32_t idx, valid_regions;
+ struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
+ struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
+ ((uint64_t)(uintptr_t)mem_regions_addr + size);
+ uint64_t base_address = 0, mapped_address, mapped_size;
+
+ for (idx = 0; idx < nregions; idx++) {
+ regions[idx].guest_phys_address =
+ mem_regions[idx].guest_phys_addr;
+ regions[idx].guest_phys_address_end =
+ regions[idx].guest_phys_address +
+ mem_regions[idx].memory_size;
+ regions[idx].memory_size =
+ mem_regions[idx].memory_size;
+ regions[idx].userspace_address =
+ mem_regions[idx].userspace_addr;
+
+ LOG_DEBUG(VHOST_CONFIG,
+ "REGION: %u - GPA: %p - QVA: %p - SIZE (%"PRIu64")\n",
+ idx,
+ (void *)(uintptr_t)regions[idx].guest_phys_address,
+ (void *)(uintptr_t)regions[idx].userspace_address,
+ regions[idx].memory_size);
+
+ /*set the base address mapping*/
+ if (regions[idx].guest_phys_address == 0x0) {
+ base_address =
+ regions[idx].userspace_address;
+ /* Map VM memory file */
+ if (host_memory_map(ctx.pid, base_address,
+ &mapped_address, &mapped_size) != 0) {
+ return -1;
+ }
+ }
+ }
+
+ /* Check that we have a valid base address. */
+ if (base_address == 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to find base address of qemu memory file.\n");
+ return -1;
+ }
+
+ valid_regions = nregions;
+ for (idx = 0; idx < nregions; idx++) {
+ if ((regions[idx].userspace_address < base_address) ||
+ (regions[idx].userspace_address >
+ (base_address + mapped_size)))
+ valid_regions--;
+ }
+
+ if (valid_regions != nregions) {
+ valid_regions = 0;
+ for (idx = nregions; 0 != idx--; ) {
+ if ((regions[idx].userspace_address < base_address) ||
+ (regions[idx].userspace_address >
+ (base_address + mapped_size))) {
+ memmove(&regions[idx], &regions[idx + 1],
+ sizeof(struct virtio_memory_regions) *
+ valid_regions);
+ } else
+ valid_regions++;
+ }
+ }
+
+ for (idx = 0; idx < valid_regions; idx++) {
+ regions[idx].address_offset =
+ mapped_address - base_address +
+ regions[idx].userspace_address -
+ regions[idx].guest_phys_address;
+ }
+
+ ops->set_mem_table(ctx, &regions[0], valid_regions);
+ return 0;
+}
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
new file mode 100644
index 0000000..5ee81b1
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
@@ -0,0 +1,45 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _VIRTIO_NET_CDEV_H
+#define _VIRTIO_NET_CDEV_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "vhost-net.h"
+
+int
+cuse_set_mem_table(struct vhost_device_ctx ctx,
+ const struct vhost_memory *mem_regions_addr, uint32_t nregions);
+
+#endif
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index da9e3a6..57a5801 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -31,8 +31,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <dirent.h>
-#include <fuse/cuse_lowlevel.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
#include <stddef.h>
@@ -72,26 +70,6 @@ static struct virtio_net_config_ll *ll_root;
(1ULL << VIRTIO_NET_F_CTRL_RX))
static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES;
-/* Line size for reading maps file. */
-static const uint32_t BUFSIZE = PATH_MAX;
-
-/* Size of prot char array in procmap. */
-#define PROT_SZ 5
-
-/* Number of elements in procmap struct. */
-#define PROCMAP_SZ 8
-
-/* Structure containing information gathered from maps file. */
-struct procmap {
- uint64_t va_start; /* Start virtual address in file. */
- uint64_t len; /* Size of file. */
- uint64_t pgoff; /* Not used. */
- uint32_t maj; /* Not used. */
- uint32_t min; /* Not used. */
- uint32_t ino; /* Not used. */
- char prot[PROT_SZ]; /* Not used. */
- char fname[PATH_MAX]; /* File name. */
-};
/*
* Converts QEMU virtual address to Vhost virtual address. This function is
@@ -118,191 +96,6 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
return vhost_va;
}
-/*
- * Locate the file containing QEMU's memory space and
- * map it to our address space.
- */
-static int
-host_memory_map(struct virtio_net *dev, struct virtio_memory *mem,
- pid_t pid, uint64_t addr)
-{
- struct dirent *dptr = NULL;
- struct procmap procmap;
- DIR *dp = NULL;
- int fd;
- int i;
- char memfile[PATH_MAX];
- char mapfile[PATH_MAX];
- char procdir[PATH_MAX];
- char resolved_path[PATH_MAX];
- char *path = NULL;
- FILE *fmap;
- void *map;
- uint8_t found = 0;
- char line[BUFSIZE];
- char dlm[] = "- : ";
- char *str, *sp, *in[PROCMAP_SZ];
- char *end = NULL;
-
- /* Path where mem files are located. */
- snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid);
- /* Maps file used to locate mem file. */
- snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid);
-
- fmap = fopen(mapfile, "r");
- if (fmap == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to open maps file for pid %d\n",
- dev->device_fh, pid);
- return -1;
- }
-
- /* Read through maps file until we find out base_address. */
- while (fgets(line, BUFSIZE, fmap) != 0) {
- str = line;
- errno = 0;
- /* Split line into fields. */
- for (i = 0; i < PROCMAP_SZ; i++) {
- in[i] = strtok_r(str, &dlm[i], &sp);
- if ((in[i] == NULL) || (errno != 0)) {
- fclose(fmap);
- return -1;
- }
- str = NULL;
- }
-
- /* Convert/Copy each field as needed. */
- procmap.va_start = strtoull(in[0], &end, 16);
- if ((in[0] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.len = strtoull(in[1], &end, 16);
- if ((in[1] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.pgoff = strtoull(in[3], &end, 16);
- if ((in[3] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.maj = strtoul(in[4], &end, 16);
- if ((in[4] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.min = strtoul(in[5], &end, 16);
- if ((in[5] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- procmap.ino = strtoul(in[6], &end, 16);
- if ((in[6] == '\0') || (end == NULL) || (*end != '\0') ||
- (errno != 0)) {
- fclose(fmap);
- return -1;
- }
-
- memcpy(&procmap.prot, in[2], PROT_SZ);
- memcpy(&procmap.fname, in[7], PATH_MAX);
-
- if (procmap.va_start == addr) {
- procmap.len = procmap.len - procmap.va_start;
- found = 1;
- break;
- }
- }
- fclose(fmap);
-
- if (!found) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to find memory file in pid %d maps file\n",
- dev->device_fh, pid);
- return -1;
- }
-
- /* Find the guest memory file among the process fds. */
- dp = opendir(procdir);
- if (dp == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Cannot open pid %d process directory\n",
- dev->device_fh, pid);
- return -1;
- }
-
- found = 0;
-
- /* Read the fd directory contents. */
- while (NULL != (dptr = readdir(dp))) {
- snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s",
- pid, dptr->d_name);
- path = realpath(memfile, resolved_path);
- if ((path == NULL) && (strlen(resolved_path) == 0)) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to resolve fd directory\n",
- dev->device_fh);
- closedir(dp);
- return -1;
- }
- if (strncmp(resolved_path, procmap.fname,
- strnlen(procmap.fname, PATH_MAX)) == 0) {
- found = 1;
- break;
- }
- }
-
- closedir(dp);
-
- if (found == 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to find memory file for pid %d\n",
- dev->device_fh, pid);
- return -1;
- }
- /* Open the shared memory file and map the memory into this process. */
- fd = open(memfile, O_RDWR);
-
- if (fd == -1) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Failed to open %s for pid %d\n",
- dev->device_fh, memfile, pid);
- return -1;
- }
-
- map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE,
- MAP_POPULATE|MAP_SHARED, fd, 0);
- close(fd);
-
- if (map == MAP_FAILED) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") Error mapping the file %s for pid %d\n",
- dev->device_fh, memfile, pid);
- return -1;
- }
-
- /* Store the memory address and size in the device data structure */
- mem->mapped_address = (uint64_t)(uintptr_t)map;
- mem->mapped_size = procmap.len;
-
- LOG_DEBUG(VHOST_CONFIG,
- "(%"PRIu64") Mem File: %s->%s - Size: %llu - VA: %p\n",
- dev->device_fh,
- memfile, resolved_path,
- (unsigned long long)mem->mapped_size, map);
-
- return 0;
-}
/*
* Retrieves an entry from the devices configuration linked list.
@@ -651,14 +444,12 @@ set_features(struct vhost_device_ctx ctx, uint64_t *pu)
* This includes storing offsets used to translate buffer addresses.
*/
static int
-set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
- uint32_t nregions)
+set_mem_table(struct vhost_device_ctx ctx,
+ const struct virtio_memory_regions *regions, uint32_t nregions)
{
struct virtio_net *dev;
- struct vhost_memory_region *mem_regions;
struct virtio_memory *mem;
- uint64_t size = offsetof(struct vhost_memory, regions);
- uint32_t regionidx, valid_regions;
+ uint32_t regionidx;
dev = get_device(ctx);
if (dev == NULL)
@@ -682,104 +473,19 @@ set_mem_table(struct vhost_device_ctx ctx, const void *mem_regions_addr,
mem->nregions = nregions;
- mem_regions = (void *)(uintptr_t)
- ((uint64_t)(uintptr_t)mem_regions_addr + size);
-
for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
/* Populate the region structure for each region. */
- mem->regions[regionidx].guest_phys_address =
- mem_regions[regionidx].guest_phys_addr;
- mem->regions[regionidx].guest_phys_address_end =
- mem->regions[regionidx].guest_phys_address +
- mem_regions[regionidx].memory_size;
- mem->regions[regionidx].memory_size =
- mem_regions[regionidx].memory_size;
- mem->regions[regionidx].userspace_address =
- mem_regions[regionidx].userspace_addr;
-
- LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") REGION: %u - GPA: %p - QEMU VA: %p - SIZE (%"PRIu64")\n", dev->device_fh,
- regionidx,
- (void *)(uintptr_t)mem->regions[regionidx].guest_phys_address,
- (void *)(uintptr_t)mem->regions[regionidx].userspace_address,
- mem->regions[regionidx].memory_size);
-
- /*set the base address mapping*/
+ mem->regions[regionidx] = regions[regionidx];
if (mem->regions[regionidx].guest_phys_address == 0x0) {
mem->base_address =
mem->regions[regionidx].userspace_address;
- /* Map VM memory file */
- if (host_memory_map(dev, mem, ctx.pid,
- mem->base_address) != 0) {
- free(mem);
- return -1;
- }
+ mem->mapped_address =
+ mem->regions[regionidx].address_offset;
}
}
- /* Check that we have a valid base address. */
- if (mem->base_address == 0) {
- RTE_LOG(ERR, VHOST_CONFIG, "(%"PRIu64") Failed to find base address of qemu memory file.\n", dev->device_fh);
- free(mem);
- return -1;
- }
-
- /*
- * Check if all of our regions have valid mappings.
- * Usually one does not exist in the QEMU memory file.
- */
- valid_regions = mem->nregions;
- for (regionidx = 0; regionidx < mem->nregions; regionidx++) {
- if ((mem->regions[regionidx].userspace_address <
- mem->base_address) ||
- (mem->regions[regionidx].userspace_address >
- (mem->base_address + mem->mapped_size)))
- valid_regions--;
- }
-
- /*
- * If a region does not have a valid mapping,
- * we rebuild our memory struct to contain only valid entries.
- */
- if (valid_regions != mem->nregions) {
- LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") Not all memory regions exist in the QEMU mem file. Re-populating mem structure\n",
- dev->device_fh);
-
- /*
- * Re-populate the memory structure with only valid regions.
- * Invalid regions are over-written with memmove.
- */
- valid_regions = 0;
-
- for (regionidx = mem->nregions; 0 != regionidx--;) {
- if ((mem->regions[regionidx].userspace_address <
- mem->base_address) ||
- (mem->regions[regionidx].userspace_address >
- (mem->base_address + mem->mapped_size))) {
- memmove(&mem->regions[regionidx],
- &mem->regions[regionidx + 1],
- sizeof(struct virtio_memory_regions) *
- valid_regions);
- } else {
- valid_regions++;
- }
- }
- }
- mem->nregions = valid_regions;
dev->mem = mem;
- /*
- * Calculate the address offset for each region.
- * This offset is used to identify the vhost virtual address
- * corresponding to a QEMU guest physical address.
- */
- for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
- dev->mem->regions[regionidx].address_offset =
- dev->mem->regions[regionidx].userspace_address -
- dev->mem->base_address +
- dev->mem->mapped_address -
- dev->mem->regions[regionidx].guest_phys_address;
-
- }
return 0;
}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread* [RFC PATCH v2 09/14] add select based event driven fd management logic
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (7 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 08/14] split set_memory_table into two parts Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 10/14] vhost user support Huawei Xie
` (5 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
For generic event-driven processing, refer to:
http://libevent.org/
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/vhost_user/fd_man.c | 207 +++++++++++++++++++++++++++++++++++
lib/librte_vhost/vhost_user/fd_man.h | 64 +++++++++++
2 files changed, 271 insertions(+)
create mode 100644 lib/librte_vhost/vhost_user/fd_man.c
create mode 100644 lib/librte_vhost/vhost_user/fd_man.h
diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
new file mode 100644
index 0000000..09187e0
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -0,0 +1,207 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <rte_log.h>
+
+#include "fd_man.h"
+
+/**
+ * Returns the index in the fdset for a given fd.
+ * If fd is -1, it means to search for a free entry.
+ * @return
+ * index for the fd, or -1 if fd isn't in the fdset.
+ */
+static int
+fdset_find_fd(struct fdset *pfdset, int fd)
+{
+ int i;
+
+ if (pfdset == NULL)
+ return -1;
+
+ for (i = 0; i < MAX_FDS && pfdset->fd[i].fd != fd; i++)
+ ;
+
+ return i == MAX_FDS ? -1 : i;
+}
+
+static int
+fdset_find_free_slot(struct fdset *pfdset)
+{
+ return fdset_find_fd(pfdset, -1);
+}
+
+static void
+fdset_add_fd(struct fdset *pfdset, int idx, int fd,
+ fd_cb rcb, fd_cb wcb, uint64_t dat)
+{
+ struct fdentry *pfdentry;
+
+ if (pfdset == NULL || idx >= MAX_FDS)
+ return;
+
+ pfdentry = &pfdset->fd[idx];
+ pfdentry->fd = fd;
+ pfdentry->rcb = rcb;
+ pfdentry->wcb = wcb;
+ pfdentry->dat = dat;
+}
+
+/**
+ * Fill the read/write fd_set with the fds in the fdset.
+ * @return
+ * the highest-numbered fd placed in the read/write fd_set, or -1 if none.
+ */
+static int
+fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
+{
+ struct fdentry *pfdentry;
+ int i, maxfds = -1;
+ int num = MAX_FDS;
+
+ if (pfdset == NULL)
+ return -1;
+
+ for (i = 0; i < num; i++) {
+ pfdentry = &pfdset->fd[i];
+ if (pfdentry->fd != -1) {
+ int added = 0;
+ if (pfdentry->rcb && rfset) {
+ FD_SET(pfdentry->fd, rfset);
+ added = 1;
+ }
+ if (pfdentry->wcb && wfset) {
+ FD_SET(pfdentry->fd, wfset);
+ added = 1;
+ }
+ if (added)
+ maxfds = pfdentry->fd < maxfds ?
+ maxfds : pfdentry->fd;
+ }
+ }
+ return maxfds;
+}
+
+void
+fdset_init(struct fdset *pfdset)
+{
+ int i;
+
+ if (pfdset == NULL)
+ return;
+
+ for (i = 0; i < MAX_FDS; i++)
+ pfdset->fd[i].fd = -1;
+ pfdset->num = 0;
+}
+
+/**
+ * Register the fd in the fdset with read/write handler and context.
+ */
+int
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
+{
+ int i;
+
+ if (pfdset == NULL || fd == -1)
+ return -1;
+
+ /* Find a free slot in the list. */
+ i = fdset_find_free_slot(pfdset);
+ if (i == -1)
+ return -2;
+
+ fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
+ pfdset->num++;
+
+ return 0;
+}
+
+/**
+ * Unregister the fd from the fdset.
+ */
+void
+fdset_del(struct fdset *pfdset, int fd)
+{
+ int i;
+
+ i = fdset_find_fd(pfdset, fd);
+ if (i != -1 && fd != -1) {
+ pfdset->fd[i].fd = -1;
+ pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
+ pfdset->num--;
+ }
+}
+
+/**
+ * This function runs in an infinite blocking loop until there is no fd in
+ * pfdset. It calls the corresponding r/w handler if there is an event on the fd.
+ */
+void
+fdset_event_dispatch(struct fdset *pfdset)
+{
+ fd_set rfds, wfds;
+ int i, maxfds;
+ struct fdentry *pfdentry;
+ int num = MAX_FDS;
+
+ if (pfdset == NULL)
+ return;
+
+ while (1) {
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+ maxfds = fdset_fill(&rfds, &wfds, pfdset);
+ if (maxfds == -1)
+ return;
+
+ select(maxfds + 1, &rfds, &wfds, NULL, NULL);
+
+ for (i = 0; i < num; i++) {
+ pfdentry = &pfdset->fd[i];
+ if (FD_ISSET(pfdentry->fd, &rfds) && pfdentry->rcb)
+ pfdentry->rcb(pfdentry->fd, pfdentry->dat);
+ if (FD_ISSET(pfdentry->fd, &wfds) && pfdentry->wcb)
+ pfdentry->wcb(pfdentry->fd, pfdentry->dat);
+ }
+ }
+}
diff --git a/lib/librte_vhost/vhost_user/fd_man.h b/lib/librte_vhost/vhost_user/fd_man.h
new file mode 100644
index 0000000..26b4619
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/fd_man.h
@@ -0,0 +1,64 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FD_MAN_H_
+#define _FD_MAN_H_
+#include <stdint.h>
+
+#define MAX_FDS 1024
+
+typedef void (*fd_cb)(int fd, void *dat);
+
+struct fdentry {
+ int fd; /* -1 indicates this entry is empty */
+ fd_cb rcb; /* callback when this fd is readable. */
+ fd_cb wcb; /* callback when this fd is writeable.*/
+ void *dat; /* fd context */
+};
+
+struct fdset {
+ struct fdentry fd[MAX_FDS];
+ int num; /* current fd number of this fdset */
+};
+
+
+void fdset_init(struct fdset *pfdset);
+
+int fdset_add(struct fdset *pfdset, int fd,
+ fd_cb rcb, fd_cb wcb, void *dat);
+
+void fdset_del(struct fdset *pfdset, int fd);
+
+void fdset_event_dispatch(struct fdset *pfdset);
+
+#endif
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread* [RFC PATCH v2 10/14] vhost user support
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (8 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 09/14] add select based event driven fd management logic Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-28 13:34 ` [Qemu-devel] [dpdk-dev] " Michael S. Tsirkin
2015-01-26 3:20 ` [RFC PATCH v2 11/14] vhost user memory region map Huawei Xie
` (4 subsequent siblings)
14 siblings, 1 reply; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/Makefile | 5 +-
lib/librte_vhost/vhost-net.h | 4 +
lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 12 +-
lib/librte_vhost/vhost_user/fd_man.c | 4 +-
lib/librte_vhost/vhost_user/vhost-net-user.c | 428 ++++++++++++++++++++++++++
lib/librte_vhost/vhost_user/vhost-net-user.h | 108 +++++++
lib/librte_vhost/vhost_user/virtio-net-user.c | 205 ++++++++++++
lib/librte_vhost/vhost_user/virtio-net-user.h | 48 +++
lib/librte_vhost/virtio-net.c | 26 +-
lib/librte_vhost/virtio-net.h | 43 +++
10 files changed, 865 insertions(+), 18 deletions(-)
create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
create mode 100644 lib/librte_vhost/virtio-net.h
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index e0d0ef6..b2f14a0 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
# library name
LIB = librte_vhost.a
-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse
LDFLAGS += -lfuse
# all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
+#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c
# install includes
SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index 11737cc..3f18f25 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -41,8 +41,12 @@
#include <rte_log.h>
+#include "rte_virtio_net.h"
+
#define VHOST_MEMORY_MAX_NREGIONS 8
+extern struct vhost_net_device_ops const *ops;
+
/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index edcbc10..1d2c403 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -50,8 +50,7 @@
#include "rte_virtio_net.h"
#include "vhost-net.h"
#include "virtio-net-cdev.h"
-
-extern struct vhost_net_device_ops const *ops;
+#include "virtio-net.h"
/* Line size for reading maps file. */
static const uint32_t BUFSIZE = PATH_MAX;
@@ -268,6 +267,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
((uint64_t)(uintptr_t)mem_regions_addr + size);
uint64_t base_address = 0, mapped_address, mapped_size;
+ struct virtio_net *dev;
for (idx = 0; idx < nregions; idx++) {
regions[idx].guest_phys_address =
@@ -335,6 +335,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
regions[idx].guest_phys_address;
}
+ dev = get_device(ctx);
+ if (dev && dev->mem && dev->mem->mapped_address) {
+ munmap((void *)(uintptr_t)dev->mem->mapped_address,
+ (size_t)dev->mem->mapped_size);
+ free(dev->mem);
+ dev->mem = NULL;
+ }
+
ops->set_mem_table(ctx, &regions[0], valid_regions);
return 0;
}
diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
index 09187e0..0d2beb9 100644
--- a/lib/librte_vhost/vhost_user/fd_man.c
+++ b/lib/librte_vhost/vhost_user/fd_man.c
@@ -72,7 +72,7 @@ fdset_find_free_slot(struct fdset *pfdset)
static void
fdset_add_fd(struct fdset *pfdset, int idx, int fd,
- fd_cb rcb, fd_cb wcb, uint64_t dat)
+ fd_cb rcb, fd_cb wcb, void *dat)
{
struct fdentry *pfdentry;
@@ -138,7 +138,7 @@ fdset_init(struct fdset *pfdset)
* Register the fd in the fdset with read/write handler and context.
*/
int
-fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
{
int i;
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
new file mode 100644
index 0000000..c84fd3b
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -0,0 +1,428 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <errno.h>
+
+#include <rte_log.h>
+#include <rte_virtio_net.h>
+
+#include "fd_man.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+#include "virtio-net-user.h"
+
+static void vserver_new_vq_conn(int fd, void *data);
+static void vserver_message_handler(int fd, void *dat);
+struct vhost_net_device_ops const *ops;
+
+static struct vhost_server *g_vhost_server;
+
+static const char *vhost_message_str[VHOST_USER_MAX] = {
+ [VHOST_USER_NONE] = "VHOST_USER_NONE",
+ [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
+ [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
+ [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
+ [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
+ [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
+ [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
+ [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
+ [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
+ [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
+ [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
+ [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
+ [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
+ [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
+ [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR"
+};
+
+/**
+ * Create a unix domain socket, bind to path and listen for connection.
+ * @return
+ * socket fd or -1 on failure
+ */
+static int
+uds_socket(const char *path)
+{
+ struct sockaddr_un un;
+ int sockfd;
+ int ret;
+
+ if (path == NULL)
+ return -1;
+
+ sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (sockfd < 0)
+ return -1;
+ RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
+
+ memset(&un, 0, sizeof(un));
+ un.sun_family = AF_UNIX;
+ snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
+ ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
+ if (ret == -1)
+ goto err;
+ RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
+
+ ret = listen(sockfd, 1);
+ if (ret == -1)
+ goto err;
+
+ return sockfd;
+
+err:
+ close(sockfd);
+ return -1;
+}
+
+/* Return the number of bytes read on success or a negative value on failure. */
+static int
+read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+ struct iovec iov;
+ struct msghdr msgh = { 0 };
+ size_t fdsize = fd_num * sizeof(int);
+ char control[CMSG_SPACE(fdsize)];
+ struct cmsghdr *cmsg;
+ int ret;
+
+ iov.iov_base = buf;
+ iov.iov_len = buflen;
+
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+
+ ret = recvmsg(sockfd, &msgh, 0);
+ if (ret <= 0) {
+ RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
+ return ret;
+ }
+
+ if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
+ return -1;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
+ if ((cmsg->cmsg_level == SOL_SOCKET) &&
+ (cmsg->cmsg_type == SCM_RIGHTS)) {
+ memcpy(fds, CMSG_DATA(cmsg), fdsize);
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/* Return the number of bytes read on success or a negative value on failure. */
+static int
+read_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+ int ret;
+
+ ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
+ msg->fds, VHOST_MEMORY_MAX_NREGIONS);
+ if (ret <= 0)
+ return ret;
+
+ if (msg && msg->size) {
+ if (msg->size > sizeof(msg->payload)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "invalid msg size: %d\n", msg->size);
+ return -1;
+ }
+ ret = read(sockfd, &msg->payload, msg->size);
+ if (ret <= 0)
+ return ret;
+ if (ret != (int)msg->size) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "read control message failed\n");
+ return -1;
+ }
+ }
+
+ return ret;
+}
+
+static int
+send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+{
+
+ struct iovec iov;
+ struct msghdr msgh = { 0 };
+ size_t fdsize = fd_num * sizeof(int);
+ char control[CMSG_SPACE(fdsize)];
+ struct cmsghdr *cmsg;
+ int ret;
+
+ iov.iov_base = buf;
+ iov.iov_len = buflen;
+
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+
+ if (fds && fd_num > 0) {
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_len = CMSG_LEN(fdsize);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(cmsg), fds, fdsize);
+ } else {
+ msgh.msg_control = NULL;
+ msgh.msg_controllen = 0;
+ }
+
+ do {
+ ret = sendmsg(sockfd, &msgh, 0);
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
+{
+ int ret;
+
+ if (!msg)
+ return 0;
+
+ msg->flags &= ~VHOST_USER_VERSION_MASK;
+ msg->flags |= VHOST_USER_VERSION;
+ msg->flags |= VHOST_USER_REPLY_MASK;
+
+ ret = send_fd_message(sockfd, (char *)msg,
+ VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
+
+ return ret;
+}
+
+/* Callback invoked when there is a new virtio connection. */
+static void
+vserver_new_vq_conn(int fd, void *dat)
+{
+ struct vhost_server *vserver = (struct vhost_server *)dat;
+ int conn_fd;
+ int fh;
+ struct vhost_device_ctx vdev_ctx = { 0 };
+
+ conn_fd = accept(fd, NULL, NULL);
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "new virtio connection is %d\n", conn_fd);
+ if (conn_fd < 0)
+ return;
+
+ fh = ops->new_device(vdev_ctx);
+ if (fh == -1) {
+ close(conn_fd);
+ return;
+ }
+ RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
+
+ fdset_add(&vserver->fdset,
+ conn_fd, vserver_message_handler, NULL, (void *)fh);
+}
+
+/* Callback invoked when there is a message on the connfd. */
+static void
+vserver_message_handler(int connfd, void *dat)
+{
+ struct vhost_device_ctx ctx;
+ uint32_t fh = (uint32_t)dat;
+ struct VhostUserMsg msg;
+ uint64_t features;
+ int ret;
+
+ ctx.fh = fh;
+ ret = read_vhost_message(connfd, &msg);
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "vhost read message failed\n");
+
+ close(connfd);
+ fdset_del(&g_vhost_server->fdset, connfd);
+ ops->destroy_device(ctx);
+
+ return;
+ } else if (ret == 0) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "vhost peer closed\n");
+
+ close(connfd);
+ fdset_del(&g_vhost_server->fdset, connfd);
+ ops->destroy_device(ctx);
+
+ return;
+ }
+ if (msg.request > VHOST_USER_MAX) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "vhost read incorrect message\n");
+
+ close(connfd);
+ fdset_del(&g_vhost_server->fdset, connfd);
+
+ return;
+ }
+
+ RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
+ vhost_message_str[msg.request]);
+ switch (msg.request) {
+ case VHOST_USER_GET_FEATURES:
+ ret = ops->get_features(ctx, &features);
+ msg.payload.u64 = features;
+ msg.size = sizeof(msg.payload.u64);
+ send_vhost_message(connfd, &msg);
+ break;
+ case VHOST_USER_SET_FEATURES:
+ features = msg.payload.u64;
+ ops->set_features(ctx, &features);
+ break;
+
+ case VHOST_USER_SET_OWNER:
+ ops->set_owner(ctx);
+ break;
+ case VHOST_USER_RESET_OWNER:
+ ops->reset_owner(ctx);
+ break;
+
+ case VHOST_USER_SET_MEM_TABLE:
+ user_set_mem_table(ctx, &msg);
+ break;
+
+ case VHOST_USER_SET_LOG_BASE:
+ RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+ case VHOST_USER_SET_LOG_FD:
+ close(msg.fds[0]);
+ RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+ break;
+
+ case VHOST_USER_SET_VRING_NUM:
+ ops->set_vring_num(ctx, &msg.payload.state);
+ break;
+ case VHOST_USER_SET_VRING_ADDR:
+ ops->set_vring_addr(ctx, &msg.payload.addr);
+ break;
+ case VHOST_USER_SET_VRING_BASE:
+ ops->set_vring_base(ctx, &msg.payload.state);
+ break;
+
+ case VHOST_USER_GET_VRING_BASE:
+ ret = user_get_vring_base(ctx, &msg.payload.state);
+ msg.size = sizeof(msg.payload.state);
+ send_vhost_message(connfd, &msg);
+ break;
+
+ case VHOST_USER_SET_VRING_KICK:
+ user_set_vring_kick(ctx, &msg);
+ break;
+ case VHOST_USER_SET_VRING_CALL:
+ user_set_vring_call(ctx, &msg);
+ break;
+
+ case VHOST_USER_SET_VRING_ERR:
+ if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
+ close(msg.fds[0]);
+ RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
+ break;
+
+ default:
+ break;
+
+ }
+}
+
+
+/**
+ * Create and initialise the vhost server.
+ */
+int
+rte_vhost_driver_register(const char *path)
+{
+
+ struct vhost_server *vserver;
+
+ if (g_vhost_server != NULL)
+ return -1;
+
+ vserver = calloc(sizeof(struct vhost_server), 1);
+ if (vserver == NULL)
+ return -1;
+
+ fdset_init(&vserver->fdset);
+
+ unlink(path);
+
+ vserver->listenfd = uds_socket(path);
+ if (vserver->listenfd < 0) {
+ free(vserver);
+ return -1;
+ }
+ vserver->path = path;
+
+ fdset_add(&vserver->fdset, vserver->listenfd,
+ vserver_new_vq_conn, NULL,
+ vserver);
+
+ ops = get_virtio_net_callbacks();
+
+ g_vhost_server = vserver;
+
+ return 0;
+}
+
+
+int
+rte_vhost_driver_session_start(void)
+{
+ fdset_event_dispatch(&g_vhost_server->fdset);
+ return 0;
+}
+
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
new file mode 100644
index 0000000..7e6cda4
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -0,0 +1,108 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VHOST_NET_USER_H
+#define _VHOST_NET_USER_H
+
+#include <stdint.h>
+#include <linux/vhost.h>
+
+#include "fd_man.h"
+
+struct vhost_server {
+ const char *path; /**< The path the uds is bind to. */
+ int listenfd; /**< The listener sockfd. */
+ struct fdset fdset; /**< The fd list this vhost server manages. */
+};
+
+/* refer to hw/virtio/vhost-user.c */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
+typedef enum VhostUserRequest {
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+ uint64_t guest_phys_addr;
+ uint64_t memory_size;
+ uint64_t userspace_addr;
+ uint64_t mmap_offset;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+ uint32_t nregions;
+ uint32_t padding;
+ VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+ VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK (0x3)
+#define VHOST_USER_REPLY_MASK (0x1 << 2)
+ uint32_t flags;
+ uint32_t size; /* the following payload size */
+ union {
+#define VHOST_USER_VRING_IDX_MASK (0xff)
+#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
+ uint64_t u64;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ VhostUserMemory memory;
+ } payload;
+ int fds[VHOST_MEMORY_MAX_NREGIONS];
+} __attribute((packed)) VhostUserMsg;
+
+#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION (0x1)
+
+/*****************************************************************************/
+#endif
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
new file mode 100644
index 0000000..6601fcd
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -0,0 +1,205 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <rte_log.h>
+
+#include "virtio-net.h"
+#include "virtio-net-user.h"
+#include "vhost-net-user.h"
+#include "vhost-net.h"
+
+int
+user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+ unsigned int idx;
+ struct VhostUserMemory memory = pmsg->payload.memory;
+ struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
+ uint64_t mapped_address, base_address = 0;
+
+ for (idx = 0; idx < memory.nregions; idx++) {
+ if (memory.regions[idx].guest_phys_addr == 0)
+ base_address = memory.regions[idx].userspace_addr;
+ }
+ if (base_address == 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "couldn't find the mem region whose GPA is 0.\n");
+ return -1;
+ }
+
+ for (idx = 0; idx < memory.nregions; idx++) {
+ regions[idx].guest_phys_address =
+ memory.regions[idx].guest_phys_addr;
+ regions[idx].guest_phys_address_end =
+ memory.regions[idx].guest_phys_addr +
+ memory.regions[idx].memory_size;
+ regions[idx].memory_size = memory.regions[idx].memory_size;
+ regions[idx].userspace_address =
+ memory.regions[idx].userspace_addr;
+
+ /* This is ugly */
+ mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
+ regions[idx].memory_size +
+ memory.regions[idx].mmap_offset,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ pmsg->fds[idx],
+ 0);
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "mapped region %d to %p\n",
+ idx, (void *)mapped_address);
+
+ if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "mmap qemu guest failed.\n");
+ return -1;
+ }
+
+ mapped_address += memory.regions[idx].mmap_offset;
+
+ regions[idx].address_offset = mapped_address -
+ regions[idx].guest_phys_address;
+ LOG_DEBUG(VHOST_CONFIG,
+ "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
+ idx,
+ (void *)(uintptr_t)regions[idx].guest_phys_address,
+ (void *)(uintptr_t)regions[idx].userspace_address,
+ regions[idx].memory_size);
+ }
+ ops->set_mem_table(ctx, regions, memory.nregions);
+ return 0;
+}
+
+
+static int
+virtio_is_ready(struct virtio_net *dev)
+{
+ struct vhost_virtqueue *rvq, *tvq;
+
+ /* mq support in future.*/
+ rvq = dev->virtqueue[VIRTIO_RXQ];
+ tvq = dev->virtqueue[VIRTIO_TXQ];
+ if (rvq && tvq && rvq->desc && tvq->desc &&
+ (rvq->kickfd != (eventfd_t)-1) &&
+ (rvq->callfd != (eventfd_t)-1) &&
+ (tvq->kickfd != (eventfd_t)-1) &&
+ (tvq->callfd != (eventfd_t)-1)) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "virtio is now ready for processing.\n");
+ return 1;
+ }
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "virtio isn't ready for processing.\n");
+ return 0;
+}
+
+void
+user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+ struct vhost_vring_file file;
+
+ file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+ if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+ file.fd = -1;
+ else
+ file.fd = pmsg->fds[0];
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "vring call idx:%d file:%d\n", file.index, file.fd);
+ ops->set_vring_call(ctx, &file);
+}
+
+
+/*
+ * In vhost-user, when we receive a kick message, we test whether the virtio
+ * device is ready for packet processing.
+ */
+void
+user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
+{
+ struct vhost_vring_file file;
+ struct virtio_net *dev = get_device(ctx);
+
+ file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+ if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+ file.fd = -1;
+ else
+ file.fd = pmsg->fds[0];
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "vring kick idx:%d file:%d\n", file.index, file.fd);
+ ops->set_vring_kick(ctx, &file);
+
+ if (virtio_is_ready(dev) &&
+ !(dev->flags & VIRTIO_DEV_RUNNING))
+ notify_ops->new_device(dev);
+
+}
+
+/*
+ * When virtio is stopped, QEMU will send us the GET_VRING_BASE message.
+ */
+int
+user_get_vring_base(struct vhost_device_ctx ctx,
+ struct vhost_vring_state *state)
+{
+ struct virtio_net *dev = get_device(ctx);
+
+ /* We have to stop the queue (virtio) if it is running. */
+ if (dev->flags & VIRTIO_DEV_RUNNING)
+ notify_ops->destroy_device(dev);
+
+ /* Here we are safe to get the last used index */
+ ops->get_vring_base(ctx, state->index, state);
+
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "vring base idx:%d file:%d\n", state->index, state->num);
+ /*
+ * Based on current qemu vhost-user implementation, this message is
+ * sent and only sent in vhost_vring_stop.
+ * TODO: clean up the vring; it is not usable from this point on.
+ */
+ if (((int)dev->virtqueue[VIRTIO_RXQ]->callfd) >= 0) {
+ close(dev->virtqueue[VIRTIO_RXQ]->callfd);
+ dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
+ }
+ if (((int)dev->virtqueue[VIRTIO_TXQ]->callfd) >= 0) {
+ close(dev->virtqueue[VIRTIO_TXQ]->callfd);
+ dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
+ }
+
+ return 0;
+
+}
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
new file mode 100644
index 0000000..0f6a75a
--- /dev/null
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
@@ -0,0 +1,48 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_NET_USER_H
+#define _VIRTIO_NET_USER_H
+
+#include "vhost-net.h"
+#include "vhost-net-user.h"
+
+int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
+
+void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
+
+void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
+
+int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
+
+#endif
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 57a5801..c458ed9 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -50,6 +50,7 @@
#include <rte_virtio_net.h>
#include "vhost-net.h"
+#include "virtio-net.h"
/*
* Device linked list structure for configuration.
@@ -60,7 +61,7 @@ struct virtio_net_config_ll {
};
/* device ops to add/remove device to/from data core. */
-static struct virtio_net_device_ops const *notify_ops;
+struct virtio_net_device_ops const *notify_ops;
/* root address of the linked list of managed virtio devices */
static struct virtio_net_config_ll *ll_root;
@@ -88,8 +89,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
if ((qemu_va >= region->userspace_address) &&
(qemu_va <= region->userspace_address +
region->memory_size)) {
- vhost_va = dev->mem->mapped_address + qemu_va -
- dev->mem->base_address;
+ vhost_va = qemu_va + region->guest_phys_address +
+ region->address_offset -
+ region->userspace_address;
break;
}
}
@@ -119,7 +121,7 @@ get_config_ll_entry(struct vhost_device_ctx ctx)
* Searches the configuration core linked list and
* retrieves the device if it exists.
*/
-static struct virtio_net *
+struct virtio_net *
get_device(struct vhost_device_ctx ctx)
{
struct virtio_net_config_ll *ll_dev;
@@ -256,6 +258,11 @@ init_device(struct virtio_net *dev)
memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
+ dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
+ dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
+ dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
+ dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
+
/* Backends are set to -1 indicating an inactive device. */
dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
@@ -455,12 +462,6 @@ set_mem_table(struct vhost_device_ctx ctx,
if (dev == NULL)
return -1;
- if (dev->mem) {
- munmap((void *)(uintptr_t)dev->mem->mapped_address,
- (size_t)dev->mem->mapped_size);
- free(dev->mem);
- }
-
/* Malloc the memory structure depending on the number of regions. */
mem = calloc(1, sizeof(struct virtio_memory) +
(sizeof(struct virtio_memory_regions) * nregions));
@@ -624,7 +625,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
/* file->index refers to the queue index. The txq is 1, rxq is 0. */
vq = dev->virtqueue[file->index];
- if (vq->kickfd)
+ if ((int)vq->kickfd >= 0)
close((int)vq->kickfd);
vq->kickfd = file->fd;
@@ -650,8 +651,9 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
/* file->index refers to the queue index. The txq is 1, rxq is 0. */
vq = dev->virtqueue[file->index];
- if (vq->callfd)
+ if ((int)vq->callfd >= 0)
close((int)vq->callfd);
+
vq->callfd = file->fd;
return 0;
diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h
new file mode 100644
index 0000000..75fb57e
--- /dev/null
+++ b/lib/librte_vhost/virtio-net.h
@@ -0,0 +1,43 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_NET_H
+#define _VIRTIO_NET_H
+
+#include "vhost-net.h"
+#include "rte_virtio_net.h"
+
+struct virtio_net_device_ops const *notify_ops;
+struct virtio_net *get_device(struct vhost_device_ctx ctx);
+
+#endif
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread* Re: [Qemu-devel] [dpdk-dev] [RFC PATCH v2 10/14] vhost user support
2015-01-26 3:20 ` [RFC PATCH v2 10/14] vhost user support Huawei Xie
@ 2015-01-28 13:34 ` Michael S. Tsirkin
2015-01-28 14:27 ` Nikolay Nikolaev
0 siblings, 1 reply; 21+ messages in thread
From: Michael S. Tsirkin @ 2015-01-28 13:34 UTC (permalink / raw)
To: Huawei Xie; +Cc: fbl, qemu-devel, n.nikolaev
I had to drop the dpdk mailing list from Cc.
I added the qemu mailing list; please copy patches there
in the future.
On Mon, Jan 26, 2015 at 11:20:36AM +0800, Huawei Xie wrote:
>
> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
Overall, I think it's a reasonable implementation.
Some comments below:
> ---
> lib/librte_vhost/Makefile | 5 +-
> lib/librte_vhost/vhost-net.h | 4 +
> lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 12 +-
> lib/librte_vhost/vhost_user/fd_man.c | 4 +-
> lib/librte_vhost/vhost_user/vhost-net-user.c | 428 ++++++++++++++++++++++++++
> lib/librte_vhost/vhost_user/vhost-net-user.h | 108 +++++++
> lib/librte_vhost/vhost_user/virtio-net-user.c | 205 ++++++++++++
> lib/librte_vhost/vhost_user/virtio-net-user.h | 48 +++
> lib/librte_vhost/virtio-net.c | 26 +-
> lib/librte_vhost/virtio-net.h | 43 +++
> 10 files changed, 865 insertions(+), 18 deletions(-)
> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
> create mode 100644 lib/librte_vhost/virtio-net.h
>
> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> index e0d0ef6..b2f14a0 100644
> --- a/lib/librte_vhost/Makefile
> +++ b/lib/librte_vhost/Makefile
> @@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
> # library name
> LIB = librte_vhost.a
>
> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> LDFLAGS += -lfuse
> # all source are stored in SRCS-y
> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c
>
> # install includes
> SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
> diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
> index 11737cc..3f18f25 100644
> --- a/lib/librte_vhost/vhost-net.h
> +++ b/lib/librte_vhost/vhost-net.h
> @@ -41,8 +41,12 @@
>
> #include <rte_log.h>
>
> +#include "rte_virtio_net.h"
> +
> #define VHOST_MEMORY_MAX_NREGIONS 8
>
> +extern struct vhost_net_device_ops const *ops;
> +
> /* Macros for printing using RTE_LOG */
> #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
> #define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
> diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> index edcbc10..1d2c403 100644
> --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> @@ -50,8 +50,7 @@
> #include "rte_virtio_net.h"
> #include "vhost-net.h"
> #include "virtio-net-cdev.h"
> -
> -extern struct vhost_net_device_ops const *ops;
> +#include "virtio-net.h"
>
> /* Line size for reading maps file. */
> static const uint32_t BUFSIZE = PATH_MAX;
> @@ -268,6 +267,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
> struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
> ((uint64_t)(uintptr_t)mem_regions_addr + size);
> uint64_t base_address = 0, mapped_address, mapped_size;
> + struct virtio_net *dev;
>
> for (idx = 0; idx < nregions; idx++) {
> regions[idx].guest_phys_address =
> @@ -335,6 +335,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
> regions[idx].guest_phys_address;
> }
>
> + dev = get_device(ctx);
> + if (dev && dev->mem && dev->mem->mapped_address) {
> + munmap((void *)(uintptr_t)dev->mem->mapped_address,
> + (size_t)dev->mem->mapped_size);
> + free(dev->mem);
> + dev->mem = NULL;
> + }
> +
> ops->set_mem_table(ctx, ®ions[0], valid_regions);
> return 0;
> }
> diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
> index 09187e0..0d2beb9 100644
> --- a/lib/librte_vhost/vhost_user/fd_man.c
> +++ b/lib/librte_vhost/vhost_user/fd_man.c
> @@ -72,7 +72,7 @@ fdset_find_free_slot(struct fdset *pfdset)
>
> static void
> fdset_add_fd(struct fdset *pfdset, int idx, int fd,
> - fd_cb rcb, fd_cb wcb, uint64_t dat)
> + fd_cb rcb, fd_cb wcb, void *dat)
> {
> struct fdentry *pfdentry;
>
> @@ -138,7 +138,7 @@ fdset_init(struct fdset *pfdset)
> * Register the fd in the fdset with read/write handler and context.
> */
> int
> -fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
> {
> int i;
>
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
> new file mode 100644
> index 0000000..c84fd3b
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> @@ -0,0 +1,428 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <limits.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <errno.h>
> +
> +#include <rte_log.h>
> +#include <rte_virtio_net.h>
> +
> +#include "fd_man.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +#include "virtio-net-user.h"
> +
> +static void vserver_new_vq_conn(int fd, void *data);
> +static void vserver_message_handler(int fd, void *dat);
> +struct vhost_net_device_ops const *ops;
> +
> +static struct vhost_server *g_vhost_server;
> +
> +static const char *vhost_message_str[VHOST_USER_MAX] = {
> + [VHOST_USER_NONE] = "VHOST_USER_NONE",
> + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
> + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
> + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
> + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
> + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
> + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
> + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
> + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
> + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
> + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
> + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
> + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
> + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
> + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR"
> +};
> +
> +/**
> + * Create a unix domain socket, bind to path and listen for connection.
> + * @return
> + * socket fd or -1 on failure
> + */
> +static int
> +uds_socket(const char *path)
> +{
> + struct sockaddr_un un;
> + int sockfd;
> + int ret;
> +
> + if (path == NULL)
> + return -1;
> +
> + sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
> + if (sockfd < 0)
> + return -1;
> + RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
> +
> + memset(&un, 0, sizeof(un));
> + un.sun_family = AF_UNIX;
> + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
> + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> + if (ret == -1)
> + goto err;
> + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> +
> + ret = listen(sockfd, 1);
> + if (ret == -1)
> + goto err;
> +
> + return sockfd;
> +
> +err:
> + close(sockfd);
> + return -1;
> +}
> +
> +/* return bytes# of read on success or negative val on failure. */
> +static int
> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> +{
> + struct iovec iov;
> + struct msghdr msgh = { 0 };
> + size_t fdsize = fd_num * sizeof(int);
> + char control[CMSG_SPACE(fdsize)];
> + struct cmsghdr *cmsg;
> + int ret;
> +
> + iov.iov_base = buf;
> + iov.iov_len = buflen;
> +
> + msgh.msg_iov = &iov;
> + msgh.msg_iovlen = 1;
> + msgh.msg_control = control;
> + msgh.msg_controllen = sizeof(control);
> +
> + ret = recvmsg(sockfd, &msgh, 0);
> + if (ret <= 0) {
> + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
> + return ret;
> + }
> +
> + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
> + RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
> + return -1;
> + }
> +
> + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
> + cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
> + if ((cmsg->cmsg_level == SOL_SOCKET) &&
> + (cmsg->cmsg_type == SCM_RIGHTS)) {
> + memcpy(fds, CMSG_DATA(cmsg), fdsize);
> + break;
> + }
> + }
> +
> + return ret;
> +}
> +
> +/* return bytes# of read on success or negative val on failure. */
> +static int
> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> +{
> + int ret;
> +
> + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
> + msg->fds, VHOST_MEMORY_MAX_NREGIONS);
> + if (ret <= 0)
> + return ret;
> +
> + if (msg && msg->size) {
> + if (msg->size > sizeof(msg->payload)) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "invalid msg size: %d\n", msg->size);
> + return -1;
> + }
> + ret = read(sockfd, &msg->payload, msg->size);
> + if (ret <= 0)
> + return ret;
> + if (ret != (int)msg->size) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "read control message failed\n");
> + return -1;
> + }
> + }
> +
> + return ret;
> +}
> +
> +static int
> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> +{
> +
> + struct iovec iov;
> + struct msghdr msgh = { 0 };
> + size_t fdsize = fd_num * sizeof(int);
> + char control[CMSG_SPACE(fdsize)];
> + struct cmsghdr *cmsg;
> + int ret;
> +
> + iov.iov_base = buf;
> + iov.iov_len = buflen;
> +
> + msgh.msg_iov = &iov;
> + msgh.msg_iovlen = 1;
> +
> + if (fds && fd_num > 0) {
> + msgh.msg_control = control;
> + msgh.msg_controllen = sizeof(control);
> + cmsg = CMSG_FIRSTHDR(&msgh);
> + cmsg->cmsg_len = CMSG_LEN(fdsize);
> + cmsg->cmsg_level = SOL_SOCKET;
> + cmsg->cmsg_type = SCM_RIGHTS;
> + memcpy(CMSG_DATA(cmsg), fds, fdsize);
> + } else {
> + msgh.msg_control = NULL;
> + msgh.msg_controllen = 0;
> + }
> +
> + do {
> + ret = sendmsg(sockfd, &msgh, 0);
> + } while (ret < 0 && errno == EINTR);
> +
> + if (ret < 0) {
> + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
> + return ret;
> + }
> +
> + return ret;
> +}
> +
> +static int
> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
> +{
> + int ret;
> +
> + if (!msg)
> + return 0;
> +
> + msg->flags &= ~VHOST_USER_VERSION_MASK;
> + msg->flags |= VHOST_USER_VERSION;
> + msg->flags |= VHOST_USER_REPLY_MASK;
> +
> + ret = send_fd_message(sockfd, (char *)msg,
> + VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
> +
> + return ret;
> +}
> +
> +/* call back when there is new virtio connection. */
> +static void
> +vserver_new_vq_conn(int fd, void *dat)
> +{
> + struct vhost_server *vserver = (struct vhost_server *)dat;
> + int conn_fd;
> + int fh;
> + struct vhost_device_ctx vdev_ctx = { 0 };
> +
> + conn_fd = accept(fd, NULL, NULL);
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "new virtio connection is %d\n", conn_fd);
> + if (conn_fd < 0)
> + return;
> +
> + fh = ops->new_device(vdev_ctx);
> + if (fh == -1) {
> + close(conn_fd);
> + return;
> + }
> + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
> +
> + fdset_add(&vserver->fdset,
> + conn_fd, vserver_message_handler, NULL, (void *)fh);
> +}
> +
> +/* callback when there is message on the connfd */
> +static void
> +vserver_message_handler(int connfd, void *dat)
> +{
> + struct vhost_device_ctx ctx;
> + uint32_t fh = (uint32_t)dat;
> + struct VhostUserMsg msg;
> + uint64_t features;
> + int ret;
> +
> + ctx.fh = fh;
> + ret = read_vhost_message(connfd, &msg);
> + if (ret < 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "vhost read message failed\n");
> +
> + close(connfd);
> + fdset_del(&g_vhost_server->fdset, connfd);
> + ops->destroy_device(ctx);
> +
> + return;
> + } else if (ret == 0) {
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vhost peer closed\n");
> +
> + close(connfd);
> + fdset_del(&g_vhost_server->fdset, connfd);
> + ops->destroy_device(ctx);
> +
> + return;
> + }
> + if (msg.request > VHOST_USER_MAX) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "vhost read incorrect message\n");
> +
> + close(connfd);
> + fdset_del(&g_vhost_server->fdset, connfd);
> +
> + return;
> + }
> +
> + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> + vhost_message_str[msg.request]);
> + switch (msg.request) {
> + case VHOST_USER_GET_FEATURES:
> + ret = ops->get_features(ctx, &features);
> + msg.payload.u64 = features;
> + msg.size = sizeof(msg.payload.u64);
> + send_vhost_message(connfd, &msg);
What if this fails (e.g. remote died)?
How will everything be cleaned up?
> + break;
> + case VHOST_USER_SET_FEATURES:
> + features = msg.payload.u64;
> + ops->set_features(ctx, &features);
> + break;
> +
> + case VHOST_USER_SET_OWNER:
> + ops->set_owner(ctx);
> + break;
> + case VHOST_USER_RESET_OWNER:
> + ops->reset_owner(ctx);
> + break;
> +
> + case VHOST_USER_SET_MEM_TABLE:
> + user_set_mem_table(ctx, &msg);
> + break;
> +
> + case VHOST_USER_SET_LOG_BASE:
> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> + case VHOST_USER_SET_LOG_FD:
> + close(msg.fds[0]);
> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> + break;
> +
> + case VHOST_USER_SET_VRING_NUM:
> + ops->set_vring_num(ctx, &msg.payload.state);
> + break;
> + case VHOST_USER_SET_VRING_ADDR:
> + ops->set_vring_addr(ctx, &msg.payload.addr);
> + break;
> + case VHOST_USER_SET_VRING_BASE:
> + ops->set_vring_base(ctx, &msg.payload.state);
> + break;
> +
> + case VHOST_USER_GET_VRING_BASE:
> + ret = user_get_vring_base(ctx, &msg.payload.state);
> + msg.size = sizeof(msg.payload.state);
> + send_vhost_message(connfd, &msg);
> + break;
> +
> + case VHOST_USER_SET_VRING_KICK:
> + user_set_vring_kick(ctx, &msg);
> + break;
> + case VHOST_USER_SET_VRING_CALL:
> + user_set_vring_call(ctx, &msg);
> + break;
> +
> + case VHOST_USER_SET_VRING_ERR:
> + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
> + close(msg.fds[0]);
> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
> + break;
> +
> + default:
> + break;
> +
> + }
> +}
> +
> +
> +/**
> + * Creates and initialise the vhost server.
> + */
> +int
> +rte_vhost_driver_register(const char *path)
> +{
> +
> + struct vhost_server *vserver;
> +
> + if (g_vhost_server != NULL)
> + return -1;
> +
> + vserver = calloc(sizeof(struct vhost_server), 1);
> + if (vserver == NULL)
> + return -1;
> +
> + fdset_init(&vserver->fdset);
> +
> + unlink(path);
> +
> + vserver->listenfd = uds_socket(path);
> + if (vserver->listenfd < 0) {
> + free(vserver);
> + return -1;
> + }
> + vserver->path = path;
> +
> + fdset_add(&vserver->fdset, vserver->listenfd,
> + vserver_new_vq_conn, NULL,
> + vserver);
> +
> + ops = get_virtio_net_callbacks();
> +
> + g_vhost_server = vserver;
> +
> + return 0;
> +}
> +
> +
> +int
> +rte_vhost_driver_session_start(void)
> +{
> + fdset_event_dispatch(&g_vhost_server->fdset);
> + return 0;
> +}
> +
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
> new file mode 100644
> index 0000000..7e6cda4
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
> @@ -0,0 +1,108 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "fd_man.h"
> +
> +struct vhost_server {
> + const char *path; /**< The path the uds is bind to. */
> + int listenfd; /**< The listener sockfd. */
> + struct fdset fdset; /**< The fd list this vhost server manages. */
> +};
> +
> +/* refer to hw/virtio/vhost-user.c */
> +
> +#define VHOST_MEMORY_MAX_NREGIONS 8
> +
> +typedef enum VhostUserRequest {
> + VHOST_USER_NONE = 0,
> + VHOST_USER_GET_FEATURES = 1,
> + VHOST_USER_SET_FEATURES = 2,
> + VHOST_USER_SET_OWNER = 3,
> + VHOST_USER_RESET_OWNER = 4,
> + VHOST_USER_SET_MEM_TABLE = 5,
> + VHOST_USER_SET_LOG_BASE = 6,
> + VHOST_USER_SET_LOG_FD = 7,
> + VHOST_USER_SET_VRING_NUM = 8,
> + VHOST_USER_SET_VRING_ADDR = 9,
> + VHOST_USER_SET_VRING_BASE = 10,
> + VHOST_USER_GET_VRING_BASE = 11,
> + VHOST_USER_SET_VRING_KICK = 12,
> + VHOST_USER_SET_VRING_CALL = 13,
> + VHOST_USER_SET_VRING_ERR = 14,
> + VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> + uint64_t guest_phys_addr;
> + uint64_t memory_size;
> + uint64_t userspace_addr;
> + uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> + uint32_t nregions;
> + uint32_t padding;
> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserMsg {
> + VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK (0x3)
> +#define VHOST_USER_REPLY_MASK (0x1 << 2)
> + uint32_t flags;
> + uint32_t size; /* the following payload size */
> + union {
> +#define VHOST_USER_VRING_IDX_MASK (0xff)
> +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
> + uint64_t u64;
> + struct vhost_vring_state state;
> + struct vhost_vring_addr addr;
> + VhostUserMemory memory;
> + } payload;
> + int fds[VHOST_MEMORY_MAX_NREGIONS];
> +} __attribute((packed)) VhostUserMsg;
> +
> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION (0x1)
> +
> +/*****************************************************************************/
> +#endif
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> new file mode 100644
> index 0000000..6601fcd
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> @@ -0,0 +1,205 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
> +
> +#include <rte_log.h>
> +
> +#include "virtio-net.h"
> +#include "virtio-net-user.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +
> +int
> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> + unsigned int idx;
> + struct VhostUserMemory memory = pmsg->payload.memory;
> + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
> + uint64_t mapped_address, base_address = 0;
> +
> + for (idx = 0; idx < memory.nregions; idx++) {
> + if (memory.regions[idx].guest_phys_addr == 0)
> + base_address = memory.regions[idx].userspace_addr;
> + }
> + if (base_address == 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "couldn't find the mem region whose GPA is 0.\n");
> + return -1;
> + }
> +
> + for (idx = 0; idx < memory.nregions; idx++) {
> + regions[idx].guest_phys_address =
> + memory.regions[idx].guest_phys_addr;
> + regions[idx].guest_phys_address_end =
> + memory.regions[idx].guest_phys_addr +
> + memory.regions[idx].memory_size;
> + regions[idx].memory_size = memory.regions[idx].memory_size;
> + regions[idx].userspace_address =
> + memory.regions[idx].userspace_addr;
> +
> + /* This is ugly */
> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
> + regions[idx].memory_size +
> + memory.regions[idx].mmap_offset,
> + PROT_READ | PROT_WRITE, MAP_SHARED,
> + pmsg->fds[idx],
> + 0);
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "mapped region %d to %p\n",
> + idx, (void *)mapped_address);
> +
> + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "mmap qemu guest failed.\n");
> + return -1;
> + }
> +
> + mapped_address += memory.regions[idx].mmap_offset;
> +
> + regions[idx].address_offset = mapped_address -
> + regions[idx].guest_phys_address;
> + LOG_DEBUG(VHOST_CONFIG,
> + "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
> + idx,
> + (void *)(uintptr_t)regions[idx].guest_phys_address,
> + (void *)(uintptr_t)regions[idx].userspace_address,
> + regions[idx].memory_size);
> + }
> + ops->set_mem_table(ctx, regions, memory.nregions);
> + return 0;
> +}
> +
> +
> +static int
> +virtio_is_ready(struct virtio_net *dev)
> +{
> + struct vhost_virtqueue *rvq, *tvq;
> +
> + /* mq support in future.*/
> + rvq = dev->virtqueue[VIRTIO_RXQ];
> + tvq = dev->virtqueue[VIRTIO_TXQ];
> + if (rvq && tvq && rvq->desc && tvq->desc &&
> + (rvq->kickfd != (eventfd_t)-1) &&
> + (rvq->callfd != (eventfd_t)-1) &&
> + (tvq->kickfd != (eventfd_t)-1) &&
> + (tvq->callfd != (eventfd_t)-1)) {
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "virtio is now ready for processing.\n");
> + return 1;
> + }
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "virtio isn't ready for processing.\n");
> + return 0;
> +}
> +
> +void
> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> + struct vhost_vring_file file;
> +
> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
> + file.fd = -1;
> + else
> + file.fd = pmsg->fds[0];
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vring call idx:%d file:%d\n", file.index, file.fd);
> + ops->set_vring_call(ctx, &file);
> +}
> +
> +
> +/*
> + * In vhost-user, when we receive kick message, will test whether virtio
> + * device is ready for packet processing.
> + */
> +void
> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> + struct vhost_vring_file file;
> + struct virtio_net *dev = get_device(ctx);
> +
> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
> + file.fd = -1;
> + else
> + file.fd = pmsg->fds[0];
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vring kick idx:%d file:%d\n", file.index, file.fd);
> + ops->set_vring_kick(ctx, &file);
> +
> + if (virtio_is_ready(dev) &&
> + !(dev->flags & VIRTIO_DEV_RUNNING))
> + notify_ops->new_device(dev);
> +
> +}
> +
> +/*
> + * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
> + */
> +int
> +user_get_vring_base(struct vhost_device_ctx ctx,
> + struct vhost_vring_state *state)
> +{
> + struct virtio_net *dev = get_device(ctx);
> +
> + /* We have to stop the queue (virtio) if it is running. */
> + if (dev->flags & VIRTIO_DEV_RUNNING)
> + notify_ops->destroy_device(dev);
> +
> + /* Here we are safe to get the last used index */
> + ops->get_vring_base(ctx, state->index, state);
> +
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vring base idx:%d file:%d\n", state->index, state->num);
> + /*
> + * Based on current qemu vhost-user implementation, this message is
> + * sent and only sent in vhost_vring_stop.
> + * TODO: cleanup the vring, it isn't usable since here.
> + */
Please don't tie yourself to the current qemu implementation. Please just
extend qemu to send explicit start/stop messages.
You'll need to negotiate the new capabilities.
Nikolay, it seems that the version field is only 2 bits.
How can we extend it cleanly?
Perhaps add a new GET_PROTOCOL message for exchanging vhost-user
specific bits; the remote should then set a high version bit to let qemu
know it's supported?
> + if (((int)dev->virtqueue[VIRTIO_RXQ]->callfd) >= 0) {
> + close(dev->virtqueue[VIRTIO_RXQ]->callfd);
> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> + }
> + if (((int)dev->virtqueue[VIRTIO_TXQ]->callfd) >= 0) {
> + close(dev->virtqueue[VIRTIO_TXQ]->callfd);
> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> + }
> +
> + return 0;
> +
> +}
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
> new file mode 100644
> index 0000000..0f6a75a
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> @@ -0,0 +1,48 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VIRTIO_NET_USER_H
> +#define _VIRTIO_NET_USER_H
> +
> +#include "vhost-net.h"
> +#include "vhost-net-user.h"
> +
> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
> +
> +#endif
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index 57a5801..c458ed9 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -50,6 +50,7 @@
> #include <rte_virtio_net.h>
>
> #include "vhost-net.h"
> +#include "virtio-net.h"
>
> /*
> * Device linked list structure for configuration.
> @@ -60,7 +61,7 @@ struct virtio_net_config_ll {
> };
>
> /* device ops to add/remove device to/from data core. */
> -static struct virtio_net_device_ops const *notify_ops;
> +struct virtio_net_device_ops const *notify_ops;
> /* root address of the linked list of managed virtio devices */
> static struct virtio_net_config_ll *ll_root;
>
> @@ -88,8 +89,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
> if ((qemu_va >= region->userspace_address) &&
> (qemu_va <= region->userspace_address +
> region->memory_size)) {
> - vhost_va = dev->mem->mapped_address + qemu_va -
> - dev->mem->base_address;
> + vhost_va = qemu_va + region->guest_phys_address +
> + region->address_offset -
> + region->userspace_address;
> break;
> }
> }
> @@ -119,7 +121,7 @@ get_config_ll_entry(struct vhost_device_ctx ctx)
> * Searches the configuration core linked list and
> * retrieves the device if it exists.
> */
> -static struct virtio_net *
> +struct virtio_net *
> get_device(struct vhost_device_ctx ctx)
> {
> struct virtio_net_config_ll *ll_dev;
> @@ -256,6 +258,11 @@ init_device(struct virtio_net *dev)
> memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
> memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
>
> + dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> + dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> +
> /* Backends are set to -1 indicating an inactive device. */
> dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
> dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
> @@ -455,12 +462,6 @@ set_mem_table(struct vhost_device_ctx ctx,
> if (dev == NULL)
> return -1;
>
> - if (dev->mem) {
> - munmap((void *)(uintptr_t)dev->mem->mapped_address,
> - (size_t)dev->mem->mapped_size);
> - free(dev->mem);
> - }
> -
> /* Malloc the memory structure depending on the number of regions. */
> mem = calloc(1, sizeof(struct virtio_memory) +
> (sizeof(struct virtio_memory_regions) * nregions));
> @@ -624,7 +625,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
> vq = dev->virtqueue[file->index];
>
> - if (vq->kickfd)
> + if ((int)vq->kickfd >= 0)
> close((int)vq->kickfd);
>
> vq->kickfd = file->fd;
> @@ -650,8 +651,9 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
> vq = dev->virtqueue[file->index];
>
> - if (vq->callfd)
> + if ((int)vq->callfd >= 0)
> close((int)vq->callfd);
> +
> vq->callfd = file->fd;
>
> return 0;
> diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h
> new file mode 100644
> index 0000000..75fb57e
> --- /dev/null
> +++ b/lib/librte_vhost/virtio-net.h
> @@ -0,0 +1,43 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VIRTIO_NET_H
> +#define _VIRTIO_NET_H
> +
> +#include "vhost-net.h"
> +#include "rte_virtio_net.h"
> +
> +struct virtio_net_device_ops const *notify_ops;
> +struct virtio_net *get_device(struct vhost_device_ctx ctx);
> +
> +#endif
> --
> 1.8.1.4
>
^ permalink raw reply [flat|nested] 21+ messages in thread* Re: [Qemu-devel] [dpdk-dev] [RFC PATCH v2 10/14] vhost user support
2015-01-28 13:34 ` [Qemu-devel] [dpdk-dev] " Michael S. Tsirkin
@ 2015-01-28 14:27 ` Nikolay Nikolaev
2015-01-28 14:37 ` Michael S. Tsirkin
0 siblings, 1 reply; 21+ messages in thread
From: Nikolay Nikolaev @ 2015-01-28 14:27 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Huawei Xie, fbl, qemu-devel, VirtualOpenSystems Technical Team
On Wed, Jan 28, 2015 at 3:34 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> I had to drop the dpdk mailing list from Cc.
> Added qemu mailing list, please copy patches there
> in the future.
>
> On Mon, Jan 26, 2015 at 11:20:36AM +0800, Huawei Xie wrote:
>>
>> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
>
> Overall, I think it's a reasonable implementation.
> Some comments below:
>
>> ---
>> lib/librte_vhost/Makefile | 5 +-
>> lib/librte_vhost/vhost-net.h | 4 +
>> lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 12 +-
>> lib/librte_vhost/vhost_user/fd_man.c | 4 +-
>> lib/librte_vhost/vhost_user/vhost-net-user.c | 428 ++++++++++++++++++++++++++
>> lib/librte_vhost/vhost_user/vhost-net-user.h | 108 +++++++
>> lib/librte_vhost/vhost_user/virtio-net-user.c | 205 ++++++++++++
>> lib/librte_vhost/vhost_user/virtio-net-user.h | 48 +++
>> lib/librte_vhost/virtio-net.c | 26 +-
>> lib/librte_vhost/virtio-net.h | 43 +++
>> 10 files changed, 865 insertions(+), 18 deletions(-)
>> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
>> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
>> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
>> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
>> create mode 100644 lib/librte_vhost/virtio-net.h
>>
>> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
>> index e0d0ef6..b2f14a0 100644
>> --- a/lib/librte_vhost/Makefile
>> +++ b/lib/librte_vhost/Makefile
>> @@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
>> # library name
>> LIB = librte_vhost.a
>>
>> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>> LDFLAGS += -lfuse
>> # all source are stored in SRCS-y
>> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
>> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
>> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c
>>
>> # install includes
>> SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
>> diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
>> index 11737cc..3f18f25 100644
>> --- a/lib/librte_vhost/vhost-net.h
>> +++ b/lib/librte_vhost/vhost-net.h
>> @@ -41,8 +41,12 @@
>>
>> #include <rte_log.h>
>>
>> +#include "rte_virtio_net.h"
>> +
>> #define VHOST_MEMORY_MAX_NREGIONS 8
>>
>> +extern struct vhost_net_device_ops const *ops;
>> +
>> /* Macros for printing using RTE_LOG */
>> #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
>> #define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
>> diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
>> index edcbc10..1d2c403 100644
>> --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
>> +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
>> @@ -50,8 +50,7 @@
>> #include "rte_virtio_net.h"
>> #include "vhost-net.h"
>> #include "virtio-net-cdev.h"
>> -
>> -extern struct vhost_net_device_ops const *ops;
>> +#include "virtio-net.h"
>>
>> /* Line size for reading maps file. */
>> static const uint32_t BUFSIZE = PATH_MAX;
>> @@ -268,6 +267,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
>> struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
>> ((uint64_t)(uintptr_t)mem_regions_addr + size);
>> uint64_t base_address = 0, mapped_address, mapped_size;
>> + struct virtio_net *dev;
>>
>> for (idx = 0; idx < nregions; idx++) {
>> regions[idx].guest_phys_address =
>> @@ -335,6 +335,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
>> regions[idx].guest_phys_address;
>> }
>>
>> + dev = get_device(ctx);
>> + if (dev && dev->mem && dev->mem->mapped_address) {
>> + munmap((void *)(uintptr_t)dev->mem->mapped_address,
>> + (size_t)dev->mem->mapped_size);
>> + free(dev->mem);
>> + dev->mem = NULL;
>> + }
>> +
>> + ops->set_mem_table(ctx, &regions[0], valid_regions);
>> return 0;
>> }
>> diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
>> index 09187e0..0d2beb9 100644
>> --- a/lib/librte_vhost/vhost_user/fd_man.c
>> +++ b/lib/librte_vhost/vhost_user/fd_man.c
>> @@ -72,7 +72,7 @@ fdset_find_free_slot(struct fdset *pfdset)
>>
>> static void
>> fdset_add_fd(struct fdset *pfdset, int idx, int fd,
>> - fd_cb rcb, fd_cb wcb, uint64_t dat)
>> + fd_cb rcb, fd_cb wcb, void *dat)
>> {
>> struct fdentry *pfdentry;
>>
>> @@ -138,7 +138,7 @@ fdset_init(struct fdset *pfdset)
>> * Register the fd in the fdset with read/write handler and context.
>> */
>> int
>> -fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
>> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
>> {
>> int i;
>>
>> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
>> new file mode 100644
>> index 0000000..c84fd3b
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
>> @@ -0,0 +1,428 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <limits.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <string.h>
>> +#include <sys/types.h>
>> +#include <sys/socket.h>
>> +#include <sys/un.h>
>> +#include <errno.h>
>> +
>> +#include <rte_log.h>
>> +#include <rte_virtio_net.h>
>> +
>> +#include "fd_man.h"
>> +#include "vhost-net-user.h"
>> +#include "vhost-net.h"
>> +#include "virtio-net-user.h"
>> +
>> +static void vserver_new_vq_conn(int fd, void *data);
>> +static void vserver_message_handler(int fd, void *dat);
>> +struct vhost_net_device_ops const *ops;
>> +
>> +static struct vhost_server *g_vhost_server;
>> +
>> +static const char *vhost_message_str[VHOST_USER_MAX] = {
>> + [VHOST_USER_NONE] = "VHOST_USER_NONE",
>> + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
>> + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
>> + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
>> + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
>> + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
>> + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
>> + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
>> + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
>> + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
>> + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
>> + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
>> + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
>> + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
>> + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR"
>> +};
>> +
>> +/**
>> + * Create a unix domain socket, bind to path and listen for connection.
>> + * @return
>> + * socket fd or -1 on failure
>> + */
>> +static int
>> +uds_socket(const char *path)
>> +{
>> + struct sockaddr_un un;
>> + int sockfd;
>> + int ret;
>> +
>> + if (path == NULL)
>> + return -1;
>> +
>> + sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
>> + if (sockfd < 0)
>> + return -1;
>> + RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
>> +
>> + memset(&un, 0, sizeof(un));
>> + un.sun_family = AF_UNIX;
>> + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
>> + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
>> + if (ret == -1)
>> + goto err;
>> + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
>> +
>> + ret = listen(sockfd, 1);
>> + if (ret == -1)
>> + goto err;
>> +
>> + return sockfd;
>> +
>> +err:
>> + close(sockfd);
>> + return -1;
>> +}
>> +
>> +/* return bytes# of read on success or negative val on failure. */
>> +static int
>> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
>> +{
>> + struct iovec iov;
>> + struct msghdr msgh = { 0 };
>> + size_t fdsize = fd_num * sizeof(int);
>> + char control[CMSG_SPACE(fdsize)];
>> + struct cmsghdr *cmsg;
>> + int ret;
>> +
>> + iov.iov_base = buf;
>> + iov.iov_len = buflen;
>> +
>> + msgh.msg_iov = &iov;
>> + msgh.msg_iovlen = 1;
>> + msgh.msg_control = control;
>> + msgh.msg_controllen = sizeof(control);
>> +
>> + ret = recvmsg(sockfd, &msgh, 0);
>> + if (ret <= 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
>> + return ret;
>> + }
>> +
>> + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
>> + RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
>> + return -1;
>> + }
>> +
>> + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
>> + cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
>> + if ((cmsg->cmsg_level == SOL_SOCKET) &&
>> + (cmsg->cmsg_type == SCM_RIGHTS)) {
>> + memcpy(fds, CMSG_DATA(cmsg), fdsize);
>> + break;
>> + }
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +/* return bytes# of read on success or negative val on failure. */
>> +static int
>> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
>> +{
>> + int ret;
>> +
>> + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
>> + msg->fds, VHOST_MEMORY_MAX_NREGIONS);
>> + if (ret <= 0)
>> + return ret;
>> +
>> + if (msg && msg->size) {
>> + if (msg->size > sizeof(msg->payload)) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "invalid msg size: %d\n", msg->size);
>> + return -1;
>> + }
>> + ret = read(sockfd, &msg->payload, msg->size);
>> + if (ret <= 0)
>> + return ret;
>> + if (ret != (int)msg->size) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "read control message failed\n");
>> + return -1;
>> + }
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static int
>> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
>> +{
>> +
>> + struct iovec iov;
>> + struct msghdr msgh = { 0 };
>> + size_t fdsize = fd_num * sizeof(int);
>> + char control[CMSG_SPACE(fdsize)];
>> + struct cmsghdr *cmsg;
>> + int ret;
>> +
>> + iov.iov_base = buf;
>> + iov.iov_len = buflen;
>> +
>> + msgh.msg_iov = &iov;
>> + msgh.msg_iovlen = 1;
>> +
>> + if (fds && fd_num > 0) {
>> + msgh.msg_control = control;
>> + msgh.msg_controllen = sizeof(control);
>> + cmsg = CMSG_FIRSTHDR(&msgh);
>> + cmsg->cmsg_len = CMSG_LEN(fdsize);
>> + cmsg->cmsg_level = SOL_SOCKET;
>> + cmsg->cmsg_type = SCM_RIGHTS;
>> + memcpy(CMSG_DATA(cmsg), fds, fdsize);
>> + } else {
>> + msgh.msg_control = NULL;
>> + msgh.msg_controllen = 0;
>> + }
>> +
>> + do {
>> + ret = sendmsg(sockfd, &msgh, 0);
>> + } while (ret < 0 && errno == EINTR);
>> +
>> + if (ret < 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
>> + return ret;
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static int
>> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
>> +{
>> + int ret;
>> +
>> + if (!msg)
>> + return 0;
>> +
>> + msg->flags &= ~VHOST_USER_VERSION_MASK;
>> + msg->flags |= VHOST_USER_VERSION;
>> + msg->flags |= VHOST_USER_REPLY_MASK;
>> +
>> + ret = send_fd_message(sockfd, (char *)msg,
>> + VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
>> +
>> + return ret;
>> +}
>> +
>> +/* call back when there is new virtio connection. */
>> +static void
>> +vserver_new_vq_conn(int fd, void *dat)
>> +{
>> + struct vhost_server *vserver = (struct vhost_server *)dat;
>> + int conn_fd;
>> + int fh;
>> + struct vhost_device_ctx vdev_ctx = { 0 };
>> +
>> + conn_fd = accept(fd, NULL, NULL);
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "new virtio connection is %d\n", conn_fd);
>> + if (conn_fd < 0)
>> + return;
>> +
>> + fh = ops->new_device(vdev_ctx);
>> + if (fh == -1) {
>> + close(conn_fd);
>> + return;
>> + }
>> + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
>> +
>> + fdset_add(&vserver->fdset,
>> + conn_fd, vserver_message_handler, NULL, (void *)fh);
>> +}
>> +
>> +/* callback when there is message on the connfd */
>> +static void
>> +vserver_message_handler(int connfd, void *dat)
>> +{
>> + struct vhost_device_ctx ctx;
>> + uint32_t fh = (uint32_t)dat;
>> + struct VhostUserMsg msg;
>> + uint64_t features;
>> + int ret;
>> +
>> + ctx.fh = fh;
>> + ret = read_vhost_message(connfd, &msg);
>> + if (ret < 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "vhost read message failed\n");
>> +
>> + close(connfd);
>> + fdset_del(&g_vhost_server->fdset, connfd);
>> + ops->destroy_device(ctx);
>> +
>> + return;
>> + } else if (ret == 0) {
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vhost peer closed\n");
>> +
>> + close(connfd);
>> + fdset_del(&g_vhost_server->fdset, connfd);
>> + ops->destroy_device(ctx);
>> +
>> + return;
>> + }
>> + if (msg.request > VHOST_USER_MAX) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "vhost read incorrect message\n");
>> +
>> + close(connfd);
>> + fdset_del(&g_vhost_server->fdset, connfd);
>> +
>> + return;
>> + }
>> +
>> + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
>> + vhost_message_str[msg.request]);
>> + switch (msg.request) {
>> + case VHOST_USER_GET_FEATURES:
>> + ret = ops->get_features(ctx, &features);
>> + msg.payload.u64 = features;
>> + msg.size = sizeof(msg.payload.u64);
>> + send_vhost_message(connfd, &msg);
>
> What if this fails (e.g. remote died)?
> How will everything be cleaned up?
>
>> + break;
>> + case VHOST_USER_SET_FEATURES:
>> + features = msg.payload.u64;
>> + ops->set_features(ctx, &features);
>> + break;
>> +
>> + case VHOST_USER_SET_OWNER:
>> + ops->set_owner(ctx);
>> + break;
>> + case VHOST_USER_RESET_OWNER:
>> + ops->reset_owner(ctx);
>> + break;
>> +
>> + case VHOST_USER_SET_MEM_TABLE:
>> + user_set_mem_table(ctx, &msg);
>> + break;
>> +
>> + case VHOST_USER_SET_LOG_BASE:
>> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
>> + case VHOST_USER_SET_LOG_FD:
>> + close(msg.fds[0]);
>> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
>> + break;
>> +
>> + case VHOST_USER_SET_VRING_NUM:
>> + ops->set_vring_num(ctx, &msg.payload.state);
>> + break;
>> + case VHOST_USER_SET_VRING_ADDR:
>> + ops->set_vring_addr(ctx, &msg.payload.addr);
>> + break;
>> + case VHOST_USER_SET_VRING_BASE:
>> + ops->set_vring_base(ctx, &msg.payload.state);
>> + break;
>> +
>> + case VHOST_USER_GET_VRING_BASE:
>> + ret = user_get_vring_base(ctx, &msg.payload.state);
>> + msg.size = sizeof(msg.payload.state);
>> + send_vhost_message(connfd, &msg);
>> + break;
>> +
>> + case VHOST_USER_SET_VRING_KICK:
>> + user_set_vring_kick(ctx, &msg);
>> + break;
>> + case VHOST_USER_SET_VRING_CALL:
>> + user_set_vring_call(ctx, &msg);
>> + break;
>> +
>> + case VHOST_USER_SET_VRING_ERR:
>> + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
>> + close(msg.fds[0]);
>> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
>> + break;
>> +
>> + default:
>> + break;
>> +
>> + }
>> +}
>> +
>> +
>> +/**
>> + * Creates and initialise the vhost server.
>> + */
>> +int
>> +rte_vhost_driver_register(const char *path)
>> +{
>> +
>> + struct vhost_server *vserver;
>> +
>> + if (g_vhost_server != NULL)
>> + return -1;
>> +
>> + vserver = calloc(sizeof(struct vhost_server), 1);
>> + if (vserver == NULL)
>> + return -1;
>> +
>> + fdset_init(&vserver->fdset);
>> +
>> + unlink(path);
>> +
>> + vserver->listenfd = uds_socket(path);
>> + if (vserver->listenfd < 0) {
>> + free(vserver);
>> + return -1;
>> + }
>> + vserver->path = path;
>> +
>> + fdset_add(&vserver->fdset, vserver->listenfd,
>> + vserver_new_vq_conn, NULL,
>> + vserver);
>> +
>> + ops = get_virtio_net_callbacks();
>> +
>> + g_vhost_server = vserver;
>> +
>> + return 0;
>> +}
>> +
>> +
>> +int
>> +rte_vhost_driver_session_start(void)
>> +{
>> + fdset_event_dispatch(&g_vhost_server->fdset);
>> + return 0;
>> +}
>> +
>> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
>> new file mode 100644
>> index 0000000..7e6cda4
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
>> @@ -0,0 +1,108 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#ifndef _VHOST_NET_USER_H
>> +#define _VHOST_NET_USER_H
>> +
>> +#include <stdint.h>
>> +#include <linux/vhost.h>
>> +
>> +#include "fd_man.h"
>> +
>> +struct vhost_server {
>> + const char *path; /**< The path the uds is bind to. */
>> + int listenfd; /**< The listener sockfd. */
>> + struct fdset fdset; /**< The fd list this vhost server manages. */
>> +};
>> +
>> +/* refer to hw/virtio/vhost-user.c */
>> +
>> +#define VHOST_MEMORY_MAX_NREGIONS 8
>> +
>> +typedef enum VhostUserRequest {
>> + VHOST_USER_NONE = 0,
>> + VHOST_USER_GET_FEATURES = 1,
>> + VHOST_USER_SET_FEATURES = 2,
>> + VHOST_USER_SET_OWNER = 3,
>> + VHOST_USER_RESET_OWNER = 4,
>> + VHOST_USER_SET_MEM_TABLE = 5,
>> + VHOST_USER_SET_LOG_BASE = 6,
>> + VHOST_USER_SET_LOG_FD = 7,
>> + VHOST_USER_SET_VRING_NUM = 8,
>> + VHOST_USER_SET_VRING_ADDR = 9,
>> + VHOST_USER_SET_VRING_BASE = 10,
>> + VHOST_USER_GET_VRING_BASE = 11,
>> + VHOST_USER_SET_VRING_KICK = 12,
>> + VHOST_USER_SET_VRING_CALL = 13,
>> + VHOST_USER_SET_VRING_ERR = 14,
>> + VHOST_USER_MAX
>> +} VhostUserRequest;
>> +
>> +typedef struct VhostUserMemoryRegion {
>> + uint64_t guest_phys_addr;
>> + uint64_t memory_size;
>> + uint64_t userspace_addr;
>> + uint64_t mmap_offset;
>> +} VhostUserMemoryRegion;
>> +
>> +typedef struct VhostUserMemory {
>> + uint32_t nregions;
>> + uint32_t padding;
>> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>> +} VhostUserMemory;
>> +
>> +typedef struct VhostUserMsg {
>> + VhostUserRequest request;
>> +
>> +#define VHOST_USER_VERSION_MASK (0x3)
>> +#define VHOST_USER_REPLY_MASK (0x1 << 2)
>> + uint32_t flags;
>> + uint32_t size; /* the following payload size */
>> + union {
>> +#define VHOST_USER_VRING_IDX_MASK (0xff)
>> +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
>> + uint64_t u64;
>> + struct vhost_vring_state state;
>> + struct vhost_vring_addr addr;
>> + VhostUserMemory memory;
>> + } payload;
>> + int fds[VHOST_MEMORY_MAX_NREGIONS];
>> +} __attribute((packed)) VhostUserMsg;
>> +
>> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
>> +
>> +/* The version of the protocol we support */
>> +#define VHOST_USER_VERSION (0x1)
>> +
>> +/*****************************************************************************/
>> +#endif
>> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
>> new file mode 100644
>> index 0000000..6601fcd
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
>> @@ -0,0 +1,205 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <sys/mman.h>
>> +
>> +#include <rte_log.h>
>> +
>> +#include "virtio-net.h"
>> +#include "virtio-net-user.h"
>> +#include "vhost-net-user.h"
>> +#include "vhost-net.h"
>> +
>> +int
>> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> + unsigned int idx;
>> + struct VhostUserMemory memory = pmsg->payload.memory;
>> + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
>> + uint64_t mapped_address, base_address = 0;
>> +
>> + for (idx = 0; idx < memory.nregions; idx++) {
>> + if (memory.regions[idx].guest_phys_addr == 0)
>> + base_address = memory.regions[idx].userspace_addr;
>> + }
>> + if (base_address == 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "couldn't find the mem region whose GPA is 0.\n");
>> + return -1;
>> + }
>> +
>> + for (idx = 0; idx < memory.nregions; idx++) {
>> + regions[idx].guest_phys_address =
>> + memory.regions[idx].guest_phys_addr;
>> + regions[idx].guest_phys_address_end =
>> + memory.regions[idx].guest_phys_addr +
>> + memory.regions[idx].memory_size;
>> + regions[idx].memory_size = memory.regions[idx].memory_size;
>> + regions[idx].userspace_address =
>> + memory.regions[idx].userspace_addr;
>> +
>> + /* This is ugly */
>> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
>> + regions[idx].memory_size +
>> + memory.regions[idx].mmap_offset,
>> + PROT_READ | PROT_WRITE, MAP_SHARED,
>> + pmsg->fds[idx],
>> + 0);
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "mapped region %d to %p\n",
>> + idx, (void *)mapped_address);
>> +
>> + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "mmap qemu guest failed.\n");
>> + return -1;
>> + }
>> +
>> + mapped_address += memory.regions[idx].mmap_offset;
>> +
>> + regions[idx].address_offset = mapped_address -
>> + regions[idx].guest_phys_address;
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
>> + idx,
>> + (void *)(uintptr_t)regions[idx].guest_phys_address,
>> + (void *)(uintptr_t)regions[idx].userspace_address,
>> + regions[idx].memory_size);
>> + }
>> + ops->set_mem_table(ctx, regions, memory.nregions);
>> + return 0;
>> +}
>> +
>> +
>> +static int
>> +virtio_is_ready(struct virtio_net *dev)
>> +{
>> + struct vhost_virtqueue *rvq, *tvq;
>> +
>> + /* mq support in future.*/
>> + rvq = dev->virtqueue[VIRTIO_RXQ];
>> + tvq = dev->virtqueue[VIRTIO_TXQ];
>> + if (rvq && tvq && rvq->desc && tvq->desc &&
>> + (rvq->kickfd != (eventfd_t)-1) &&
>> + (rvq->callfd != (eventfd_t)-1) &&
>> + (tvq->kickfd != (eventfd_t)-1) &&
>> + (tvq->callfd != (eventfd_t)-1)) {
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "virtio is now ready for processing.\n");
>> + return 1;
>> + }
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "virtio isn't ready for processing.\n");
>> + return 0;
>> +}
>> +
>> +void
>> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> + struct vhost_vring_file file;
>> +
>> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
>> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
>> + file.fd = -1;
>> + else
>> + file.fd = pmsg->fds[0];
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vring call idx:%d file:%d\n", file.index, file.fd);
>> + ops->set_vring_call(ctx, &file);
>> +}
>> +
>> +
>> +/*
>> + * In vhost-user, when we receive kick message, will test whether virtio
>> + * device is ready for packet processing.
>> + */
>> +void
>> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> + struct vhost_vring_file file;
>> + struct virtio_net *dev = get_device(ctx);
>> +
>> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
>> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
>> + file.fd = -1;
>> + else
>> + file.fd = pmsg->fds[0];
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vring kick idx:%d file:%d\n", file.index, file.fd);
>> + ops->set_vring_kick(ctx, &file);
>> +
>> + if (virtio_is_ready(dev) &&
>> + !(dev->flags & VIRTIO_DEV_RUNNING))
>> + notify_ops->new_device(dev);
>> +
>> +}
>> +
>> +/*
>> + * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
>> + */
>> +int
>> +user_get_vring_base(struct vhost_device_ctx ctx,
>> + struct vhost_vring_state *state)
>> +{
>> + struct virtio_net *dev = get_device(ctx);
>> +
>> + /* We have to stop the queue (virtio) if it is running. */
>> + if (dev->flags & VIRTIO_DEV_RUNNING)
>> + notify_ops->destroy_device(dev);
>> +
>> + /* Here we are safe to get the last used index */
>> + ops->get_vring_base(ctx, state->index, state);
>> +
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vring base idx:%d file:%d\n", state->index, state->num);
>> + /*
>> + * Based on current qemu vhost-user implementation, this message is
>> + * sent and only sent in vhost_vring_stop.
>> + * TODO: cleanup the vring, it isn't usable since here.
>> + */
>
> Please don't tie yourself to a current qemu implementation. Please just
> extend qemu to send explicit start/stop messages.
> You'll need to negotiate the new capabilities.
>
>
> Nikolay, it seems that the version field is only 2 bits.
> How can we extend it cleanly?
Will something like this do:
#define VHOST_USER_VERSION_MASK_MAJ (0x3)
#define VHOST_USER_REPLY_MASK (0x1<<2)
#define VHOST_USER_VERSION_MASK_MIN (0xf<<3)
The "major" part of the version will be increased in case of
significant changes in the protocol, and the "minor" part in all other
cases. I guess this will give us enough space for versioning.
regards,
Nikolay Nikolaev
>
> Perhaps, add a new GET_PROTOCOL message for exchanging vhost user
> specific bits, then remote should set a high version bit to let qemu
> know it's supported?
>
>
>
>
>> + if (((int)dev->virtqueue[VIRTIO_RXQ]->callfd) >= 0) {
>> + close(dev->virtqueue[VIRTIO_RXQ]->callfd);
>> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
>> + }
>> + if (((int)dev->virtqueue[VIRTIO_TXQ]->callfd) >= 0) {
>> + close(dev->virtqueue[VIRTIO_TXQ]->callfd);
>> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
>> + }
>> +
>> + return 0;
>> +
>> +}
>> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
>> new file mode 100644
>> index 0000000..0f6a75a
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
>> @@ -0,0 +1,48 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#ifndef _VIRTIO_NET_USER_H
>> +#define _VIRTIO_NET_USER_H
>> +
>> +#include "vhost-net.h"
>> +#include "vhost-net-user.h"
>> +
>> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
>> +
>> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
>> +
>> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
>> +
>> +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
>> +
>> +#endif
>> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
>> index 57a5801..c458ed9 100644
>> --- a/lib/librte_vhost/virtio-net.c
>> +++ b/lib/librte_vhost/virtio-net.c
>> @@ -50,6 +50,7 @@
>> #include <rte_virtio_net.h>
>>
>> #include "vhost-net.h"
>> +#include "virtio-net.h"
>>
>> /*
>> * Device linked list structure for configuration.
>> @@ -60,7 +61,7 @@ struct virtio_net_config_ll {
>> };
>>
>> /* device ops to add/remove device to/from data core. */
>> -static struct virtio_net_device_ops const *notify_ops;
>> +struct virtio_net_device_ops const *notify_ops;
>> /* root address of the linked list of managed virtio devices */
>> static struct virtio_net_config_ll *ll_root;
>>
>> @@ -88,8 +89,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
>> if ((qemu_va >= region->userspace_address) &&
>> (qemu_va <= region->userspace_address +
>> region->memory_size)) {
>> - vhost_va = dev->mem->mapped_address + qemu_va -
>> - dev->mem->base_address;
>> + vhost_va = qemu_va + region->guest_phys_address +
>> + region->address_offset -
>> + region->userspace_address;
>> break;
>> }
>> }
>> @@ -119,7 +121,7 @@ get_config_ll_entry(struct vhost_device_ctx ctx)
>> * Searches the configuration core linked list and
>> * retrieves the device if it exists.
>> */
>> -static struct virtio_net *
>> +struct virtio_net *
>> get_device(struct vhost_device_ctx ctx)
>> {
>> struct virtio_net_config_ll *ll_dev;
>> @@ -256,6 +258,11 @@ init_device(struct virtio_net *dev)
>> memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
>> memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
>>
>> + dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
>> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
>> + dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
>> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
>> +
>> /* Backends are set to -1 indicating an inactive device. */
>> dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
>> dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
>> @@ -455,12 +462,6 @@ set_mem_table(struct vhost_device_ctx ctx,
>> if (dev == NULL)
>> return -1;
>>
>> - if (dev->mem) {
>> - munmap((void *)(uintptr_t)dev->mem->mapped_address,
>> - (size_t)dev->mem->mapped_size);
>> - free(dev->mem);
>> - }
>> -
>> /* Malloc the memory structure depending on the number of regions. */
>> mem = calloc(1, sizeof(struct virtio_memory) +
>> (sizeof(struct virtio_memory_regions) * nregions));
>> @@ -624,7 +625,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
>> vq = dev->virtqueue[file->index];
>>
>> - if (vq->kickfd)
>> + if ((int)vq->kickfd >= 0)
>> close((int)vq->kickfd);
>>
>> vq->kickfd = file->fd;
>> @@ -650,8 +651,9 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
>> vq = dev->virtqueue[file->index];
>>
>> - if (vq->callfd)
>> + if ((int)vq->callfd >= 0)
>> close((int)vq->callfd);
>> +
>> vq->callfd = file->fd;
>>
>> return 0;
>> diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h
>> new file mode 100644
>> index 0000000..75fb57e
>> --- /dev/null
>> +++ b/lib/librte_vhost/virtio-net.h
>> @@ -0,0 +1,43 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#ifndef _VIRTIO_NET_H
>> +#define _VIRTIO_NET_H
>> +
>> +#include "vhost-net.h"
>> +#include "rte_virtio_net.h"
>> +
>> +struct virtio_net_device_ops const *notify_ops;
>> +struct virtio_net *get_device(struct vhost_device_ctx ctx);
>> +
>> +#endif
>> --
>> 1.8.1.4
>>
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [Qemu-devel] [dpdk-dev] [RFC PATCH v2 10/14] vhost user support
2015-01-28 14:27 ` Nikolay Nikolaev
@ 2015-01-28 14:37 ` Michael S. Tsirkin
0 siblings, 0 replies; 21+ messages in thread
From: Michael S. Tsirkin @ 2015-01-28 14:37 UTC (permalink / raw)
To: Nikolay Nikolaev
Cc: Huawei Xie, fbl, qemu-devel, VirtualOpenSystems Technical Team
On Wed, Jan 28, 2015 at 04:27:35PM +0200, Nikolay Nikolaev wrote:
> On Wed, Jan 28, 2015 at 3:34 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > I had to drop the dpdk mailing list from Cc.
> > Added qemu mailing list, please copy patches there
> > in the future.
> >
> > On Mon, Jan 26, 2015 at 11:20:36AM +0800, Huawei Xie wrote:
> >>
> >> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
> >
> > Overall, I think it's a reasonable implementation.
> > Some comments below:
> >
> >> ---
> >> lib/librte_vhost/Makefile | 5 +-
> >> lib/librte_vhost/vhost-net.h | 4 +
> >> lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 12 +-
> >> lib/librte_vhost/vhost_user/fd_man.c | 4 +-
> >> lib/librte_vhost/vhost_user/vhost-net-user.c | 428 ++++++++++++++++++++++++++
> >> lib/librte_vhost/vhost_user/vhost-net-user.h | 108 +++++++
> >> lib/librte_vhost/vhost_user/virtio-net-user.c | 205 ++++++++++++
> >> lib/librte_vhost/vhost_user/virtio-net-user.h | 48 +++
> >> lib/librte_vhost/virtio-net.c | 26 +-
> >> lib/librte_vhost/virtio-net.h | 43 +++
> >> 10 files changed, 865 insertions(+), 18 deletions(-)
> >> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
> >> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
> >> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
> >> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
> >> create mode 100644 lib/librte_vhost/virtio-net.h
> >>
> >> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> >> index e0d0ef6..b2f14a0 100644
> >> --- a/lib/librte_vhost/Makefile
> >> +++ b/lib/librte_vhost/Makefile
> >> @@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
> >> # library name
> >> LIB = librte_vhost.a
> >>
> >> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> >> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> >> LDFLAGS += -lfuse
> >> # all source are stored in SRCS-y
> >> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> >> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> >> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c
> >>
> >> # install includes
> >> SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
> >> diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
> >> index 11737cc..3f18f25 100644
> >> --- a/lib/librte_vhost/vhost-net.h
> >> +++ b/lib/librte_vhost/vhost-net.h
> >> @@ -41,8 +41,12 @@
> >>
> >> #include <rte_log.h>
> >>
> >> +#include "rte_virtio_net.h"
> >> +
> >> #define VHOST_MEMORY_MAX_NREGIONS 8
> >>
> >> +extern struct vhost_net_device_ops const *ops;
> >> +
> >> /* Macros for printing using RTE_LOG */
> >> #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
> >> #define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
> >> diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> >> index edcbc10..1d2c403 100644
> >> --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> >> +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> >> @@ -50,8 +50,7 @@
> >> #include "rte_virtio_net.h"
> >> #include "vhost-net.h"
> >> #include "virtio-net-cdev.h"
> >> -
> >> -extern struct vhost_net_device_ops const *ops;
> >> +#include "virtio-net.h"
> >>
> >> /* Line size for reading maps file. */
> >> static const uint32_t BUFSIZE = PATH_MAX;
> >> @@ -268,6 +267,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
> >> struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
> >> ((uint64_t)(uintptr_t)mem_regions_addr + size);
> >> uint64_t base_address = 0, mapped_address, mapped_size;
> >> + struct virtio_net *dev;
> >>
> >> for (idx = 0; idx < nregions; idx++) {
> >> regions[idx].guest_phys_address =
> >> @@ -335,6 +335,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
> >> regions[idx].guest_phys_address;
> >> }
> >>
> >> + dev = get_device(ctx);
> >> + if (dev && dev->mem && dev->mem->mapped_address) {
> >> + munmap((void *)(uintptr_t)dev->mem->mapped_address,
> >> + (size_t)dev->mem->mapped_size);
> >> + free(dev->mem);
> >> + dev->mem = NULL;
> >> + }
> >> +
> >> ops->set_mem_table(ctx, ®ions[0], valid_regions);
> >> return 0;
> >> }
> >> diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
> >> index 09187e0..0d2beb9 100644
> >> --- a/lib/librte_vhost/vhost_user/fd_man.c
> >> +++ b/lib/librte_vhost/vhost_user/fd_man.c
> >> @@ -72,7 +72,7 @@ fdset_find_free_slot(struct fdset *pfdset)
> >>
> >> static void
> >> fdset_add_fd(struct fdset *pfdset, int idx, int fd,
> >> - fd_cb rcb, fd_cb wcb, uint64_t dat)
> >> + fd_cb rcb, fd_cb wcb, void *dat)
> >> {
> >> struct fdentry *pfdentry;
> >>
> >> @@ -138,7 +138,7 @@ fdset_init(struct fdset *pfdset)
> >> * Register the fd in the fdset with read/write handler and context.
> >> */
> >> int
> >> -fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
> >> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
> >> {
> >> int i;
> >>
> >> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
> >> new file mode 100644
> >> index 0000000..c84fd3b
> >> --- /dev/null
> >> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> >> @@ -0,0 +1,428 @@
> >> +/*-
> >> + * BSD LICENSE
> >> + *
> >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> >> + * All rights reserved.
> >> + *
> >> + * Redistribution and use in source and binary forms, with or without
> >> + * modification, are permitted provided that the following conditions
> >> + * are met:
> >> + *
> >> + * * Redistributions of source code must retain the above copyright
> >> + * notice, this list of conditions and the following disclaimer.
> >> + * * Redistributions in binary form must reproduce the above copyright
> >> + * notice, this list of conditions and the following disclaimer in
> >> + * the documentation and/or other materials provided with the
> >> + * distribution.
> >> + * * Neither the name of Intel Corporation nor the names of its
> >> + * contributors may be used to endorse or promote products derived
> >> + * from this software without specific prior written permission.
> >> + *
> >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >> + */
> >> +
> >> +#include <stdint.h>
> >> +#include <stdio.h>
> >> +#include <limits.h>
> >> +#include <stdlib.h>
> >> +#include <unistd.h>
> >> +#include <string.h>
> >> +#include <sys/types.h>
> >> +#include <sys/socket.h>
> >> +#include <sys/un.h>
> >> +#include <errno.h>
> >> +
> >> +#include <rte_log.h>
> >> +#include <rte_virtio_net.h>
> >> +
> >> +#include "fd_man.h"
> >> +#include "vhost-net-user.h"
> >> +#include "vhost-net.h"
> >> +#include "virtio-net-user.h"
> >> +
> >> +static void vserver_new_vq_conn(int fd, void *data);
> >> +static void vserver_message_handler(int fd, void *dat);
> >> +struct vhost_net_device_ops const *ops;
> >> +
> >> +static struct vhost_server *g_vhost_server;
> >> +
> >> +static const char *vhost_message_str[VHOST_USER_MAX] = {
> >> + [VHOST_USER_NONE] = "VHOST_USER_NONE",
> >> + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
> >> + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
> >> + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
> >> + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
> >> + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
> >> + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
> >> + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
> >> + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
> >> + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
> >> + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
> >> + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
> >> + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
> >> + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
> >> + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR"
> >> +};
> >> +
> >> +/**
> >> + * Create a unix domain socket, bind to path and listen for connection.
> >> + * @return
> >> + * socket fd or -1 on failure
> >> + */
> >> +static int
> >> +uds_socket(const char *path)
> >> +{
> >> + struct sockaddr_un un;
> >> + int sockfd;
> >> + int ret;
> >> +
> >> + if (path == NULL)
> >> + return -1;
> >> +
> >> + sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
> >> + if (sockfd < 0)
> >> + return -1;
> >> + RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
> >> +
> >> + memset(&un, 0, sizeof(un));
> >> + un.sun_family = AF_UNIX;
> >> + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
> >> + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> >> + if (ret == -1)
> >> + goto err;
> >> + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> >> +
> >> + ret = listen(sockfd, 1);
> >> + if (ret == -1)
> >> + goto err;
> >> +
> >> + return sockfd;
> >> +
> >> +err:
> >> + close(sockfd);
> >> + return -1;
> >> +}
> >> +
> >> +/* return bytes# of read on success or negative val on failure. */
> >> +static int
> >> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> >> +{
> >> + struct iovec iov;
> >> + struct msghdr msgh = { 0 };
> >> + size_t fdsize = fd_num * sizeof(int);
> >> + char control[CMSG_SPACE(fdsize)];
> >> + struct cmsghdr *cmsg;
> >> + int ret;
> >> +
> >> + iov.iov_base = buf;
> >> + iov.iov_len = buflen;
> >> +
> >> + msgh.msg_iov = &iov;
> >> + msgh.msg_iovlen = 1;
> >> + msgh.msg_control = control;
> >> + msgh.msg_controllen = sizeof(control);
> >> +
> >> + ret = recvmsg(sockfd, &msgh, 0);
> >> + if (ret <= 0) {
> >> + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
> >> + return ret;
> >> + }
> >> +
> >> + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
> >> + RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
> >> + return -1;
> >> + }
> >> +
> >> + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
> >> + cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
> >> + if ((cmsg->cmsg_level == SOL_SOCKET) &&
> >> + (cmsg->cmsg_type == SCM_RIGHTS)) {
> >> + memcpy(fds, CMSG_DATA(cmsg), fdsize);
> >> + break;
> >> + }
> >> + }
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +/* return bytes# of read on success or negative val on failure. */
> >> +static int
> >> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> >> +{
> >> + int ret;
> >> +
> >> + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
> >> + msg->fds, VHOST_MEMORY_MAX_NREGIONS);
> >> + if (ret <= 0)
> >> + return ret;
> >> +
> >> + if (msg && msg->size) {
> >> + if (msg->size > sizeof(msg->payload)) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "invalid msg size: %d\n", msg->size);
> >> + return -1;
> >> + }
> >> + ret = read(sockfd, &msg->payload, msg->size);
> >> + if (ret <= 0)
> >> + return ret;
> >> + if (ret != (int)msg->size) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "read control message failed\n");
> >> + return -1;
> >> + }
> >> + }
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +static int
> >> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> >> +{
> >> +
> >> + struct iovec iov;
> >> + struct msghdr msgh = { 0 };
> >> + size_t fdsize = fd_num * sizeof(int);
> >> + char control[CMSG_SPACE(fdsize)];
> >> + struct cmsghdr *cmsg;
> >> + int ret;
> >> +
> >> + iov.iov_base = buf;
> >> + iov.iov_len = buflen;
> >> +
> >> + msgh.msg_iov = &iov;
> >> + msgh.msg_iovlen = 1;
> >> +
> >> + if (fds && fd_num > 0) {
> >> + msgh.msg_control = control;
> >> + msgh.msg_controllen = sizeof(control);
> >> + cmsg = CMSG_FIRSTHDR(&msgh);
> >> + cmsg->cmsg_len = CMSG_LEN(fdsize);
> >> + cmsg->cmsg_level = SOL_SOCKET;
> >> + cmsg->cmsg_type = SCM_RIGHTS;
> >> + memcpy(CMSG_DATA(cmsg), fds, fdsize);
> >> + } else {
> >> + msgh.msg_control = NULL;
> >> + msgh.msg_controllen = 0;
> >> + }
> >> +
> >> + do {
> >> + ret = sendmsg(sockfd, &msgh, 0);
> >> + } while (ret < 0 && errno == EINTR);
> >> +
> >> + if (ret < 0) {
> >> + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
> >> + return ret;
> >> + }
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +static int
> >> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
> >> +{
> >> + int ret;
> >> +
> >> + if (!msg)
> >> + return 0;
> >> +
> >> + msg->flags &= ~VHOST_USER_VERSION_MASK;
> >> + msg->flags |= VHOST_USER_VERSION;
> >> + msg->flags |= VHOST_USER_REPLY_MASK;
> >> +
> >> + ret = send_fd_message(sockfd, (char *)msg,
> >> + VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +/* call back when there is new virtio connection. */
> >> +static void
> >> +vserver_new_vq_conn(int fd, void *dat)
> >> +{
> >> + struct vhost_server *vserver = (struct vhost_server *)dat;
> >> + int conn_fd;
> >> + int fh;
> >> + struct vhost_device_ctx vdev_ctx = { 0 };
> >> +
> >> + conn_fd = accept(fd, NULL, NULL);
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "new virtio connection is %d\n", conn_fd);
> >> + if (conn_fd < 0)
> >> + return;
> >> +
> >> + fh = ops->new_device(vdev_ctx);
> >> + if (fh == -1) {
> >> + close(conn_fd);
> >> + return;
> >> + }
> >> + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
> >> +
> >> + fdset_add(&vserver->fdset,
> >> + conn_fd, vserver_message_handler, NULL, (void *)fh);
> >> +}
> >> +
> >> +/* callback when there is message on the connfd */
> >> +static void
> >> +vserver_message_handler(int connfd, void *dat)
> >> +{
> >> + struct vhost_device_ctx ctx;
> >> + uint32_t fh = (uint32_t)dat;
> >> + struct VhostUserMsg msg;
> >> + uint64_t features;
> >> + int ret;
> >> +
> >> + ctx.fh = fh;
> >> + ret = read_vhost_message(connfd, &msg);
> >> + if (ret < 0) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "vhost read message failed\n");
> >> +
> >> + close(connfd);
> >> + fdset_del(&g_vhost_server->fdset, connfd);
> >> + ops->destroy_device(ctx);
> >> +
> >> + return;
> >> + } else if (ret == 0) {
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "vhost peer closed\n");
> >> +
> >> + close(connfd);
> >> + fdset_del(&g_vhost_server->fdset, connfd);
> >> + ops->destroy_device(ctx);
> >> +
> >> + return;
> >> + }
> >> + if (msg.request > VHOST_USER_MAX) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "vhost read incorrect message\n");
> >> +
> >> + close(connfd);
> >> + fdset_del(&g_vhost_server->fdset, connfd);
> >> +
> >> + return;
> >> + }
> >> +
> >> + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> >> + vhost_message_str[msg.request]);
> >> + switch (msg.request) {
> >> + case VHOST_USER_GET_FEATURES:
> >> + ret = ops->get_features(ctx, &features);
> >> + msg.payload.u64 = features;
> >> + msg.size = sizeof(msg.payload.u64);
> >> + send_vhost_message(connfd, &msg);
> >
> > What if this fails (e.g. remote died)?
> > How will everything be cleaned up?
> >
> >> + break;
> >> + case VHOST_USER_SET_FEATURES:
> >> + features = msg.payload.u64;
> >> + ops->set_features(ctx, &features);
> >> + break;
> >> +
> >> + case VHOST_USER_SET_OWNER:
> >> + ops->set_owner(ctx);
> >> + break;
> >> + case VHOST_USER_RESET_OWNER:
> >> + ops->reset_owner(ctx);
> >> + break;
> >> +
> >> + case VHOST_USER_SET_MEM_TABLE:
> >> + user_set_mem_table(ctx, &msg);
> >> + break;
> >> +
> >> + case VHOST_USER_SET_LOG_BASE:
> >> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> >> + case VHOST_USER_SET_LOG_FD:
> >> + close(msg.fds[0]);
> >> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> >> + break;
> >> +
> >> + case VHOST_USER_SET_VRING_NUM:
> >> + ops->set_vring_num(ctx, &msg.payload.state);
> >> + break;
> >> + case VHOST_USER_SET_VRING_ADDR:
> >> + ops->set_vring_addr(ctx, &msg.payload.addr);
> >> + break;
> >> + case VHOST_USER_SET_VRING_BASE:
> >> + ops->set_vring_base(ctx, &msg.payload.state);
> >> + break;
> >> +
> >> + case VHOST_USER_GET_VRING_BASE:
> >> + ret = user_get_vring_base(ctx, &msg.payload.state);
> >> + msg.size = sizeof(msg.payload.state);
> >> + send_vhost_message(connfd, &msg);
> >> + break;
> >> +
> >> + case VHOST_USER_SET_VRING_KICK:
> >> + user_set_vring_kick(ctx, &msg);
> >> + break;
> >> + case VHOST_USER_SET_VRING_CALL:
> >> + user_set_vring_call(ctx, &msg);
> >> + break;
> >> +
> >> + case VHOST_USER_SET_VRING_ERR:
> >> + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
> >> + close(msg.fds[0]);
> >> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
> >> + break;
> >> +
> >> + default:
> >> + break;
> >> +
> >> + }
> >> +}
> >> +
> >> +
> >> +/**
> >> + * Creates and initialise the vhost server.
> >> + */
> >> +int
> >> +rte_vhost_driver_register(const char *path)
> >> +{
> >> +
> >> + struct vhost_server *vserver;
> >> +
> >> + if (g_vhost_server != NULL)
> >> + return -1;
> >> +
> >> + vserver = calloc(sizeof(struct vhost_server), 1);
> >> + if (vserver == NULL)
> >> + return -1;
> >> +
> >> + fdset_init(&vserver->fdset);
> >> +
> >> + unlink(path);
> >> +
> >> + vserver->listenfd = uds_socket(path);
> >> + if (vserver->listenfd < 0) {
> >> + free(vserver);
> >> + return -1;
> >> + }
> >> + vserver->path = path;
> >> +
> >> + fdset_add(&vserver->fdset, vserver->listenfd,
> >> + vserver_new_vq_conn, NULL,
> >> + vserver);
> >> +
> >> + ops = get_virtio_net_callbacks();
> >> +
> >> + g_vhost_server = vserver;
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +
> >> +int
> >> +rte_vhost_driver_session_start(void)
> >> +{
> >> + fdset_event_dispatch(&g_vhost_server->fdset);
> >> + return 0;
> >> +}
> >> +
> >> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
> >> new file mode 100644
> >> index 0000000..7e6cda4
> >> --- /dev/null
> >> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
> >> @@ -0,0 +1,108 @@
> >> +/*-
> >> + * BSD LICENSE
> >> + *
> >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> >> + * All rights reserved.
> >> + *
> >> + * Redistribution and use in source and binary forms, with or without
> >> + * modification, are permitted provided that the following conditions
> >> + * are met:
> >> + *
> >> + * * Redistributions of source code must retain the above copyright
> >> + * notice, this list of conditions and the following disclaimer.
> >> + * * Redistributions in binary form must reproduce the above copyright
> >> + * notice, this list of conditions and the following disclaimer in
> >> + * the documentation and/or other materials provided with the
> >> + * distribution.
> >> + * * Neither the name of Intel Corporation nor the names of its
> >> + * contributors may be used to endorse or promote products derived
> >> + * from this software without specific prior written permission.
> >> + *
> >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >> + */
> >> +
> >> +#ifndef _VHOST_NET_USER_H
> >> +#define _VHOST_NET_USER_H
> >> +
> >> +#include <stdint.h>
> >> +#include <linux/vhost.h>
> >> +
> >> +#include "fd_man.h"
> >> +
> >> +struct vhost_server {
> >> + const char *path; /**< The path the uds is bind to. */
> >> + int listenfd; /**< The listener sockfd. */
> >> + struct fdset fdset; /**< The fd list this vhost server manages. */
> >> +};
> >> +
> >> +/* refer to hw/virtio/vhost-user.c */
> >> +
> >> +#define VHOST_MEMORY_MAX_NREGIONS 8
> >> +
> >> +typedef enum VhostUserRequest {
> >> + VHOST_USER_NONE = 0,
> >> + VHOST_USER_GET_FEATURES = 1,
> >> + VHOST_USER_SET_FEATURES = 2,
> >> + VHOST_USER_SET_OWNER = 3,
> >> + VHOST_USER_RESET_OWNER = 4,
> >> + VHOST_USER_SET_MEM_TABLE = 5,
> >> + VHOST_USER_SET_LOG_BASE = 6,
> >> + VHOST_USER_SET_LOG_FD = 7,
> >> + VHOST_USER_SET_VRING_NUM = 8,
> >> + VHOST_USER_SET_VRING_ADDR = 9,
> >> + VHOST_USER_SET_VRING_BASE = 10,
> >> + VHOST_USER_GET_VRING_BASE = 11,
> >> + VHOST_USER_SET_VRING_KICK = 12,
> >> + VHOST_USER_SET_VRING_CALL = 13,
> >> + VHOST_USER_SET_VRING_ERR = 14,
> >> + VHOST_USER_MAX
> >> +} VhostUserRequest;
> >> +
> >> +typedef struct VhostUserMemoryRegion {
> >> + uint64_t guest_phys_addr;
> >> + uint64_t memory_size;
> >> + uint64_t userspace_addr;
> >> + uint64_t mmap_offset;
> >> +} VhostUserMemoryRegion;
> >> +
> >> +typedef struct VhostUserMemory {
> >> + uint32_t nregions;
> >> + uint32_t padding;
> >> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> >> +} VhostUserMemory;
> >> +
> >> +typedef struct VhostUserMsg {
> >> + VhostUserRequest request;
> >> +
> >> +#define VHOST_USER_VERSION_MASK (0x3)
> >> +#define VHOST_USER_REPLY_MASK (0x1 << 2)
> >> + uint32_t flags;
> >> + uint32_t size; /* the following payload size */
> >> + union {
> >> +#define VHOST_USER_VRING_IDX_MASK (0xff)
> >> +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
> >> + uint64_t u64;
> >> + struct vhost_vring_state state;
> >> + struct vhost_vring_addr addr;
> >> + VhostUserMemory memory;
> >> + } payload;
> >> + int fds[VHOST_MEMORY_MAX_NREGIONS];
> >> +} __attribute((packed)) VhostUserMsg;
> >> +
> >> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
> >> +
> >> +/* The version of the protocol we support */
> >> +#define VHOST_USER_VERSION (0x1)
> >> +
> >> +/*****************************************************************************/
> >> +#endif
> >> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> >> new file mode 100644
> >> index 0000000..6601fcd
> >> --- /dev/null
> >> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> >> @@ -0,0 +1,205 @@
> >> +/*-
> >> + * BSD LICENSE
> >> + *
> >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> >> + * All rights reserved.
> >> + *
> >> + * Redistribution and use in source and binary forms, with or without
> >> + * modification, are permitted provided that the following conditions
> >> + * are met:
> >> + *
> >> + * * Redistributions of source code must retain the above copyright
> >> + * notice, this list of conditions and the following disclaimer.
> >> + * * Redistributions in binary form must reproduce the above copyright
> >> + * notice, this list of conditions and the following disclaimer in
> >> + * the documentation and/or other materials provided with the
> >> + * distribution.
> >> + * * Neither the name of Intel Corporation nor the names of its
> >> + * contributors may be used to endorse or promote products derived
> >> + * from this software without specific prior written permission.
> >> + *
> >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >> + */
> >> +
> >> +#include <stdint.h>
> >> +#include <stdio.h>
> >> +#include <stdlib.h>
> >> +#include <unistd.h>
> >> +#include <sys/mman.h>
> >> +
> >> +#include <rte_log.h>
> >> +
> >> +#include "virtio-net.h"
> >> +#include "virtio-net-user.h"
> >> +#include "vhost-net-user.h"
> >> +#include "vhost-net.h"
> >> +
> >> +int
> >> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> >> +{
> >> + unsigned int idx;
> >> + struct VhostUserMemory memory = pmsg->payload.memory;
> >> + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
> >> + uint64_t mapped_address, base_address = 0;
> >> +
> >> + for (idx = 0; idx < memory.nregions; idx++) {
> >> + if (memory.regions[idx].guest_phys_addr == 0)
> >> + base_address = memory.regions[idx].userspace_addr;
> >> + }
> >> + if (base_address == 0) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "couldn't find the mem region whose GPA is 0.\n");
> >> + return -1;
> >> + }
> >> +
> >> + for (idx = 0; idx < memory.nregions; idx++) {
> >> + regions[idx].guest_phys_address =
> >> + memory.regions[idx].guest_phys_addr;
> >> + regions[idx].guest_phys_address_end =
> >> + memory.regions[idx].guest_phys_addr +
> >> + memory.regions[idx].memory_size;
> >> + regions[idx].memory_size = memory.regions[idx].memory_size;
> >> + regions[idx].userspace_address =
> >> + memory.regions[idx].userspace_addr;
> >> +
> >> + /* This is ugly */
> >> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
> >> + regions[idx].memory_size +
> >> + memory.regions[idx].mmap_offset,
> >> + PROT_READ | PROT_WRITE, MAP_SHARED,
> >> + pmsg->fds[idx],
> >> + 0);
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "mapped region %d to %p\n",
> >> + idx, (void *)mapped_address);
> >> +
> >> + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "mmap qemu guest failed.\n");
> >> + return -1;
> >> + }
> >> +
> >> + mapped_address += memory.regions[idx].mmap_offset;
> >> +
> >> + regions[idx].address_offset = mapped_address -
> >> + regions[idx].guest_phys_address;
> >> + LOG_DEBUG(VHOST_CONFIG,
> >> + "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
> >> + idx,
> >> + (void *)(uintptr_t)regions[idx].guest_phys_address,
> >> + (void *)(uintptr_t)regions[idx].userspace_address,
> >> + regions[idx].memory_size);
> >> + }
> >> + ops->set_mem_table(ctx, regions, memory.nregions);
> >> + return 0;
> >> +}
> >> +
> >> +
> >> +static int
> >> +virtio_is_ready(struct virtio_net *dev)
> >> +{
> >> + struct vhost_virtqueue *rvq, *tvq;
> >> +
> >> + /* mq support in future.*/
> >> + rvq = dev->virtqueue[VIRTIO_RXQ];
> >> + tvq = dev->virtqueue[VIRTIO_TXQ];
> >> + if (rvq && tvq && rvq->desc && tvq->desc &&
> >> + (rvq->kickfd != (eventfd_t)-1) &&
> >> + (rvq->callfd != (eventfd_t)-1) &&
> >> + (tvq->kickfd != (eventfd_t)-1) &&
> >> + (tvq->callfd != (eventfd_t)-1)) {
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "virtio is now ready for processing.\n");
> >> + return 1;
> >> + }
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "virtio isn't ready for processing.\n");
> >> + return 0;
> >> +}
> >> +
> >> +void
> >> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> >> +{
> >> + struct vhost_vring_file file;
> >> +
> >> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> >> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
> >> + file.fd = -1;
> >> + else
> >> + file.fd = pmsg->fds[0];
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "vring call idx:%d file:%d\n", file.index, file.fd);
> >> + ops->set_vring_call(ctx, &file);
> >> +}
> >> +
> >> +
> >> +/*
> >> + * In vhost-user, when we receive kick message, will test whether virtio
> >> + * device is ready for packet processing.
> >> + */
> >> +void
> >> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> >> +{
> >> + struct vhost_vring_file file;
> >> + struct virtio_net *dev = get_device(ctx);
> >> +
> >> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> >> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
> >> + file.fd = -1;
> >> + else
> >> + file.fd = pmsg->fds[0];
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "vring kick idx:%d file:%d\n", file.index, file.fd);
> >> + ops->set_vring_kick(ctx, &file);
> >> +
> >> + if (virtio_is_ready(dev) &&
> >> + !(dev->flags & VIRTIO_DEV_RUNNING))
> >> + notify_ops->new_device(dev);
> >> +
> >> +}
> >> +
> >> +/*
> >> + * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
> >> + */
> >> +int
> >> +user_get_vring_base(struct vhost_device_ctx ctx,
> >> + struct vhost_vring_state *state)
> >> +{
> >> + struct virtio_net *dev = get_device(ctx);
> >> +
> >> + /* We have to stop the queue (virtio) if it is running. */
> >> + if (dev->flags & VIRTIO_DEV_RUNNING)
> >> + notify_ops->destroy_device(dev);
> >> +
> >> + /* Here we are safe to get the last used index */
> >> + ops->get_vring_base(ctx, state->index, state);
> >> +
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "vring base idx:%d file:%d\n", state->index, state->num);
> >> + /*
> >> + * Based on current qemu vhost-user implementation, this message is
> >> + * sent and only sent in vhost_vring_stop.
> >> + * TODO: cleanup the vring, it isn't usable since here.
> >> + */
> >
> > Please don't tie yourself to a current qemu implementation. Please just
> > extend qemu to send explicit start/stop messages.
> > You'll need to negotiate the new capabilities.
> >
> >
> > Nikolay, it seems that the version field is only 2 bits.
> > How can we extend it cleanly?
>
> Will something like this do:
>
> #define VHOST_USER_VERSION_MASK_MAJ (0x3)
> #define VHOST_USER_REPLY_MASK (0x1<<2)
> #define VHOST_USER_VERSION_MASK_MIN (0xf<<3)
>
> The "major" part of the version will be increased in case of
> significant changes in the protocol. And the "minor" part in all other
> cases. I guess this will give us enough space for versioning.
>
> regards,
> Nikolay Nikolaev
I think it's not enough
1. Which message do we use to pass it?
2. It's preferable to have something like feature-bits
and not version numbers. Much easier to avoid
conflicts.
> >
> > Perhaps, add a new GET_PROTOCOL message for exchanging vhost user
> > specific bits, then remote should set a high version bit to let qemu
> > know it's supported?
> >
> >
> >
> >
> >> + if (((int)dev->virtqueue[VIRTIO_RXQ]->callfd) >= 0) {
> >> + close(dev->virtqueue[VIRTIO_RXQ]->callfd);
> >> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> >> + }
> >> + if (((int)dev->virtqueue[VIRTIO_TXQ]->callfd) >= 0) {
> >> + close(dev->virtqueue[VIRTIO_TXQ]->callfd);
> >> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> >> + }
> >> +
> >> + return 0;
> >> +
> >> +}
> >> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
> >> new file mode 100644
> >> index 0000000..0f6a75a
> >> --- /dev/null
> >> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> >> @@ -0,0 +1,48 @@
> >> +/*-
> >> + * BSD LICENSE
> >> + *
> >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> >> + * All rights reserved.
> >> + *
> >> + * Redistribution and use in source and binary forms, with or without
> >> + * modification, are permitted provided that the following conditions
> >> + * are met:
> >> + *
> >> + * * Redistributions of source code must retain the above copyright
> >> + * notice, this list of conditions and the following disclaimer.
> >> + * * Redistributions in binary form must reproduce the above copyright
> >> + * notice, this list of conditions and the following disclaimer in
> >> + * the documentation and/or other materials provided with the
> >> + * distribution.
> >> + * * Neither the name of Intel Corporation nor the names of its
> >> + * contributors may be used to endorse or promote products derived
> >> + * from this software without specific prior written permission.
> >> + *
> >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >> + */
> >> +
> >> +#ifndef _VIRTIO_NET_USER_H
> >> +#define _VIRTIO_NET_USER_H
> >> +
> >> +#include "vhost-net.h"
> >> +#include "vhost-net-user.h"
> >> +
> >> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
> >> +
> >> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
> >> +
> >> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
> >> +
> >> +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
> >> +
> >> +#endif
> >> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> >> index 57a5801..c458ed9 100644
> >> --- a/lib/librte_vhost/virtio-net.c
> >> +++ b/lib/librte_vhost/virtio-net.c
> >> @@ -50,6 +50,7 @@
> >> #include <rte_virtio_net.h>
> >>
> >> #include "vhost-net.h"
> >> +#include "virtio-net.h"
> >>
> >> /*
> >> * Device linked list structure for configuration.
> >> @@ -60,7 +61,7 @@ struct virtio_net_config_ll {
> >> };
> >>
> >> /* device ops to add/remove device to/from data core. */
> >> -static struct virtio_net_device_ops const *notify_ops;
> >> +struct virtio_net_device_ops const *notify_ops;
> >> /* root address of the linked list of managed virtio devices */
> >> static struct virtio_net_config_ll *ll_root;
> >>
> >> @@ -88,8 +89,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
> >> if ((qemu_va >= region->userspace_address) &&
> >> (qemu_va <= region->userspace_address +
> >> region->memory_size)) {
> >> - vhost_va = dev->mem->mapped_address + qemu_va -
> >> - dev->mem->base_address;
> >> + vhost_va = qemu_va + region->guest_phys_address +
> >> + region->address_offset -
> >> + region->userspace_address;
> >> break;
> >> }
> >> }
> >> @@ -119,7 +121,7 @@ get_config_ll_entry(struct vhost_device_ctx ctx)
> >> * Searches the configuration core linked list and
> >> * retrieves the device if it exists.
> >> */
> >> -static struct virtio_net *
> >> +struct virtio_net *
> >> get_device(struct vhost_device_ctx ctx)
> >> {
> >> struct virtio_net_config_ll *ll_dev;
> >> @@ -256,6 +258,11 @@ init_device(struct virtio_net *dev)
> >> memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
> >> memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
> >>
> >> + dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> >> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> >> + dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> >> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> >> +
> >> /* Backends are set to -1 indicating an inactive device. */
> >> dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
> >> dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
> >> @@ -455,12 +462,6 @@ set_mem_table(struct vhost_device_ctx ctx,
> >> if (dev == NULL)
> >> return -1;
> >>
> >> - if (dev->mem) {
> >> - munmap((void *)(uintptr_t)dev->mem->mapped_address,
> >> - (size_t)dev->mem->mapped_size);
> >> - free(dev->mem);
> >> - }
> >> -
> >> /* Malloc the memory structure depending on the number of regions. */
> >> mem = calloc(1, sizeof(struct virtio_memory) +
> >> (sizeof(struct virtio_memory_regions) * nregions));
> >> @@ -624,7 +625,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> >> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
> >> vq = dev->virtqueue[file->index];
> >>
> >> - if (vq->kickfd)
> >> + if ((int)vq->kickfd >= 0)
> >> close((int)vq->kickfd);
> >>
> >> vq->kickfd = file->fd;
> >> @@ -650,8 +651,9 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> >> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
> >> vq = dev->virtqueue[file->index];
> >>
> >> - if (vq->callfd)
> >> + if ((int)vq->callfd >= 0)
> >> close((int)vq->callfd);
> >> +
> >> vq->callfd = file->fd;
> >>
> >> return 0;
> >> diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h
> >> new file mode 100644
> >> index 0000000..75fb57e
> >> --- /dev/null
> >> +++ b/lib/librte_vhost/virtio-net.h
> >> @@ -0,0 +1,43 @@
> >> +/*-
> >> + * BSD LICENSE
> >> + *
> >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> >> + * All rights reserved.
> >> + *
> >> + * Redistribution and use in source and binary forms, with or without
> >> + * modification, are permitted provided that the following conditions
> >> + * are met:
> >> + *
> >> + * * Redistributions of source code must retain the above copyright
> >> + * notice, this list of conditions and the following disclaimer.
> >> + * * Redistributions in binary form must reproduce the above copyright
> >> + * notice, this list of conditions and the following disclaimer in
> >> + * the documentation and/or other materials provided with the
> >> + * distribution.
> >> + * * Neither the name of Intel Corporation nor the names of its
> >> + * contributors may be used to endorse or promote products derived
> >> + * from this software without specific prior written permission.
> >> + *
> >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >> + */
> >> +
> >> +#ifndef _VIRTIO_NET_H
> >> +#define _VIRTIO_NET_H
> >> +
> >> +#include "vhost-net.h"
> >> +#include "rte_virtio_net.h"
> >> +
> >> +struct virtio_net_device_ops const *notify_ops;
> >> +struct virtio_net *get_device(struct vhost_device_ctx ctx);
> >> +
> >> +#endif
> >> --
> >> 1.8.1.4
> >>
^ permalink raw reply [flat|nested] 21+ messages in thread
* [RFC PATCH v2 11/14] vhost user memory region map
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (9 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 10/14] vhost user support Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 12/14] cleanup when vhost user connection is closed Huawei Xie
` (3 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/rte_virtio_net.h | 2 +
lib/librte_vhost/vhost-net.h | 2 -
lib/librte_vhost/vhost_user/vhost-net-user.h | 3 +-
lib/librte_vhost/vhost_user/virtio-net-user.c | 109 ++++++++++++++++++++++++--
4 files changed, 104 insertions(+), 12 deletions(-)
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 0bf07c7..46c2072 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -50,6 +50,8 @@
#include <rte_mempool.h>
#include <rte_mbuf.h>
+#define VHOST_MEMORY_MAX_NREGIONS 8
+
/* Used to indicate that the device is running on a data core */
#define VIRTIO_DEV_RUNNING 1
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index 3f18f25..94b359f 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -43,8 +43,6 @@
#include "rte_virtio_net.h"
-#define VHOST_MEMORY_MAX_NREGIONS 8
-
extern struct vhost_net_device_ops const *ops;
/* Macros for printing using RTE_LOG */
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
index 7e6cda4..91e8fc3 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -37,6 +37,7 @@
#include <stdint.h>
#include <linux/vhost.h>
+#include "rte_virtio_net.h"
#include "fd_man.h"
struct vhost_server {
@@ -47,8 +48,6 @@ struct vhost_server {
/* refer to hw/virtio/vhost-user.c */
-#define VHOST_MEMORY_MAX_NREGIONS 8
-
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
VHOST_USER_GET_FEATURES = 1,
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 6601fcd..3481eb8 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -36,7 +36,11 @@
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <rte_common.h>
#include <rte_log.h>
#include "virtio-net.h"
@@ -44,13 +48,59 @@
#include "vhost-net-user.h"
#include "vhost-net.h"
+struct orig_region_map {
+ int fd;
+ uint64_t mapped_address;
+ uint64_t mapped_size;
+ uint64_t blksz;
+};
+
+#define orig_region(ptr, nregions) ((struct orig_region_map *)RTE_PTR_ADD(ptr, \
+ sizeof(struct virtio_memory) + \
+ sizeof(struct virtio_memory_regions) * (nregions)))
+
+static uint64_t
+get_blk_size(int fd)
+{
+ struct stat stat;
+
+ fstat(fd, &stat);
+ return (uint64_t)stat.st_blksize;
+}
+
int
user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
{
- unsigned int idx;
struct VhostUserMemory memory = pmsg->payload.memory;
struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
- uint64_t mapped_address, base_address = 0;
+ uint64_t mapped_address, mapped_size, base_address = 0;
+ struct virtio_net *dev;
+ unsigned int idx = 0;
+ struct orig_region_map tmp[VHOST_MEMORY_MAX_NREGIONS] = {
+ [0 ... VHOST_MEMORY_MAX_NREGIONS - 1] = { 0 } };
+ struct orig_region_map *region;
+ uint64_t alignment;
+ int ret;
+
+ /* unmap old memory regions one by one*/
+ dev = get_device(ctx);
+ if (dev->mem) {
+ region = orig_region(dev->mem, dev->mem->nregions);
+ for (idx = 0; idx < dev->mem->nregions; idx++) {
+ if (region[idx].mapped_address) {
+ alignment = region[idx].blksz;
+ printf("Freeing %p\n",
+ (void *)(uintptr_t)region[idx].mapped_address);
+ ret = munmap((void *)RTE_ALIGN_FLOOR(region[idx].mapped_address, alignment),
+ RTE_ALIGN_CEIL(region[idx].mapped_size, alignment));
+ printf("munmap ret= %d\n", ret);
+ printf("close file %d\n", region[idx].fd);
+ close(region[idx].fd);
+ }
+ }
+ free(dev->mem);
+ dev->mem = NULL;
+ }
for (idx = 0; idx < memory.nregions; idx++) {
if (memory.regions[idx].guest_phys_addr == 0)
@@ -73,22 +123,30 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
memory.regions[idx].userspace_addr;
/* This is ugly */
+ mapped_size = regions[idx].memory_size +
+ memory.regions[idx].mmap_offset;
mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
- regions[idx].memory_size +
- memory.regions[idx].mmap_offset,
+ mapped_size,
PROT_READ | PROT_WRITE, MAP_SHARED,
pmsg->fds[idx],
0);
+
RTE_LOG(INFO, VHOST_CONFIG,
- "mapped region %d to %p\n",
- idx, (void *)mapped_address);
+ "mapped region %d fd:%d to %p sz:0x%"PRIx64" off:0x%"PRIx64"\n",
+ idx, pmsg->fds[idx], (void *)mapped_address,
+ mapped_size, memory.regions[idx].mmap_offset);
if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
RTE_LOG(ERR, VHOST_CONFIG,
"mmap qemu guest failed.\n");
- return -1;
+ goto err;
}
+ tmp[idx].mapped_address = mapped_address;
+ tmp[idx].mapped_size = mapped_size;
+ tmp[idx].blksz = get_blk_size(pmsg->fds[idx]);
+ tmp[idx].fd = pmsg->fds[idx];
+
mapped_address += memory.regions[idx].mmap_offset;
regions[idx].address_offset = mapped_address -
@@ -100,10 +158,45 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
(void *)(uintptr_t)regions[idx].userspace_address,
regions[idx].memory_size);
}
+
ops->set_mem_table(ctx, regions, memory.nregions);
+
+ if (dev->mem) {
+ void *tmp_mem;
+ tmp_mem = realloc(dev->mem,
+ sizeof(struct virtio_memory) +
+ sizeof(struct virtio_memory_regions) * memory.nregions +
+ sizeof(struct orig_region_map) * memory.nregions);
+ if (tmp_mem == NULL)
+ goto err_realloc;
+
+ dev->mem = tmp_mem;
+ region = orig_region(dev->mem, memory.nregions);
+ for (idx = 0; idx < memory.nregions; idx++) {
+ region[idx].mapped_address = tmp[idx].mapped_address;
+ region[idx].mapped_size = tmp[idx].mapped_size;
+ region[idx].blksz = tmp[idx].blksz;
+ region[idx].fd = tmp[idx].fd;
+ }
+ } else
+ goto err_set_mem_table;
+
return 0;
-}
+err_realloc:
+ free(dev->mem);
+err_set_mem_table:
+err:
+ while (idx--) {
+ alignment = tmp[idx].blksz;
+ munmap((void *)RTE_ALIGN_FLOOR(
+ tmp[idx].mapped_address, alignment),
+ RTE_ALIGN_CEIL(tmp[idx].mapped_size, alignment));
+ close(tmp[idx].fd);
+ }
+ dev->mem = NULL;
+ return -1;
+}
static int
virtio_is_ready(struct virtio_net *dev)
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread* [RFC PATCH v2 12/14] cleanup when vhost user connection is closed
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (10 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 11/14] vhost user memory region map Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 13/14] multiple socket support Huawei Xie
` (2 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/vhost_user/vhost-net-user.c | 4 ++
lib/librte_vhost/vhost_user/virtio-net-user.c | 56 +++++++++++++++++++--------
lib/librte_vhost/vhost_user/virtio-net-user.h | 1 +
3 files changed, 45 insertions(+), 16 deletions(-)
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index c84fd3b..71e5bbd 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -289,6 +289,7 @@ vserver_message_handler(int connfd, void *dat)
close(connfd);
fdset_del(&g_vhost_server->fdset, connfd);
+ user_destroy_device(ctx);
ops->destroy_device(ctx);
return;
@@ -298,6 +299,7 @@ vserver_message_handler(int connfd, void *dat)
close(connfd);
fdset_del(&g_vhost_server->fdset, connfd);
+ user_destroy_device(ctx);
ops->destroy_device(ctx);
return;
@@ -308,6 +310,8 @@ vserver_message_handler(int connfd, void *dat)
close(connfd);
fdset_del(&g_vhost_server->fdset, connfd);
+ user_destroy_device(ctx);
+ ops->destroy_device(ctx);
return;
}
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 3481eb8..8e6d580 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -68,6 +68,30 @@ get_blk_size(int fd)
return (uint64_t)stat.st_blksize;
}
+static void
+free_mem_region(struct virtio_net *dev)
+{
+ struct orig_region_map *region;
+ unsigned int idx;
+ uint64_t alignment;
+
+ if (!dev || !dev->mem)
+ return;
+
+ region = orig_region(dev->mem, dev->mem->nregions);
+ for (idx = 0; idx < dev->mem->nregions; idx++) {
+ if (region[idx].mapped_address) {
+ alignment = region[idx].blksz;
+ munmap((void *)
+ RTE_ALIGN_FLOOR(
+ region[idx].mapped_address, alignment),
+ RTE_ALIGN_CEIL(
+ region[idx].mapped_size, alignment));
+ close(region[idx].fd);
+ }
+ }
+}
+
int
user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
{
@@ -80,24 +104,11 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
[0 ... VHOST_MEMORY_MAX_NREGIONS - 1] = { 0 } };
struct orig_region_map *region;
uint64_t alignment;
- int ret;
/* unmap old memory regions one by one*/
dev = get_device(ctx);
- if (dev->mem) {
- region = orig_region(dev->mem, dev->mem->nregions);
- for (idx = 0; idx < dev->mem->nregions; idx++) {
- if (region[idx].mapped_address) {
- alignment = region[idx].blksz;
- printf("Freeing %p\n",
- (void *)(uintptr_t)region[idx].mapped_address);
- ret = munmap((void *)RTE_ALIGN_FLOOR(region[idx].mapped_address, alignment),
- RTE_ALIGN_CEIL(region[idx].mapped_size, alignment));
- printf("munmap ret= %d\n", ret);
- printf("close file %d\n", region[idx].fd);
- close(region[idx].fd);
- }
- }
+ if (dev && dev->mem) {
+ free_mem_region(dev);
free(dev->mem);
dev->mem = NULL;
}
@@ -258,7 +269,6 @@ user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
if (virtio_is_ready(dev) &&
!(dev->flags & VIRTIO_DEV_RUNNING))
notify_ops->new_device(dev);
-
}
/*
@@ -294,5 +304,19 @@ user_get_vring_base(struct vhost_device_ctx ctx,
}
return 0;
+}
+
+void
+user_destroy_device(struct vhost_device_ctx ctx)
+{
+ struct virtio_net *dev = get_device(ctx);
+ if (dev && (dev->flags & VIRTIO_DEV_RUNNING))
+ notify_ops->destroy_device(dev);
+
+ if (dev && dev->mem) {
+ free_mem_region(dev);
+ free(dev->mem);
+ dev->mem = NULL;
+ }
}
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
index 0f6a75a..df24860 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.h
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
@@ -45,4 +45,5 @@ void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
+void user_destroy_device(struct vhost_device_ctx);
#endif
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread* [RFC PATCH v2 13/14] multiple socket support
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (11 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 12/14] cleanup when vhost user connection is closed Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-01-26 3:20 ` [RFC PATCH v2 14/14] vhost user ifr_name support Huawei Xie
2015-02-09 7:52 ` [RFC PATCH v2 00/14] qemu vhost-user support Linhaifeng
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/vhost_user/vhost-net-user.c | 57 +++++++++++++++++++---------
lib/librte_vhost/vhost_user/vhost-net-user.h | 1 -
2 files changed, 40 insertions(+), 18 deletions(-)
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 71e5bbd..3a45a5e 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -54,7 +54,18 @@ static void vserver_new_vq_conn(int fd, void *data);
static void vserver_message_handler(int fd, void *dat);
struct vhost_net_device_ops const *ops;
-static struct vhost_server *g_vhost_server;
+struct connfd_ctx {
+ struct vhost_server *vserver;
+ uint32_t fh;
+};
+
+#define MAX_VHOST_SERVER 1024
+static struct {
+ struct vhost_server *server[MAX_VHOST_SERVER];
+ struct fdset fdset; /**< The fd list this vhost server manages. */
+} g_vhost_server;
+
+static int vserver_idx;
static const char *vhost_message_str[VHOST_USER_MAX] = {
[VHOST_USER_NONE] = "VHOST_USER_NONE",
@@ -251,6 +262,7 @@ vserver_new_vq_conn(int fd, void *dat)
{
struct vhost_server *vserver = (struct vhost_server *)dat;
int conn_fd;
+ struct connfd_ctx *ctx;
int fh;
struct vhost_device_ctx vdev_ctx = { 0 };
@@ -260,15 +272,24 @@ vserver_new_vq_conn(int fd, void *dat)
if (conn_fd < 0)
return;
+ ctx = calloc(1, sizeof(*ctx));
+ if (ctx == NULL) {
+ close(conn_fd);
+ return;
+ }
+
fh = ops->new_device(vdev_ctx);
if (fh == -1) {
+ free(ctx);
close(conn_fd);
return;
}
RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
- fdset_add(&vserver->fdset,
- conn_fd, vserver_message_handler, NULL, (void *)fh);
+ ctx->vserver = vserver;
+ ctx->fh = fh;
+ fdset_add(&g_vhost_server.fdset,
+ conn_fd, vserver_message_handler, NULL, ctx);
}
/* callback when there is message on the connfd */
@@ -276,19 +297,20 @@ static void
vserver_message_handler(int connfd, void *dat)
{
struct vhost_device_ctx ctx;
- uint32_t fh = (uint32_t)dat;
+ struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat;
struct VhostUserMsg msg;
uint64_t features;
int ret;
- ctx.fh = fh;
+ ctx.fh = cfd_ctx->fh;
ret = read_vhost_message(connfd, &msg);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"vhost read message failed\n");
close(connfd);
- fdset_del(&g_vhost_server->fdset, connfd);
+ fdset_del(&g_vhost_server.fdset, connfd);
+ free(cfd_ctx);
user_destroy_device(ctx);
ops->destroy_device(ctx);
@@ -298,7 +320,8 @@ vserver_message_handler(int connfd, void *dat)
"vhost peer closed\n");
close(connfd);
- fdset_del(&g_vhost_server->fdset, connfd);
+ fdset_del(&g_vhost_server.fdset, connfd);
+ free(cfd_ctx);
user_destroy_device(ctx);
ops->destroy_device(ctx);
@@ -309,7 +332,8 @@ vserver_message_handler(int connfd, void *dat)
"vhost read incorrect message\n");
close(connfd);
- fdset_del(&g_vhost_server->fdset, connfd);
+ fdset_del(&g_vhost_server.fdset, connfd);
+ free(cfd_ctx);
user_destroy_device(ctx);
ops->destroy_device(ctx);
@@ -390,18 +414,19 @@ vserver_message_handler(int connfd, void *dat)
int
rte_vhost_driver_register(const char *path)
{
-
struct vhost_server *vserver;
- if (g_vhost_server != NULL)
+ if (vserver_idx == 0) {
+ fdset_init(&g_vhost_server.fdset);
+ ops = get_virtio_net_callbacks();
+ }
+ if (vserver_idx == MAX_VHOST_SERVER)
return -1;
vserver = calloc(sizeof(struct vhost_server), 1);
if (vserver == NULL)
return -1;
- fdset_init(&vserver->fdset);
-
unlink(path);
vserver->listenfd = uds_socket(path);
@@ -411,13 +436,11 @@ rte_vhost_driver_register(const char *path)
}
vserver->path = path;
- fdset_add(&vserver->fdset, vserver->listenfd,
+ fdset_add(&g_vhost_server.fdset, vserver->listenfd,
vserver_new_vq_conn, NULL,
vserver);
- ops = get_virtio_net_callbacks();
-
- g_vhost_server = vserver;
+ g_vhost_server.server[vserver_idx++] = vserver;
return 0;
}
@@ -426,7 +449,7 @@ rte_vhost_driver_register(const char *path)
int
rte_vhost_driver_session_start(void)
{
- fdset_event_dispatch(&g_vhost_server->fdset);
+ fdset_event_dispatch(&g_vhost_server.fdset);
return 0;
}
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
index 91e8fc3..e2a91a9 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.h
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
@@ -43,7 +43,6 @@
struct vhost_server {
const char *path; /**< The path the uds is bind to. */
int listenfd; /**< The listener sockfd. */
- struct fdset fdset; /**< The fd list this vhost server manages. */
};
/* refer to hw/virtio/vhost-user.c */
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [RFC PATCH v2 14/14] vhost user ifr_name support
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (12 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 13/14] multiple socket support Huawei Xie
@ 2015-01-26 3:20 ` Huawei Xie
2015-02-09 7:52 ` [RFC PATCH v2 00/14] qemu vhost-user support Linhaifeng
14 siblings, 0 replies; 21+ messages in thread
From: Huawei Xie @ 2015-01-26 3:20 UTC (permalink / raw)
To: dev-VfR2kkLFssw
Signed-off-by: Huawei Xie <huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
lib/librte_vhost/Makefile | 2 +-
lib/librte_vhost/rte_virtio_net.h | 3 +-
lib/librte_vhost/vhost-net.h | 3 +
lib/librte_vhost/vhost_cuse/eventfd_copy.c | 89 +++++++++++++++++++++++++++
lib/librte_vhost/vhost_cuse/eventfd_copy.h | 40 ++++++++++++
lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 51 ++-------------
lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 53 ++++++++++++++++
lib/librte_vhost/vhost_cuse/virtio-net-cdev.h | 3 +
lib/librte_vhost/vhost_user/vhost-net-user.c | 7 +++
lib/librte_vhost/virtio-net.c | 63 ++++++-------------
10 files changed, 223 insertions(+), 91 deletions(-)
create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.c
create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.h
diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index b2f14a0..c9017d5 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -37,7 +37,7 @@ LIB = librte_vhost.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse
LDFLAGS += -lfuse
# all source are stored in SRCS-y
-#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
+#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c virtio-net.c vhost_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c
# install includes
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 46c2072..611a3d4 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -100,7 +100,8 @@ struct virtio_net {
uint64_t features; /**< Negotiated feature set. */
uint64_t device_fh; /**< device identifier. */
uint32_t flags; /**< Device flags. Only used to check if device is running on data core. */
- char ifname[IFNAMSIZ]; /**< Name of the tap device. */
+#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
+ char ifname[IF_NAME_SZ]; /**< Name of the tap device or socket path. */
void *priv; /**< private context */
} __rte_cache_aligned;
diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index 94b359f..d125a05 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -93,6 +93,9 @@ struct vhost_net_device_ops {
int (*new_device)(struct vhost_device_ctx);
void (*destroy_device)(struct vhost_device_ctx);
+ void (*set_ifname)(struct vhost_device_ctx,
+ const char *if_name, unsigned int if_len);
+
int (*get_features)(struct vhost_device_ctx, uint64_t *);
int (*set_features)(struct vhost_device_ctx, uint64_t *);
diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.c b/lib/librte_vhost/vhost_cuse/eventfd_copy.c
new file mode 100644
index 0000000..f2ed04e
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/eventfd_copy.c
@@ -0,0 +1,89 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <rte_log.h>
+
+#include "eventfd_link/eventfd_link.h"
+#include "eventfd_copy.h"
+#include "vhost-net.h"
+
+static const char eventfd_cdev[] = "/dev/eventfd-link";
+
+/*
+ * This function uses the eventfd_link kernel module to copy an eventfd file
+ * descriptor provided by QEMU in to our process space.
+ */
+int
+eventfd_copy(int target_fd, int target_pid)
+{
+ int eventfd_link, ret;
+ struct eventfd_copy eventfd_copy;
+ int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+
+ if (fd == -1)
+ return -1;
+
+ /* Open the character device to the kernel module. */
+ /* TODO: check this earlier rather than fail until VM boots! */
+ eventfd_link = open(eventfd_cdev, O_RDWR);
+ if (eventfd_link < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "eventfd_link module is not loaded\n");
+ close(fd);
+ return -1;
+ }
+
+ eventfd_copy.source_fd = fd;
+ eventfd_copy.target_fd = target_fd;
+ eventfd_copy.target_pid = target_pid;
+ /* Call the IOCTL to copy the eventfd. */
+ ret = ioctl(eventfd_link, EVENTFD_COPY, &eventfd_copy);
+ close(eventfd_link);
+
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "EVENTFD_COPY ioctl failed\n");
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.h b/lib/librte_vhost/vhost_cuse/eventfd_copy.h
new file mode 100644
index 0000000..5f7307c
--- /dev/null
+++ b/lib/librte_vhost/vhost_cuse/eventfd_copy.h
@@ -0,0 +1,40 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVENTFD_H
+#define _EVENTFD_H
+
+int
+eventfd_copy(int target_fd, int target_pid);
+
+#endif
+
diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
index 2ddd6e0..228da43 100644
--- a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
@@ -38,8 +38,6 @@
#include <stdint.h>
#include <string.h>
#include <unistd.h>
-#include <sys/eventfd.h>
-#include <sys/ioctl.h>
#include <rte_ethdev.h>
#include <rte_log.h>
@@ -48,7 +46,7 @@
#include "virtio-net-cdev.h"
#include "vhost-net.h"
-#include "eventfd_link/eventfd_link.h"
+#include "eventfd_copy.h"
#define FUSE_OPT_DUMMY "\0\0"
#define FUSE_OPT_FORE "-f\0\0"
@@ -58,7 +56,7 @@ static const uint32_t default_major = 231;
static const uint32_t default_minor = 1;
static const char cuse_device_name[] = "/dev/cuse";
static const char default_cdev[] = "vhost-net";
-static const char eventfd_cdev[] = "/dev/eventfd-link";
+
static struct fuse_session *session;
struct vhost_net_device_ops const *ops;
@@ -178,47 +176,6 @@ vhost_net_release(fuse_req_t req, struct fuse_file_info *fi)
} while (0)
/*
- * This function uses the eventfd_link kernel module to copy an eventfd file
- * descriptor provided by QEMU in to our process space.
- */
-static int
-eventfd_copy(int target_fd, int target_pid)
-{
- int eventfd_link, ret;
- struct eventfd_copy eventfd_copy;
- int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
-
- if (fd == -1)
- return -1;
-
- /* Open the character device to the kernel module. */
- /* TODO: check this earlier rather than fail until VM boots! */
- eventfd_link = open(eventfd_cdev, O_RDWR);
- if (eventfd_link < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "eventfd_link module is not loaded\n");
- close(fd);
- return -1;
- }
-
- eventfd_copy.source_fd = fd;
- eventfd_copy.target_fd = target_fd;
- eventfd_copy.target_pid = target_pid;
- /* Call the IOCTL to copy the eventfd. */
- ret = ioctl(eventfd_link, EVENTFD_COPY, &eventfd_copy);
- close(eventfd_link);
-
- if (ret < 0) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "EVENTFD_COPY ioctl failed\n");
- close(fd);
- return -1;
- }
-
- return fd;
-}
-
-/*
* The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type
* of IOCTL a buffer is requested to read or to write. This request is handled
* by FUSE and the buffer is then given to CUSE.
@@ -240,7 +197,9 @@ vhost_net_ioctl(fuse_req_t req, int cmd, void *arg,
case VHOST_NET_SET_BACKEND:
LOG_DEBUG(VHOST_CONFIG,
"(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh);
- VHOST_IOCTL_R(struct vhost_vring_file, file, ops->set_backend);
+ file = *(const struct vhost_vring_file *)in_buf;
+ result = cuse_set_backend(ctx, &file);
+ fuse_reply_ioctl(req, result, NULL, 0);
break;
case VHOST_GET_FEATURES:
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index 1d2c403..6f7f4da 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -43,6 +43,10 @@
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
#include <errno.h>
#include <rte_log.h>
@@ -51,6 +55,7 @@
#include "vhost-net.h"
#include "virtio-net-cdev.h"
#include "virtio-net.h"
+#include "eventfd_copy.h"
/* Line size for reading maps file. */
static const uint32_t BUFSIZE = PATH_MAX;
@@ -346,3 +351,51 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
ops->set_mem_table(ctx, &regions[0], valid_regions);
return 0;
}
+
+/*
+ * Function to get the tap device name from the provided file descriptor and
+ * save it in the device structure.
+ */
+static int
+get_ifname(struct vhost_device_ctx ctx, struct virtio_net *dev, int tap_fd, int pid)
+{
+ int fd_tap;
+ struct ifreq ifr;
+ uint32_t ifr_size;
+ int ret;
+
+ fd_tap = eventfd_copy(tap_fd, pid);
+ if (fd_tap < 0)
+ return -1;
+
+ ret = ioctl(fd_tap, TUNGETIFF, &ifr);
+
+ if (close(fd_tap) < 0)
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%"PRIu64") fd close failed\n",
+ dev->device_fh);
+
+ if (ret >= 0) {
+ ifr_size = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name));
+ ops->set_ifname(ctx, ifr.ifr_name, ifr_size);
+ } else
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%"PRIu64") TUNGETIFF ioctl failed\n",
+ dev->device_fh);
+
+ return 0;
+}
+
+int cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
+{
+ struct virtio_net *dev;
+
+ dev = get_device(ctx);
+ if (dev == NULL)
+ return -1;
+
+ if (!(dev->flags & VIRTIO_DEV_RUNNING))
+ get_ifname(ctx, dev, file->fd, ctx.pid);
+
+ return ops->set_backend(ctx, file);
+}
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
index 5ee81b1..eb6b0ba 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
@@ -42,4 +42,7 @@ int
cuse_set_mem_table(struct vhost_device_ctx ctx,
const struct vhost_memory *mem_regions_addr, uint32_t nregions);
+int
+cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *);
+
#endif
diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 3a45a5e..38fef83 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -265,6 +265,7 @@ vserver_new_vq_conn(int fd, void *dat)
struct connfd_ctx *ctx;
int fh;
struct vhost_device_ctx vdev_ctx = { 0 };
+ unsigned int size;
conn_fd = accept(fd, NULL, NULL);
RTE_LOG(INFO, VHOST_CONFIG,
@@ -284,6 +285,12 @@ vserver_new_vq_conn(int fd, void *dat)
close(conn_fd);
return;
}
+
+ vdev_ctx.fh = fh;
+ size = strnlen(vserver->path, PATH_MAX);
+ ops->set_ifname(vdev_ctx, vserver->path,
+ size);
+
RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
ctx->vserver = vserver;
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index c458ed9..c45bccc 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -40,8 +40,6 @@
#include <unistd.h>
#include <sys/socket.h>
-#include <linux/if_tun.h>
-#include <linux/if.h>
#include <rte_ethdev.h>
#include <rte_log.h>
@@ -354,6 +352,24 @@ destroy_device(struct vhost_device_ctx ctx)
}
}
+static void
+set_ifname(struct vhost_device_ctx ctx,
+ const char *if_name, unsigned int if_len)
+{
+ struct virtio_net *dev;
+ unsigned int len;
+
+ dev = get_device(ctx);
+ if (dev == NULL)
+ return;
+
+ len = if_len > sizeof(dev->ifname) ?
+ sizeof(dev->ifname) : if_len;
+
+ strncpy(dev->ifname, if_name, len);
+}
+
+
/*
* Called from CUSE IOCTL: VHOST_SET_OWNER
* This function just returns success at the moment unless
@@ -660,46 +676,6 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
}
/*
- * Function to get the tap device name from the provided file descriptor and
- * save it in the device structure.
- */
-static int
-get_ifname(struct virtio_net *dev, int tap_fd, int pid)
-{
- struct eventfd_copy fd_tap;
- struct ifreq ifr;
- uint32_t size, ifr_size;
- int ret;
-
- fd_tap.source_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
- fd_tap.target_fd = tap_fd;
- fd_tap.target_pid = pid;
-
- if (eventfd_copy(dev, &fd_tap))
- return -1;
-
- ret = ioctl(fd_tap.source_fd, TUNGETIFF, &ifr);
-
- if (close(fd_tap.source_fd) < 0)
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") fd close failed\n",
- dev->device_fh);
-
- if (ret >= 0) {
- ifr_size = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name));
- size = ifr_size > sizeof(dev->ifname) ?
- sizeof(dev->ifname) : ifr_size;
-
- strncpy(dev->ifname, ifr.ifr_name, size);
- } else
- RTE_LOG(ERR, VHOST_CONFIG,
- "(%"PRIu64") TUNGETIFF ioctl failed\n",
- dev->device_fh);
-
- return 0;
-}
-
-/*
* Called from CUSE IOCTL: VHOST_NET_SET_BACKEND
* To complete device initialisation when the virtio driver is loaded,
* we are provided with a valid fd for a tap device (not used by us).
@@ -727,7 +703,6 @@ set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) &&
((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED)) {
- get_ifname(dev, file->fd, ctx.pid);
return notify_ops->new_device(dev);
}
/* Otherwise we remove it. */
@@ -745,6 +720,8 @@ static const struct vhost_net_device_ops vhost_device_ops = {
.new_device = new_device,
.destroy_device = destroy_device,
+ .set_ifname = set_ifname,
+
.get_features = get_features,
.set_features = set_features,
--
1.8.1.4
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [RFC PATCH v2 00/14] qemu vhost-user support
[not found] ` <1422242440-28948-1-git-send-email-huawei.xie-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
` (13 preceding siblings ...)
2015-01-26 3:20 ` [RFC PATCH v2 14/14] vhost user ifr_name support Huawei Xie
@ 2015-02-09 7:52 ` Linhaifeng
14 siblings, 0 replies; 21+ messages in thread
From: Linhaifeng @ 2015-02-09 7:52 UTC (permalink / raw)
To: Huawei Xie, dev-VfR2kkLFssw
Hi, Xie
Does librte_vhost support openvswitch?
How do I attach the vhost_device_ctx to an openvswitch port?
On 2015/1/26 11:20, Huawei Xie wrote:
> v2 changes:
> make fdset num field reflect the current number of fds vhost server manages
> allocate context for connected fd in vserver_new_vq_conn
> enable multiple socket support
> get_feature fix: apply Tetsuya's comment
> set_feature fix
> close received log fd, err fd: apply Haifeng's comment
> CTRL_VQ fix
> set ifname to unix domain socket path
> change the context type from uint64_t to void * in event management
> other code rework
>
> Huawei Xie (14):
> turn on VIRTIO_NET_F_CTRL_RX is dependant on VIRTIO_NET_F_CTRL_VQ.
> create vhost_cuse directory
> rename vhost-net-cdev.h to vhost-net.h
> consistent print style
> implement the eventfd copying(from fd in qemu process to fd in vhost process) into vhost-net-cdev.c
> copy host_memory_map from virtio-net.c to a new file virtio-net-cdev.c
> host_memory_map
> split set_memory_table into two parts
> add select based event driven fd management logic
> vhost user support
> vhost user memory region map
> cleanup when vhost user connection is closed
> multiple socket support
> vhost user ifr_name support
>
> lib/librte_vhost/Makefile | 5 +-
> lib/librte_vhost/rte_virtio_net.h | 5 +-
> lib/librte_vhost/vhost-net-cdev.c | 389 ----------------------
> lib/librte_vhost/vhost-net-cdev.h | 113 -------
> lib/librte_vhost/vhost-net.h | 121 +++++++
> lib/librte_vhost/vhost_cuse/eventfd_copy.c | 89 +++++
> lib/librte_vhost/vhost_cuse/eventfd_copy.h | 40 +++
> lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 414 +++++++++++++++++++++++
> lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 401 ++++++++++++++++++++++
> lib/librte_vhost/vhost_cuse/virtio-net-cdev.h | 48 +++
> lib/librte_vhost/vhost_rxtx.c | 5 +-
> lib/librte_vhost/vhost_user/fd_man.c | 207 ++++++++++++
> lib/librte_vhost/vhost_user/fd_man.h | 64 ++++
> lib/librte_vhost/vhost_user/vhost-net-user.c | 462 ++++++++++++++++++++++++++
> lib/librte_vhost/vhost_user/vhost-net-user.h | 106 ++++++
> lib/librte_vhost/vhost_user/virtio-net-user.c | 322 ++++++++++++++++++
> lib/librte_vhost/vhost_user/virtio-net-user.h | 49 +++
> lib/librte_vhost/virtio-net.c | 455 +++----------------------
> lib/librte_vhost/virtio-net.h | 43 +++
> 19 files changed, 2419 insertions(+), 919 deletions(-)
> delete mode 100644 lib/librte_vhost/vhost-net-cdev.c
> delete mode 100644 lib/librte_vhost/vhost-net-cdev.h
> create mode 100644 lib/librte_vhost/vhost-net.h
> create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.c
> create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.h
> create mode 100644 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c
> create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h
> create mode 100644 lib/librte_vhost/vhost_user/fd_man.c
> create mode 100644 lib/librte_vhost/vhost_user/fd_man.h
> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
> create mode 100644 lib/librte_vhost/virtio-net.h
>
--
Regards,
Haifeng
^ permalink raw reply [flat|nested] 21+ messages in thread