* Re: [Qemu-devel] [dpdk-dev] [RFC PATCH v2 10/14] vhost user support
[not found] ` <1422242440-28948-11-git-send-email-huawei.xie@intel.com>
@ 2015-01-28 13:34 ` Michael S. Tsirkin
2015-01-28 14:27 ` Nikolay Nikolaev
0 siblings, 1 reply; 3+ messages in thread
From: Michael S. Tsirkin @ 2015-01-28 13:34 UTC (permalink / raw)
To: Huawei Xie; +Cc: fbl, qemu-devel, n.nikolaev
I had to drop the dpdk mailing list from Cc.
Added qemu mailing list, please copy patches there
in the future.
On Mon, Jan 26, 2015 at 11:20:36AM +0800, Huawei Xie wrote:
>
> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
Overall, I think it's a reasonable implementation.
Some comments below:
> ---
> lib/librte_vhost/Makefile | 5 +-
> lib/librte_vhost/vhost-net.h | 4 +
> lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 12 +-
> lib/librte_vhost/vhost_user/fd_man.c | 4 +-
> lib/librte_vhost/vhost_user/vhost-net-user.c | 428 ++++++++++++++++++++++++++
> lib/librte_vhost/vhost_user/vhost-net-user.h | 108 +++++++
> lib/librte_vhost/vhost_user/virtio-net-user.c | 205 ++++++++++++
> lib/librte_vhost/vhost_user/virtio-net-user.h | 48 +++
> lib/librte_vhost/virtio-net.c | 26 +-
> lib/librte_vhost/virtio-net.h | 43 +++
> 10 files changed, 865 insertions(+), 18 deletions(-)
> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
> create mode 100644 lib/librte_vhost/virtio-net.h
>
> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> index e0d0ef6..b2f14a0 100644
> --- a/lib/librte_vhost/Makefile
> +++ b/lib/librte_vhost/Makefile
> @@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
> # library name
> LIB = librte_vhost.a
>
> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> LDFLAGS += -lfuse
> # all source are stored in SRCS-y
> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c
>
> # install includes
> SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
> diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
> index 11737cc..3f18f25 100644
> --- a/lib/librte_vhost/vhost-net.h
> +++ b/lib/librte_vhost/vhost-net.h
> @@ -41,8 +41,12 @@
>
> #include <rte_log.h>
>
> +#include "rte_virtio_net.h"
> +
> #define VHOST_MEMORY_MAX_NREGIONS 8
>
> +extern struct vhost_net_device_ops const *ops;
> +
> /* Macros for printing using RTE_LOG */
> #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
> #define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
> diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> index edcbc10..1d2c403 100644
> --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> @@ -50,8 +50,7 @@
> #include "rte_virtio_net.h"
> #include "vhost-net.h"
> #include "virtio-net-cdev.h"
> -
> -extern struct vhost_net_device_ops const *ops;
> +#include "virtio-net.h"
>
> /* Line size for reading maps file. */
> static const uint32_t BUFSIZE = PATH_MAX;
> @@ -268,6 +267,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
> struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
> ((uint64_t)(uintptr_t)mem_regions_addr + size);
> uint64_t base_address = 0, mapped_address, mapped_size;
> + struct virtio_net *dev;
>
> for (idx = 0; idx < nregions; idx++) {
> regions[idx].guest_phys_address =
> @@ -335,6 +335,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
> regions[idx].guest_phys_address;
> }
>
> + dev = get_device(ctx);
> + if (dev && dev->mem && dev->mem->mapped_address) {
> + munmap((void *)(uintptr_t)dev->mem->mapped_address,
> + (size_t)dev->mem->mapped_size);
> + free(dev->mem);
> + dev->mem = NULL;
> + }
> +
> ops->set_mem_table(ctx, &regions[0], valid_regions);
> return 0;
> }
> diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
> index 09187e0..0d2beb9 100644
> --- a/lib/librte_vhost/vhost_user/fd_man.c
> +++ b/lib/librte_vhost/vhost_user/fd_man.c
> @@ -72,7 +72,7 @@ fdset_find_free_slot(struct fdset *pfdset)
>
> static void
> fdset_add_fd(struct fdset *pfdset, int idx, int fd,
> - fd_cb rcb, fd_cb wcb, uint64_t dat)
> + fd_cb rcb, fd_cb wcb, void *dat)
> {
> struct fdentry *pfdentry;
>
> @@ -138,7 +138,7 @@ fdset_init(struct fdset *pfdset)
> * Register the fd in the fdset with read/write handler and context.
> */
> int
> -fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
> {
> int i;
>
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
> new file mode 100644
> index 0000000..c84fd3b
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> @@ -0,0 +1,428 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <limits.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <errno.h>
> +
> +#include <rte_log.h>
> +#include <rte_virtio_net.h>
> +
> +#include "fd_man.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +#include "virtio-net-user.h"
> +
> +static void vserver_new_vq_conn(int fd, void *data);
> +static void vserver_message_handler(int fd, void *dat);
> +struct vhost_net_device_ops const *ops;
> +
> +static struct vhost_server *g_vhost_server;
> +
> +static const char *vhost_message_str[VHOST_USER_MAX] = {
> + [VHOST_USER_NONE] = "VHOST_USER_NONE",
> + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
> + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
> + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
> + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
> + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
> + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
> + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
> + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
> + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
> + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
> + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
> + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
> + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
> + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR"
> +};
> +
> +/**
> + * Create a unix domain socket, bind to path and listen for connection.
> + * @return
> + * socket fd or -1 on failure
> + */
> +static int
> +uds_socket(const char *path)
> +{
> + struct sockaddr_un un;
> + int sockfd;
> + int ret;
> +
> + if (path == NULL)
> + return -1;
> +
> + sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
> + if (sockfd < 0)
> + return -1;
> + RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
> +
> + memset(&un, 0, sizeof(un));
> + un.sun_family = AF_UNIX;
> + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
> + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> + if (ret == -1)
> + goto err;
> + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> +
> + ret = listen(sockfd, 1);
> + if (ret == -1)
> + goto err;
> +
> + return sockfd;
> +
> +err:
> + close(sockfd);
> + return -1;
> +}
> +
> +/* return bytes# of read on success or negative val on failure. */
> +static int
> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> +{
> + struct iovec iov;
> + struct msghdr msgh = { 0 };
> + size_t fdsize = fd_num * sizeof(int);
> + char control[CMSG_SPACE(fdsize)];
> + struct cmsghdr *cmsg;
> + int ret;
> +
> + iov.iov_base = buf;
> + iov.iov_len = buflen;
> +
> + msgh.msg_iov = &iov;
> + msgh.msg_iovlen = 1;
> + msgh.msg_control = control;
> + msgh.msg_controllen = sizeof(control);
> +
> + ret = recvmsg(sockfd, &msgh, 0);
> + if (ret <= 0) {
> + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
> + return ret;
> + }
> +
> + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
> + RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
> + return -1;
> + }
> +
> + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
> + cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
> + if ((cmsg->cmsg_level == SOL_SOCKET) &&
> + (cmsg->cmsg_type == SCM_RIGHTS)) {
> + memcpy(fds, CMSG_DATA(cmsg), fdsize);
> + break;
> + }
> + }
> +
> + return ret;
> +}
> +
> +/* return bytes# of read on success or negative val on failure. */
> +static int
> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> +{
> + int ret;
> +
> + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
> + msg->fds, VHOST_MEMORY_MAX_NREGIONS);
> + if (ret <= 0)
> + return ret;
> +
> + if (msg && msg->size) {
> + if (msg->size > sizeof(msg->payload)) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "invalid msg size: %d\n", msg->size);
> + return -1;
> + }
> + ret = read(sockfd, &msg->payload, msg->size);
> + if (ret <= 0)
> + return ret;
> + if (ret != (int)msg->size) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "read control message failed\n");
> + return -1;
> + }
> + }
> +
> + return ret;
> +}
> +
> +static int
> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> +{
> +
> + struct iovec iov;
> + struct msghdr msgh = { 0 };
> + size_t fdsize = fd_num * sizeof(int);
> + char control[CMSG_SPACE(fdsize)];
> + struct cmsghdr *cmsg;
> + int ret;
> +
> + iov.iov_base = buf;
> + iov.iov_len = buflen;
> +
> + msgh.msg_iov = &iov;
> + msgh.msg_iovlen = 1;
> +
> + if (fds && fd_num > 0) {
> + msgh.msg_control = control;
> + msgh.msg_controllen = sizeof(control);
> + cmsg = CMSG_FIRSTHDR(&msgh);
> + cmsg->cmsg_len = CMSG_LEN(fdsize);
> + cmsg->cmsg_level = SOL_SOCKET;
> + cmsg->cmsg_type = SCM_RIGHTS;
> + memcpy(CMSG_DATA(cmsg), fds, fdsize);
> + } else {
> + msgh.msg_control = NULL;
> + msgh.msg_controllen = 0;
> + }
> +
> + do {
> + ret = sendmsg(sockfd, &msgh, 0);
> + } while (ret < 0 && errno == EINTR);
> +
> + if (ret < 0) {
> + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
> + return ret;
> + }
> +
> + return ret;
> +}
> +
> +static int
> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
> +{
> + int ret;
> +
> + if (!msg)
> + return 0;
> +
> + msg->flags &= ~VHOST_USER_VERSION_MASK;
> + msg->flags |= VHOST_USER_VERSION;
> + msg->flags |= VHOST_USER_REPLY_MASK;
> +
> + ret = send_fd_message(sockfd, (char *)msg,
> + VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
> +
> + return ret;
> +}
> +
> +/* call back when there is new virtio connection. */
> +static void
> +vserver_new_vq_conn(int fd, void *dat)
> +{
> + struct vhost_server *vserver = (struct vhost_server *)dat;
> + int conn_fd;
> + int fh;
> + struct vhost_device_ctx vdev_ctx = { 0 };
> +
> + conn_fd = accept(fd, NULL, NULL);
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "new virtio connection is %d\n", conn_fd);
> + if (conn_fd < 0)
> + return;
> +
> + fh = ops->new_device(vdev_ctx);
> + if (fh == -1) {
> + close(conn_fd);
> + return;
> + }
> + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
> +
> + fdset_add(&vserver->fdset,
> + conn_fd, vserver_message_handler, NULL, (void *)fh);
> +}
> +
> +/* callback when there is message on the connfd */
> +static void
> +vserver_message_handler(int connfd, void *dat)
> +{
> + struct vhost_device_ctx ctx;
> + uint32_t fh = (uint32_t)dat;
> + struct VhostUserMsg msg;
> + uint64_t features;
> + int ret;
> +
> + ctx.fh = fh;
> + ret = read_vhost_message(connfd, &msg);
> + if (ret < 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "vhost read message failed\n");
> +
> + close(connfd);
> + fdset_del(&g_vhost_server->fdset, connfd);
> + ops->destroy_device(ctx);
> +
> + return;
> + } else if (ret == 0) {
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vhost peer closed\n");
> +
> + close(connfd);
> + fdset_del(&g_vhost_server->fdset, connfd);
> + ops->destroy_device(ctx);
> +
> + return;
> + }
> + if (msg.request > VHOST_USER_MAX) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "vhost read incorrect message\n");
> +
> + close(connfd);
> + fdset_del(&g_vhost_server->fdset, connfd);
> +
> + return;
> + }
> +
> + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> + vhost_message_str[msg.request]);
> + switch (msg.request) {
> + case VHOST_USER_GET_FEATURES:
> + ret = ops->get_features(ctx, &features);
> + msg.payload.u64 = features;
> + msg.size = sizeof(msg.payload.u64);
> + send_vhost_message(connfd, &msg);
What if this fails (e.g. remote died)?
How will everything be cleaned up?
> + break;
> + case VHOST_USER_SET_FEATURES:
> + features = msg.payload.u64;
> + ops->set_features(ctx, &features);
> + break;
> +
> + case VHOST_USER_SET_OWNER:
> + ops->set_owner(ctx);
> + break;
> + case VHOST_USER_RESET_OWNER:
> + ops->reset_owner(ctx);
> + break;
> +
> + case VHOST_USER_SET_MEM_TABLE:
> + user_set_mem_table(ctx, &msg);
> + break;
> +
> + case VHOST_USER_SET_LOG_BASE:
> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> + case VHOST_USER_SET_LOG_FD:
> + close(msg.fds[0]);
> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> + break;
> +
> + case VHOST_USER_SET_VRING_NUM:
> + ops->set_vring_num(ctx, &msg.payload.state);
> + break;
> + case VHOST_USER_SET_VRING_ADDR:
> + ops->set_vring_addr(ctx, &msg.payload.addr);
> + break;
> + case VHOST_USER_SET_VRING_BASE:
> + ops->set_vring_base(ctx, &msg.payload.state);
> + break;
> +
> + case VHOST_USER_GET_VRING_BASE:
> + ret = user_get_vring_base(ctx, &msg.payload.state);
> + msg.size = sizeof(msg.payload.state);
> + send_vhost_message(connfd, &msg);
> + break;
> +
> + case VHOST_USER_SET_VRING_KICK:
> + user_set_vring_kick(ctx, &msg);
> + break;
> + case VHOST_USER_SET_VRING_CALL:
> + user_set_vring_call(ctx, &msg);
> + break;
> +
> + case VHOST_USER_SET_VRING_ERR:
> + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
> + close(msg.fds[0]);
> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
> + break;
> +
> + default:
> + break;
> +
> + }
> +}
> +
> +
> +/**
> + * Creates and initialise the vhost server.
> + */
> +int
> +rte_vhost_driver_register(const char *path)
> +{
> +
> + struct vhost_server *vserver;
> +
> + if (g_vhost_server != NULL)
> + return -1;
> +
> + vserver = calloc(sizeof(struct vhost_server), 1);
> + if (vserver == NULL)
> + return -1;
> +
> + fdset_init(&vserver->fdset);
> +
> + unlink(path);
> +
> + vserver->listenfd = uds_socket(path);
> + if (vserver->listenfd < 0) {
> + free(vserver);
> + return -1;
> + }
> + vserver->path = path;
> +
> + fdset_add(&vserver->fdset, vserver->listenfd,
> + vserver_new_vq_conn, NULL,
> + vserver);
> +
> + ops = get_virtio_net_callbacks();
> +
> + g_vhost_server = vserver;
> +
> + return 0;
> +}
> +
> +
> +int
> +rte_vhost_driver_session_start(void)
> +{
> + fdset_event_dispatch(&g_vhost_server->fdset);
> + return 0;
> +}
> +
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
> new file mode 100644
> index 0000000..7e6cda4
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
> @@ -0,0 +1,108 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "fd_man.h"
> +
> +struct vhost_server {
> + const char *path; /**< The path the uds is bind to. */
> + int listenfd; /**< The listener sockfd. */
> + struct fdset fdset; /**< The fd list this vhost server manages. */
> +};
> +
> +/* refer to hw/virtio/vhost-user.c */
> +
> +#define VHOST_MEMORY_MAX_NREGIONS 8
> +
> +typedef enum VhostUserRequest {
> + VHOST_USER_NONE = 0,
> + VHOST_USER_GET_FEATURES = 1,
> + VHOST_USER_SET_FEATURES = 2,
> + VHOST_USER_SET_OWNER = 3,
> + VHOST_USER_RESET_OWNER = 4,
> + VHOST_USER_SET_MEM_TABLE = 5,
> + VHOST_USER_SET_LOG_BASE = 6,
> + VHOST_USER_SET_LOG_FD = 7,
> + VHOST_USER_SET_VRING_NUM = 8,
> + VHOST_USER_SET_VRING_ADDR = 9,
> + VHOST_USER_SET_VRING_BASE = 10,
> + VHOST_USER_GET_VRING_BASE = 11,
> + VHOST_USER_SET_VRING_KICK = 12,
> + VHOST_USER_SET_VRING_CALL = 13,
> + VHOST_USER_SET_VRING_ERR = 14,
> + VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> + uint64_t guest_phys_addr;
> + uint64_t memory_size;
> + uint64_t userspace_addr;
> + uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> + uint32_t nregions;
> + uint32_t padding;
> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserMsg {
> + VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK (0x3)
> +#define VHOST_USER_REPLY_MASK (0x1 << 2)
> + uint32_t flags;
> + uint32_t size; /* the following payload size */
> + union {
> +#define VHOST_USER_VRING_IDX_MASK (0xff)
> +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
> + uint64_t u64;
> + struct vhost_vring_state state;
> + struct vhost_vring_addr addr;
> + VhostUserMemory memory;
> + } payload;
> + int fds[VHOST_MEMORY_MAX_NREGIONS];
> +} __attribute((packed)) VhostUserMsg;
> +
> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION (0x1)
> +
> +/*****************************************************************************/
> +#endif
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> new file mode 100644
> index 0000000..6601fcd
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> @@ -0,0 +1,205 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
> +
> +#include <rte_log.h>
> +
> +#include "virtio-net.h"
> +#include "virtio-net-user.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +
> +int
> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> + unsigned int idx;
> + struct VhostUserMemory memory = pmsg->payload.memory;
> + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
> + uint64_t mapped_address, base_address = 0;
> +
> + for (idx = 0; idx < memory.nregions; idx++) {
> + if (memory.regions[idx].guest_phys_addr == 0)
> + base_address = memory.regions[idx].userspace_addr;
> + }
> + if (base_address == 0) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "couldn't find the mem region whose GPA is 0.\n");
> + return -1;
> + }
> +
> + for (idx = 0; idx < memory.nregions; idx++) {
> + regions[idx].guest_phys_address =
> + memory.regions[idx].guest_phys_addr;
> + regions[idx].guest_phys_address_end =
> + memory.regions[idx].guest_phys_addr +
> + memory.regions[idx].memory_size;
> + regions[idx].memory_size = memory.regions[idx].memory_size;
> + regions[idx].userspace_address =
> + memory.regions[idx].userspace_addr;
> +
> + /* This is ugly */
> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
> + regions[idx].memory_size +
> + memory.regions[idx].mmap_offset,
> + PROT_READ | PROT_WRITE, MAP_SHARED,
> + pmsg->fds[idx],
> + 0);
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "mapped region %d to %p\n",
> + idx, (void *)mapped_address);
> +
> + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "mmap qemu guest failed.\n");
> + return -1;
> + }
> +
> + mapped_address += memory.regions[idx].mmap_offset;
> +
> + regions[idx].address_offset = mapped_address -
> + regions[idx].guest_phys_address;
> + LOG_DEBUG(VHOST_CONFIG,
> + "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
> + idx,
> + (void *)(uintptr_t)regions[idx].guest_phys_address,
> + (void *)(uintptr_t)regions[idx].userspace_address,
> + regions[idx].memory_size);
> + }
> + ops->set_mem_table(ctx, regions, memory.nregions);
> + return 0;
> +}
> +
> +
> +static int
> +virtio_is_ready(struct virtio_net *dev)
> +{
> + struct vhost_virtqueue *rvq, *tvq;
> +
> + /* mq support in future.*/
> + rvq = dev->virtqueue[VIRTIO_RXQ];
> + tvq = dev->virtqueue[VIRTIO_TXQ];
> + if (rvq && tvq && rvq->desc && tvq->desc &&
> + (rvq->kickfd != (eventfd_t)-1) &&
> + (rvq->callfd != (eventfd_t)-1) &&
> + (tvq->kickfd != (eventfd_t)-1) &&
> + (tvq->callfd != (eventfd_t)-1)) {
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "virtio is now ready for processing.\n");
> + return 1;
> + }
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "virtio isn't ready for processing.\n");
> + return 0;
> +}
> +
> +void
> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> + struct vhost_vring_file file;
> +
> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
> + file.fd = -1;
> + else
> + file.fd = pmsg->fds[0];
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vring call idx:%d file:%d\n", file.index, file.fd);
> + ops->set_vring_call(ctx, &file);
> +}
> +
> +
> +/*
> + * In vhost-user, when we receive kick message, will test whether virtio
> + * device is ready for packet processing.
> + */
> +void
> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> + struct vhost_vring_file file;
> + struct virtio_net *dev = get_device(ctx);
> +
> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
> + file.fd = -1;
> + else
> + file.fd = pmsg->fds[0];
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vring kick idx:%d file:%d\n", file.index, file.fd);
> + ops->set_vring_kick(ctx, &file);
> +
> + if (virtio_is_ready(dev) &&
> + !(dev->flags & VIRTIO_DEV_RUNNING))
> + notify_ops->new_device(dev);
> +
> +}
> +
> +/*
> + * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
> + */
> +int
> +user_get_vring_base(struct vhost_device_ctx ctx,
> + struct vhost_vring_state *state)
> +{
> + struct virtio_net *dev = get_device(ctx);
> +
> + /* We have to stop the queue (virtio) if it is running. */
> + if (dev->flags & VIRTIO_DEV_RUNNING)
> + notify_ops->destroy_device(dev);
> +
> + /* Here we are safe to get the last used index */
> + ops->get_vring_base(ctx, state->index, state);
> +
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "vring base idx:%d file:%d\n", state->index, state->num);
> + /*
> + * Based on current qemu vhost-user implementation, this message is
> + * sent and only sent in vhost_vring_stop.
> + * TODO: cleanup the vring, it isn't usable since here.
> + */
Please don't tie yourself to the current qemu implementation. Please just
extend qemu to send explicit start/stop messages.
You'll need to negotiate the new capabilities.
Nikolay, it seems that the version field is only 2 bits.
How can we extend it cleanly?
Perhaps add a new GET_PROTOCOL message for exchanging vhost-user
specific bits; then the remote should set a high version bit to let qemu
know it's supported?
> + if (((int)dev->virtqueue[VIRTIO_RXQ]->callfd) >= 0) {
> + close(dev->virtqueue[VIRTIO_RXQ]->callfd);
> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> + }
> + if (((int)dev->virtqueue[VIRTIO_TXQ]->callfd) >= 0) {
> + close(dev->virtqueue[VIRTIO_TXQ]->callfd);
> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> + }
> +
> + return 0;
> +
> +}
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
> new file mode 100644
> index 0000000..0f6a75a
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> @@ -0,0 +1,48 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VIRTIO_NET_USER_H
> +#define _VIRTIO_NET_USER_H
> +
> +#include "vhost-net.h"
> +#include "vhost-net-user.h"
> +
> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
> +
> +#endif
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index 57a5801..c458ed9 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -50,6 +50,7 @@
> #include <rte_virtio_net.h>
>
> #include "vhost-net.h"
> +#include "virtio-net.h"
>
> /*
> * Device linked list structure for configuration.
> @@ -60,7 +61,7 @@ struct virtio_net_config_ll {
> };
>
> /* device ops to add/remove device to/from data core. */
> -static struct virtio_net_device_ops const *notify_ops;
> +struct virtio_net_device_ops const *notify_ops;
> /* root address of the linked list of managed virtio devices */
> static struct virtio_net_config_ll *ll_root;
>
> @@ -88,8 +89,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
> if ((qemu_va >= region->userspace_address) &&
> (qemu_va <= region->userspace_address +
> region->memory_size)) {
> - vhost_va = dev->mem->mapped_address + qemu_va -
> - dev->mem->base_address;
> + vhost_va = qemu_va + region->guest_phys_address +
> + region->address_offset -
> + region->userspace_address;
> break;
> }
> }
> @@ -119,7 +121,7 @@ get_config_ll_entry(struct vhost_device_ctx ctx)
> * Searches the configuration core linked list and
> * retrieves the device if it exists.
> */
> -static struct virtio_net *
> +struct virtio_net *
> get_device(struct vhost_device_ctx ctx)
> {
> struct virtio_net_config_ll *ll_dev;
> @@ -256,6 +258,11 @@ init_device(struct virtio_net *dev)
> memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
> memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
>
> + dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> + dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> +
> /* Backends are set to -1 indicating an inactive device. */
> dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
> dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
> @@ -455,12 +462,6 @@ set_mem_table(struct vhost_device_ctx ctx,
> if (dev == NULL)
> return -1;
>
> - if (dev->mem) {
> - munmap((void *)(uintptr_t)dev->mem->mapped_address,
> - (size_t)dev->mem->mapped_size);
> - free(dev->mem);
> - }
> -
> /* Malloc the memory structure depending on the number of regions. */
> mem = calloc(1, sizeof(struct virtio_memory) +
> (sizeof(struct virtio_memory_regions) * nregions));
> @@ -624,7 +625,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
> vq = dev->virtqueue[file->index];
>
> - if (vq->kickfd)
> + if ((int)vq->kickfd >= 0)
> close((int)vq->kickfd);
>
> vq->kickfd = file->fd;
> @@ -650,8 +651,9 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
> vq = dev->virtqueue[file->index];
>
> - if (vq->callfd)
> + if ((int)vq->callfd >= 0)
> close((int)vq->callfd);
> +
> vq->callfd = file->fd;
>
> return 0;
> diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h
> new file mode 100644
> index 0000000..75fb57e
> --- /dev/null
> +++ b/lib/librte_vhost/virtio-net.h
> @@ -0,0 +1,43 @@
> +/*-
> + * BSD LICENSE
> + *
> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + * * Neither the name of Intel Corporation nor the names of its
> + * contributors may be used to endorse or promote products derived
> + * from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VIRTIO_NET_H
> +#define _VIRTIO_NET_H
> +
> +#include "vhost-net.h"
> +#include "rte_virtio_net.h"
> +
> +struct virtio_net_device_ops const *notify_ops;
> +struct virtio_net *get_device(struct vhost_device_ctx ctx);
> +
> +#endif
> --
> 1.8.1.4
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Qemu-devel] [dpdk-dev] [RFC PATCH v2 10/14] vhost user support
2015-01-28 13:34 ` [Qemu-devel] [dpdk-dev] [RFC PATCH v2 10/14] vhost user support Michael S. Tsirkin
@ 2015-01-28 14:27 ` Nikolay Nikolaev
2015-01-28 14:37 ` Michael S. Tsirkin
0 siblings, 1 reply; 3+ messages in thread
From: Nikolay Nikolaev @ 2015-01-28 14:27 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Huawei Xie, fbl, qemu-devel, VirtualOpenSystems Technical Team
On Wed, Jan 28, 2015 at 3:34 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> I had to drop the dpdk mailing list from Cc.
> Added qemu mailing list, please copy patches there
> in the future.
>
> On Mon, Jan 26, 2015 at 11:20:36AM +0800, Huawei Xie wrote:
>>
>> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
>
> Overall, I think it's a reasonable implementation.
> Some comments below:
>
>> ---
>> lib/librte_vhost/Makefile | 5 +-
>> lib/librte_vhost/vhost-net.h | 4 +
>> lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 12 +-
>> lib/librte_vhost/vhost_user/fd_man.c | 4 +-
>> lib/librte_vhost/vhost_user/vhost-net-user.c | 428 ++++++++++++++++++++++++++
>> lib/librte_vhost/vhost_user/vhost-net-user.h | 108 +++++++
>> lib/librte_vhost/vhost_user/virtio-net-user.c | 205 ++++++++++++
>> lib/librte_vhost/vhost_user/virtio-net-user.h | 48 +++
>> lib/librte_vhost/virtio-net.c | 26 +-
>> lib/librte_vhost/virtio-net.h | 43 +++
>> 10 files changed, 865 insertions(+), 18 deletions(-)
>> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
>> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
>> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
>> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
>> create mode 100644 lib/librte_vhost/virtio-net.h
>>
>> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
>> index e0d0ef6..b2f14a0 100644
>> --- a/lib/librte_vhost/Makefile
>> +++ b/lib/librte_vhost/Makefile
>> @@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
>> # library name
>> LIB = librte_vhost.a
>>
>> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>> LDFLAGS += -lfuse
>> # all source are stored in SRCS-y
>> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
>> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
>> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c
>>
>> # install includes
>> SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
>> diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
>> index 11737cc..3f18f25 100644
>> --- a/lib/librte_vhost/vhost-net.h
>> +++ b/lib/librte_vhost/vhost-net.h
>> @@ -41,8 +41,12 @@
>>
>> #include <rte_log.h>
>>
>> +#include "rte_virtio_net.h"
>> +
>> #define VHOST_MEMORY_MAX_NREGIONS 8
>>
>> +extern struct vhost_net_device_ops const *ops;
>> +
>> /* Macros for printing using RTE_LOG */
>> #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
>> #define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
>> diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
>> index edcbc10..1d2c403 100644
>> --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
>> +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
>> @@ -50,8 +50,7 @@
>> #include "rte_virtio_net.h"
>> #include "vhost-net.h"
>> #include "virtio-net-cdev.h"
>> -
>> -extern struct vhost_net_device_ops const *ops;
>> +#include "virtio-net.h"
>>
>> /* Line size for reading maps file. */
>> static const uint32_t BUFSIZE = PATH_MAX;
>> @@ -268,6 +267,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
>> struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
>> ((uint64_t)(uintptr_t)mem_regions_addr + size);
>> uint64_t base_address = 0, mapped_address, mapped_size;
>> + struct virtio_net *dev;
>>
>> for (idx = 0; idx < nregions; idx++) {
>> regions[idx].guest_phys_address =
>> @@ -335,6 +335,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
>> regions[idx].guest_phys_address;
>> }
>>
>> + dev = get_device(ctx);
>> + if (dev && dev->mem && dev->mem->mapped_address) {
>> + munmap((void *)(uintptr_t)dev->mem->mapped_address,
>> + (size_t)dev->mem->mapped_size);
>> + free(dev->mem);
>> + dev->mem = NULL;
>> + }
>> +
>> ops->set_mem_table(ctx, &regions[0], valid_regions);
>> return 0;
>> }
>> diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
>> index 09187e0..0d2beb9 100644
>> --- a/lib/librte_vhost/vhost_user/fd_man.c
>> +++ b/lib/librte_vhost/vhost_user/fd_man.c
>> @@ -72,7 +72,7 @@ fdset_find_free_slot(struct fdset *pfdset)
>>
>> static void
>> fdset_add_fd(struct fdset *pfdset, int idx, int fd,
>> - fd_cb rcb, fd_cb wcb, uint64_t dat)
>> + fd_cb rcb, fd_cb wcb, void *dat)
>> {
>> struct fdentry *pfdentry;
>>
>> @@ -138,7 +138,7 @@ fdset_init(struct fdset *pfdset)
>> * Register the fd in the fdset with read/write handler and context.
>> */
>> int
>> -fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
>> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
>> {
>> int i;
>>
>> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
>> new file mode 100644
>> index 0000000..c84fd3b
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
>> @@ -0,0 +1,428 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <limits.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <string.h>
>> +#include <sys/types.h>
>> +#include <sys/socket.h>
>> +#include <sys/un.h>
>> +#include <errno.h>
>> +
>> +#include <rte_log.h>
>> +#include <rte_virtio_net.h>
>> +
>> +#include "fd_man.h"
>> +#include "vhost-net-user.h"
>> +#include "vhost-net.h"
>> +#include "virtio-net-user.h"
>> +
>> +static void vserver_new_vq_conn(int fd, void *data);
>> +static void vserver_message_handler(int fd, void *dat);
>> +struct vhost_net_device_ops const *ops;
>> +
>> +static struct vhost_server *g_vhost_server;
>> +
>> +static const char *vhost_message_str[VHOST_USER_MAX] = {
>> + [VHOST_USER_NONE] = "VHOST_USER_NONE",
>> + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
>> + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
>> + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
>> + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
>> + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
>> + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
>> + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
>> + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
>> + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
>> + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
>> + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
>> + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
>> + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
>> + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR"
>> +};
>> +
>> +/**
>> + * Create a unix domain socket, bind to path and listen for connection.
>> + * @return
>> + * socket fd or -1 on failure
>> + */
>> +static int
>> +uds_socket(const char *path)
>> +{
>> + struct sockaddr_un un;
>> + int sockfd;
>> + int ret;
>> +
>> + if (path == NULL)
>> + return -1;
>> +
>> + sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
>> + if (sockfd < 0)
>> + return -1;
>> + RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
>> +
>> + memset(&un, 0, sizeof(un));
>> + un.sun_family = AF_UNIX;
>> + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
>> + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
>> + if (ret == -1)
>> + goto err;
>> + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
>> +
>> + ret = listen(sockfd, 1);
>> + if (ret == -1)
>> + goto err;
>> +
>> + return sockfd;
>> +
>> +err:
>> + close(sockfd);
>> + return -1;
>> +}
>> +
>> +/* return bytes# of read on success or negative val on failure. */
>> +static int
>> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
>> +{
>> + struct iovec iov;
>> + struct msghdr msgh = { 0 };
>> + size_t fdsize = fd_num * sizeof(int);
>> + char control[CMSG_SPACE(fdsize)];
>> + struct cmsghdr *cmsg;
>> + int ret;
>> +
>> + iov.iov_base = buf;
>> + iov.iov_len = buflen;
>> +
>> + msgh.msg_iov = &iov;
>> + msgh.msg_iovlen = 1;
>> + msgh.msg_control = control;
>> + msgh.msg_controllen = sizeof(control);
>> +
>> + ret = recvmsg(sockfd, &msgh, 0);
>> + if (ret <= 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
>> + return ret;
>> + }
>> +
>> + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
>> + RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
>> + return -1;
>> + }
>> +
>> + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
>> + cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
>> + if ((cmsg->cmsg_level == SOL_SOCKET) &&
>> + (cmsg->cmsg_type == SCM_RIGHTS)) {
>> + memcpy(fds, CMSG_DATA(cmsg), fdsize);
>> + break;
>> + }
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +/* return bytes# of read on success or negative val on failure. */
>> +static int
>> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
>> +{
>> + int ret;
>> +
>> + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
>> + msg->fds, VHOST_MEMORY_MAX_NREGIONS);
>> + if (ret <= 0)
>> + return ret;
>> +
>> + if (msg && msg->size) {
>> + if (msg->size > sizeof(msg->payload)) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "invalid msg size: %d\n", msg->size);
>> + return -1;
>> + }
>> + ret = read(sockfd, &msg->payload, msg->size);
>> + if (ret <= 0)
>> + return ret;
>> + if (ret != (int)msg->size) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "read control message failed\n");
>> + return -1;
>> + }
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static int
>> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
>> +{
>> +
>> + struct iovec iov;
>> + struct msghdr msgh = { 0 };
>> + size_t fdsize = fd_num * sizeof(int);
>> + char control[CMSG_SPACE(fdsize)];
>> + struct cmsghdr *cmsg;
>> + int ret;
>> +
>> + iov.iov_base = buf;
>> + iov.iov_len = buflen;
>> +
>> + msgh.msg_iov = &iov;
>> + msgh.msg_iovlen = 1;
>> +
>> + if (fds && fd_num > 0) {
>> + msgh.msg_control = control;
>> + msgh.msg_controllen = sizeof(control);
>> + cmsg = CMSG_FIRSTHDR(&msgh);
>> + cmsg->cmsg_len = CMSG_LEN(fdsize);
>> + cmsg->cmsg_level = SOL_SOCKET;
>> + cmsg->cmsg_type = SCM_RIGHTS;
>> + memcpy(CMSG_DATA(cmsg), fds, fdsize);
>> + } else {
>> + msgh.msg_control = NULL;
>> + msgh.msg_controllen = 0;
>> + }
>> +
>> + do {
>> + ret = sendmsg(sockfd, &msgh, 0);
>> + } while (ret < 0 && errno == EINTR);
>> +
>> + if (ret < 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
>> + return ret;
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static int
>> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
>> +{
>> + int ret;
>> +
>> + if (!msg)
>> + return 0;
>> +
>> + msg->flags &= ~VHOST_USER_VERSION_MASK;
>> + msg->flags |= VHOST_USER_VERSION;
>> + msg->flags |= VHOST_USER_REPLY_MASK;
>> +
>> + ret = send_fd_message(sockfd, (char *)msg,
>> + VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
>> +
>> + return ret;
>> +}
>> +
>> +/* call back when there is new virtio connection. */
>> +static void
>> +vserver_new_vq_conn(int fd, void *dat)
>> +{
>> + struct vhost_server *vserver = (struct vhost_server *)dat;
>> + int conn_fd;
>> + int fh;
>> + struct vhost_device_ctx vdev_ctx = { 0 };
>> +
>> + conn_fd = accept(fd, NULL, NULL);
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "new virtio connection is %d\n", conn_fd);
>> + if (conn_fd < 0)
>> + return;
>> +
>> + fh = ops->new_device(vdev_ctx);
>> + if (fh == -1) {
>> + close(conn_fd);
>> + return;
>> + }
>> + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
>> +
>> + fdset_add(&vserver->fdset,
>> + conn_fd, vserver_message_handler, NULL, (void *)fh);
>> +}
>> +
>> +/* callback when there is message on the connfd */
>> +static void
>> +vserver_message_handler(int connfd, void *dat)
>> +{
>> + struct vhost_device_ctx ctx;
>> + uint32_t fh = (uint32_t)dat;
>> + struct VhostUserMsg msg;
>> + uint64_t features;
>> + int ret;
>> +
>> + ctx.fh = fh;
>> + ret = read_vhost_message(connfd, &msg);
>> + if (ret < 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "vhost read message failed\n");
>> +
>> + close(connfd);
>> + fdset_del(&g_vhost_server->fdset, connfd);
>> + ops->destroy_device(ctx);
>> +
>> + return;
>> + } else if (ret == 0) {
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vhost peer closed\n");
>> +
>> + close(connfd);
>> + fdset_del(&g_vhost_server->fdset, connfd);
>> + ops->destroy_device(ctx);
>> +
>> + return;
>> + }
>> + if (msg.request > VHOST_USER_MAX) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "vhost read incorrect message\n");
>> +
>> + close(connfd);
>> + fdset_del(&g_vhost_server->fdset, connfd);
>> +
>> + return;
>> + }
>> +
>> + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
>> + vhost_message_str[msg.request]);
>> + switch (msg.request) {
>> + case VHOST_USER_GET_FEATURES:
>> + ret = ops->get_features(ctx, &features);
>> + msg.payload.u64 = features;
>> + msg.size = sizeof(msg.payload.u64);
>> + send_vhost_message(connfd, &msg);
>
> What if this fails (e.g. remote died)?
> How will everything be cleaned up?
>
>> + break;
>> + case VHOST_USER_SET_FEATURES:
>> + features = msg.payload.u64;
>> + ops->set_features(ctx, &features);
>> + break;
>> +
>> + case VHOST_USER_SET_OWNER:
>> + ops->set_owner(ctx);
>> + break;
>> + case VHOST_USER_RESET_OWNER:
>> + ops->reset_owner(ctx);
>> + break;
>> +
>> + case VHOST_USER_SET_MEM_TABLE:
>> + user_set_mem_table(ctx, &msg);
>> + break;
>> +
>> + case VHOST_USER_SET_LOG_BASE:
>> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
>> + case VHOST_USER_SET_LOG_FD:
>> + close(msg.fds[0]);
>> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
>> + break;
>> +
>> + case VHOST_USER_SET_VRING_NUM:
>> + ops->set_vring_num(ctx, &msg.payload.state);
>> + break;
>> + case VHOST_USER_SET_VRING_ADDR:
>> + ops->set_vring_addr(ctx, &msg.payload.addr);
>> + break;
>> + case VHOST_USER_SET_VRING_BASE:
>> + ops->set_vring_base(ctx, &msg.payload.state);
>> + break;
>> +
>> + case VHOST_USER_GET_VRING_BASE:
>> + ret = user_get_vring_base(ctx, &msg.payload.state);
>> + msg.size = sizeof(msg.payload.state);
>> + send_vhost_message(connfd, &msg);
>> + break;
>> +
>> + case VHOST_USER_SET_VRING_KICK:
>> + user_set_vring_kick(ctx, &msg);
>> + break;
>> + case VHOST_USER_SET_VRING_CALL:
>> + user_set_vring_call(ctx, &msg);
>> + break;
>> +
>> + case VHOST_USER_SET_VRING_ERR:
>> + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
>> + close(msg.fds[0]);
>> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
>> + break;
>> +
>> + default:
>> + break;
>> +
>> + }
>> +}
>> +
>> +
>> +/**
>> + * Creates and initialise the vhost server.
>> + */
>> +int
>> +rte_vhost_driver_register(const char *path)
>> +{
>> +
>> + struct vhost_server *vserver;
>> +
>> + if (g_vhost_server != NULL)
>> + return -1;
>> +
>> + vserver = calloc(sizeof(struct vhost_server), 1);
>> + if (vserver == NULL)
>> + return -1;
>> +
>> + fdset_init(&vserver->fdset);
>> +
>> + unlink(path);
>> +
>> + vserver->listenfd = uds_socket(path);
>> + if (vserver->listenfd < 0) {
>> + free(vserver);
>> + return -1;
>> + }
>> + vserver->path = path;
>> +
>> + fdset_add(&vserver->fdset, vserver->listenfd,
>> + vserver_new_vq_conn, NULL,
>> + vserver);
>> +
>> + ops = get_virtio_net_callbacks();
>> +
>> + g_vhost_server = vserver;
>> +
>> + return 0;
>> +}
>> +
>> +
>> +int
>> +rte_vhost_driver_session_start(void)
>> +{
>> + fdset_event_dispatch(&g_vhost_server->fdset);
>> + return 0;
>> +}
>> +
>> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
>> new file mode 100644
>> index 0000000..7e6cda4
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
>> @@ -0,0 +1,108 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#ifndef _VHOST_NET_USER_H
>> +#define _VHOST_NET_USER_H
>> +
>> +#include <stdint.h>
>> +#include <linux/vhost.h>
>> +
>> +#include "fd_man.h"
>> +
>> +struct vhost_server {
>> + const char *path; /**< The path the uds is bind to. */
>> + int listenfd; /**< The listener sockfd. */
>> + struct fdset fdset; /**< The fd list this vhost server manages. */
>> +};
>> +
>> +/* refer to hw/virtio/vhost-user.c */
>> +
>> +#define VHOST_MEMORY_MAX_NREGIONS 8
>> +
>> +typedef enum VhostUserRequest {
>> + VHOST_USER_NONE = 0,
>> + VHOST_USER_GET_FEATURES = 1,
>> + VHOST_USER_SET_FEATURES = 2,
>> + VHOST_USER_SET_OWNER = 3,
>> + VHOST_USER_RESET_OWNER = 4,
>> + VHOST_USER_SET_MEM_TABLE = 5,
>> + VHOST_USER_SET_LOG_BASE = 6,
>> + VHOST_USER_SET_LOG_FD = 7,
>> + VHOST_USER_SET_VRING_NUM = 8,
>> + VHOST_USER_SET_VRING_ADDR = 9,
>> + VHOST_USER_SET_VRING_BASE = 10,
>> + VHOST_USER_GET_VRING_BASE = 11,
>> + VHOST_USER_SET_VRING_KICK = 12,
>> + VHOST_USER_SET_VRING_CALL = 13,
>> + VHOST_USER_SET_VRING_ERR = 14,
>> + VHOST_USER_MAX
>> +} VhostUserRequest;
>> +
>> +typedef struct VhostUserMemoryRegion {
>> + uint64_t guest_phys_addr;
>> + uint64_t memory_size;
>> + uint64_t userspace_addr;
>> + uint64_t mmap_offset;
>> +} VhostUserMemoryRegion;
>> +
>> +typedef struct VhostUserMemory {
>> + uint32_t nregions;
>> + uint32_t padding;
>> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>> +} VhostUserMemory;
>> +
>> +typedef struct VhostUserMsg {
>> + VhostUserRequest request;
>> +
>> +#define VHOST_USER_VERSION_MASK (0x3)
>> +#define VHOST_USER_REPLY_MASK (0x1 << 2)
>> + uint32_t flags;
>> + uint32_t size; /* the following payload size */
>> + union {
>> +#define VHOST_USER_VRING_IDX_MASK (0xff)
>> +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
>> + uint64_t u64;
>> + struct vhost_vring_state state;
>> + struct vhost_vring_addr addr;
>> + VhostUserMemory memory;
>> + } payload;
>> + int fds[VHOST_MEMORY_MAX_NREGIONS];
>> +} __attribute((packed)) VhostUserMsg;
>> +
>> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
>> +
>> +/* The version of the protocol we support */
>> +#define VHOST_USER_VERSION (0x1)
>> +
>> +/*****************************************************************************/
>> +#endif
>> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
>> new file mode 100644
>> index 0000000..6601fcd
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
>> @@ -0,0 +1,205 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <unistd.h>
>> +#include <sys/mman.h>
>> +
>> +#include <rte_log.h>
>> +
>> +#include "virtio-net.h"
>> +#include "virtio-net-user.h"
>> +#include "vhost-net-user.h"
>> +#include "vhost-net.h"
>> +
>> +int
>> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> + unsigned int idx;
>> + struct VhostUserMemory memory = pmsg->payload.memory;
>> + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
>> + uint64_t mapped_address, base_address = 0;
>> +
>> + for (idx = 0; idx < memory.nregions; idx++) {
>> + if (memory.regions[idx].guest_phys_addr == 0)
>> + base_address = memory.regions[idx].userspace_addr;
>> + }
>> + if (base_address == 0) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "couldn't find the mem region whose GPA is 0.\n");
>> + return -1;
>> + }
>> +
>> + for (idx = 0; idx < memory.nregions; idx++) {
>> + regions[idx].guest_phys_address =
>> + memory.regions[idx].guest_phys_addr;
>> + regions[idx].guest_phys_address_end =
>> + memory.regions[idx].guest_phys_addr +
>> + memory.regions[idx].memory_size;
>> + regions[idx].memory_size = memory.regions[idx].memory_size;
>> + regions[idx].userspace_address =
>> + memory.regions[idx].userspace_addr;
>> +
>> + /* This is ugly */
>> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
>> + regions[idx].memory_size +
>> + memory.regions[idx].mmap_offset,
>> + PROT_READ | PROT_WRITE, MAP_SHARED,
>> + pmsg->fds[idx],
>> + 0);
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "mapped region %d to %p\n",
>> + idx, (void *)mapped_address);
>> +
>> + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
>> + RTE_LOG(ERR, VHOST_CONFIG,
>> + "mmap qemu guest failed.\n");
>> + return -1;
>> + }
>> +
>> + mapped_address += memory.regions[idx].mmap_offset;
>> +
>> + regions[idx].address_offset = mapped_address -
>> + regions[idx].guest_phys_address;
>> + LOG_DEBUG(VHOST_CONFIG,
>> + "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
>> + idx,
>> + (void *)(uintptr_t)regions[idx].guest_phys_address,
>> + (void *)(uintptr_t)regions[idx].userspace_address,
>> + regions[idx].memory_size);
>> + }
>> + ops->set_mem_table(ctx, regions, memory.nregions);
>> + return 0;
>> +}
>> +
>> +
>> +static int
>> +virtio_is_ready(struct virtio_net *dev)
>> +{
>> + struct vhost_virtqueue *rvq, *tvq;
>> +
>> + /* mq support in future.*/
>> + rvq = dev->virtqueue[VIRTIO_RXQ];
>> + tvq = dev->virtqueue[VIRTIO_TXQ];
>> + if (rvq && tvq && rvq->desc && tvq->desc &&
>> + (rvq->kickfd != (eventfd_t)-1) &&
>> + (rvq->callfd != (eventfd_t)-1) &&
>> + (tvq->kickfd != (eventfd_t)-1) &&
>> + (tvq->callfd != (eventfd_t)-1)) {
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "virtio is now ready for processing.\n");
>> + return 1;
>> + }
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "virtio isn't ready for processing.\n");
>> + return 0;
>> +}
>> +
>> +void
>> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> + struct vhost_vring_file file;
>> +
>> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
>> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
>> + file.fd = -1;
>> + else
>> + file.fd = pmsg->fds[0];
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vring call idx:%d file:%d\n", file.index, file.fd);
>> + ops->set_vring_call(ctx, &file);
>> +}
>> +
>> +
>> +/*
>> + * In vhost-user, when we receive kick message, will test whether virtio
>> + * device is ready for packet processing.
>> + */
>> +void
>> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
>> +{
>> + struct vhost_vring_file file;
>> + struct virtio_net *dev = get_device(ctx);
>> +
>> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
>> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
>> + file.fd = -1;
>> + else
>> + file.fd = pmsg->fds[0];
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vring kick idx:%d file:%d\n", file.index, file.fd);
>> + ops->set_vring_kick(ctx, &file);
>> +
>> + if (virtio_is_ready(dev) &&
>> + !(dev->flags & VIRTIO_DEV_RUNNING))
>> + notify_ops->new_device(dev);
>> +
>> +}
>> +
>> +/*
>> + * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
>> + */
>> +int
>> +user_get_vring_base(struct vhost_device_ctx ctx,
>> + struct vhost_vring_state *state)
>> +{
>> + struct virtio_net *dev = get_device(ctx);
>> +
>> + /* We have to stop the queue (virtio) if it is running. */
>> + if (dev->flags & VIRTIO_DEV_RUNNING)
>> + notify_ops->destroy_device(dev);
>> +
>> + /* Here we are safe to get the last used index */
>> + ops->get_vring_base(ctx, state->index, state);
>> +
>> + RTE_LOG(INFO, VHOST_CONFIG,
>> + "vring base idx:%d file:%d\n", state->index, state->num);
>> + /*
>> + * Based on current qemu vhost-user implementation, this message is
>> + * sent and only sent in vhost_vring_stop.
>> + * TODO: cleanup the vring, it isn't usable since here.
>> + */
>
> Please don't tie yourself to a current qemu implementation. Please just
> extend qemu to send explicit start/stop messages.
> You'll need to negotiate the new capabilities.
>
>
> Nikolay, it seems that version field is only 2 bits.
> how can we extend it cleanly?
Will something like this do:
#define VHOST_USER_VERSION_MASK_MAJ (0x3)
#define VHOST_USER_REPLY_MASK (0x1<<2)
#define VHOST_USER_VERSION_MASK_MIN (0xf<<3)
The "major" part of the version will be increased in case of
significant changes in the protocol, and the "minor" part in all other
cases. I guess this will give us enough space for versioning.
regards,
Nikolay Nikolaev
>
> Perhaps, add a new GET_PROTOCOL message for exchanging vhost user
> specific bits, then the remote should set a high version bit to let qemu
> know it's supported?
>
>
>
>
>> + if (((int)dev->virtqueue[VIRTIO_RXQ]->callfd) >= 0) {
>> + close(dev->virtqueue[VIRTIO_RXQ]->callfd);
>> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
>> + }
>> + if (((int)dev->virtqueue[VIRTIO_TXQ]->callfd) >= 0) {
>> + close(dev->virtqueue[VIRTIO_TXQ]->callfd);
>> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
>> + }
>> +
>> + return 0;
>> +
>> +}
>> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
>> new file mode 100644
>> index 0000000..0f6a75a
>> --- /dev/null
>> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
>> @@ -0,0 +1,48 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#ifndef _VIRTIO_NET_USER_H
>> +#define _VIRTIO_NET_USER_H
>> +
>> +#include "vhost-net.h"
>> +#include "vhost-net-user.h"
>> +
>> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
>> +
>> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
>> +
>> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
>> +
>> +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
>> +
>> +#endif
>> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
>> index 57a5801..c458ed9 100644
>> --- a/lib/librte_vhost/virtio-net.c
>> +++ b/lib/librte_vhost/virtio-net.c
>> @@ -50,6 +50,7 @@
>> #include <rte_virtio_net.h>
>>
>> #include "vhost-net.h"
>> +#include "virtio-net.h"
>>
>> /*
>> * Device linked list structure for configuration.
>> @@ -60,7 +61,7 @@ struct virtio_net_config_ll {
>> };
>>
>> /* device ops to add/remove device to/from data core. */
>> -static struct virtio_net_device_ops const *notify_ops;
>> +struct virtio_net_device_ops const *notify_ops;
>> /* root address of the linked list of managed virtio devices */
>> static struct virtio_net_config_ll *ll_root;
>>
>> @@ -88,8 +89,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
>> if ((qemu_va >= region->userspace_address) &&
>> (qemu_va <= region->userspace_address +
>> region->memory_size)) {
>> - vhost_va = dev->mem->mapped_address + qemu_va -
>> - dev->mem->base_address;
>> + vhost_va = qemu_va + region->guest_phys_address +
>> + region->address_offset -
>> + region->userspace_address;
>> break;
>> }
>> }
>> @@ -119,7 +121,7 @@ get_config_ll_entry(struct vhost_device_ctx ctx)
>> * Searches the configuration core linked list and
>> * retrieves the device if it exists.
>> */
>> -static struct virtio_net *
>> +struct virtio_net *
>> get_device(struct vhost_device_ctx ctx)
>> {
>> struct virtio_net_config_ll *ll_dev;
>> @@ -256,6 +258,11 @@ init_device(struct virtio_net *dev)
>> memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
>> memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
>>
>> + dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
>> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
>> + dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
>> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
>> +
>> /* Backends are set to -1 indicating an inactive device. */
>> dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
>> dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
>> @@ -455,12 +462,6 @@ set_mem_table(struct vhost_device_ctx ctx,
>> if (dev == NULL)
>> return -1;
>>
>> - if (dev->mem) {
>> - munmap((void *)(uintptr_t)dev->mem->mapped_address,
>> - (size_t)dev->mem->mapped_size);
>> - free(dev->mem);
>> - }
>> -
>> /* Malloc the memory structure depending on the number of regions. */
>> mem = calloc(1, sizeof(struct virtio_memory) +
>> (sizeof(struct virtio_memory_regions) * nregions));
>> @@ -624,7 +625,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
>> vq = dev->virtqueue[file->index];
>>
>> - if (vq->kickfd)
>> + if ((int)vq->kickfd >= 0)
>> close((int)vq->kickfd);
>>
>> vq->kickfd = file->fd;
>> @@ -650,8 +651,9 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
>> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
>> vq = dev->virtqueue[file->index];
>>
>> - if (vq->callfd)
>> + if ((int)vq->callfd >= 0)
>> close((int)vq->callfd);
>> +
>> vq->callfd = file->fd;
>>
>> return 0;
>> diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h
>> new file mode 100644
>> index 0000000..75fb57e
>> --- /dev/null
>> +++ b/lib/librte_vhost/virtio-net.h
>> @@ -0,0 +1,43 @@
>> +/*-
>> + * BSD LICENSE
>> + *
>> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + * * Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * * Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in
>> + * the documentation and/or other materials provided with the
>> + * distribution.
>> + * * Neither the name of Intel Corporation nor the names of its
>> + * contributors may be used to endorse or promote products derived
>> + * from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#ifndef _VIRTIO_NET_H
>> +#define _VIRTIO_NET_H
>> +
>> +#include "vhost-net.h"
>> +#include "rte_virtio_net.h"
>> +
>> +struct virtio_net_device_ops const *notify_ops;
>> +struct virtio_net *get_device(struct vhost_device_ctx ctx);
>> +
>> +#endif
>> --
>> 1.8.1.4
>>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Qemu-devel] [dpdk-dev] [RFC PATCH v2 10/14] vhost user support
2015-01-28 14:27 ` Nikolay Nikolaev
@ 2015-01-28 14:37 ` Michael S. Tsirkin
0 siblings, 0 replies; 3+ messages in thread
From: Michael S. Tsirkin @ 2015-01-28 14:37 UTC (permalink / raw)
To: Nikolay Nikolaev
Cc: Huawei Xie, fbl, qemu-devel, VirtualOpenSystems Technical Team
On Wed, Jan 28, 2015 at 04:27:35PM +0200, Nikolay Nikolaev wrote:
> On Wed, Jan 28, 2015 at 3:34 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > I had to drop the dpdk mailing list from Cc.
> > Added qemu mailing list, please copy patches there
> > in the future.
> >
> > On Mon, Jan 26, 2015 at 11:20:36AM +0800, Huawei Xie wrote:
> >>
> >> Signed-off-by: Huawei Xie <huawei.xie@intel.com>
> >
> > Overall, I think it's a reasonable implementation.
> > Some comments below:
> >
> >> ---
> >> lib/librte_vhost/Makefile | 5 +-
> >> lib/librte_vhost/vhost-net.h | 4 +
> >> lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 12 +-
> >> lib/librte_vhost/vhost_user/fd_man.c | 4 +-
> >> lib/librte_vhost/vhost_user/vhost-net-user.c | 428 ++++++++++++++++++++++++++
> >> lib/librte_vhost/vhost_user/vhost-net-user.h | 108 +++++++
> >> lib/librte_vhost/vhost_user/virtio-net-user.c | 205 ++++++++++++
> >> lib/librte_vhost/vhost_user/virtio-net-user.h | 48 +++
> >> lib/librte_vhost/virtio-net.c | 26 +-
> >> lib/librte_vhost/virtio-net.h | 43 +++
> >> 10 files changed, 865 insertions(+), 18 deletions(-)
> >> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
> >> create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
> >> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
> >> create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
> >> create mode 100644 lib/librte_vhost/virtio-net.h
> >>
> >> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> >> index e0d0ef6..b2f14a0 100644
> >> --- a/lib/librte_vhost/Makefile
> >> +++ b/lib/librte_vhost/Makefile
> >> @@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
> >> # library name
> >> LIB = librte_vhost.a
> >>
> >> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> >> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> >> LDFLAGS += -lfuse
> >> # all source are stored in SRCS-y
> >> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> >> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> >> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c
> >>
> >> # install includes
> >> SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
> >> diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
> >> index 11737cc..3f18f25 100644
> >> --- a/lib/librte_vhost/vhost-net.h
> >> +++ b/lib/librte_vhost/vhost-net.h
> >> @@ -41,8 +41,12 @@
> >>
> >> #include <rte_log.h>
> >>
> >> +#include "rte_virtio_net.h"
> >> +
> >> #define VHOST_MEMORY_MAX_NREGIONS 8
> >>
> >> +extern struct vhost_net_device_ops const *ops;
> >> +
> >> /* Macros for printing using RTE_LOG */
> >> #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
> >> #define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1
> >> diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> >> index edcbc10..1d2c403 100644
> >> --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> >> +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> >> @@ -50,8 +50,7 @@
> >> #include "rte_virtio_net.h"
> >> #include "vhost-net.h"
> >> #include "virtio-net-cdev.h"
> >> -
> >> -extern struct vhost_net_device_ops const *ops;
> >> +#include "virtio-net.h"
> >>
> >> /* Line size for reading maps file. */
> >> static const uint32_t BUFSIZE = PATH_MAX;
> >> @@ -268,6 +267,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
> >> struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
> >> ((uint64_t)(uintptr_t)mem_regions_addr + size);
> >> uint64_t base_address = 0, mapped_address, mapped_size;
> >> + struct virtio_net *dev;
> >>
> >> for (idx = 0; idx < nregions; idx++) {
> >> regions[idx].guest_phys_address =
> >> @@ -335,6 +335,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
> >> regions[idx].guest_phys_address;
> >> }
> >>
> >> + dev = get_device(ctx);
> >> + if (dev && dev->mem && dev->mem->mapped_address) {
> >> + munmap((void *)(uintptr_t)dev->mem->mapped_address,
> >> + (size_t)dev->mem->mapped_size);
> >> + free(dev->mem);
> >> + dev->mem = NULL;
> >> + }
> >> +
> >> ops->set_mem_table(ctx, ®ions[0], valid_regions);
> >> return 0;
> >> }
> >> diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c
> >> index 09187e0..0d2beb9 100644
> >> --- a/lib/librte_vhost/vhost_user/fd_man.c
> >> +++ b/lib/librte_vhost/vhost_user/fd_man.c
> >> @@ -72,7 +72,7 @@ fdset_find_free_slot(struct fdset *pfdset)
> >>
> >> static void
> >> fdset_add_fd(struct fdset *pfdset, int idx, int fd,
> >> - fd_cb rcb, fd_cb wcb, uint64_t dat)
> >> + fd_cb rcb, fd_cb wcb, void *dat)
> >> {
> >> struct fdentry *pfdentry;
> >>
> >> @@ -138,7 +138,7 @@ fdset_init(struct fdset *pfdset)
> >> * Register the fd in the fdset with read/write handler and context.
> >> */
> >> int
> >> -fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, uint64_t dat)
> >> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
> >> {
> >> int i;
> >>
> >> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
> >> new file mode 100644
> >> index 0000000..c84fd3b
> >> --- /dev/null
> >> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> >> @@ -0,0 +1,428 @@
> >> +/*-
> >> + * BSD LICENSE
> >> + *
> >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> >> + * All rights reserved.
> >> + *
> >> + * Redistribution and use in source and binary forms, with or without
> >> + * modification, are permitted provided that the following conditions
> >> + * are met:
> >> + *
> >> + * * Redistributions of source code must retain the above copyright
> >> + * notice, this list of conditions and the following disclaimer.
> >> + * * Redistributions in binary form must reproduce the above copyright
> >> + * notice, this list of conditions and the following disclaimer in
> >> + * the documentation and/or other materials provided with the
> >> + * distribution.
> >> + * * Neither the name of Intel Corporation nor the names of its
> >> + * contributors may be used to endorse or promote products derived
> >> + * from this software without specific prior written permission.
> >> + *
> >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >> + */
> >> +
> >> +#include <stdint.h>
> >> +#include <stdio.h>
> >> +#include <limits.h>
> >> +#include <stdlib.h>
> >> +#include <unistd.h>
> >> +#include <string.h>
> >> +#include <sys/types.h>
> >> +#include <sys/socket.h>
> >> +#include <sys/un.h>
> >> +#include <errno.h>
> >> +
> >> +#include <rte_log.h>
> >> +#include <rte_virtio_net.h>
> >> +
> >> +#include "fd_man.h"
> >> +#include "vhost-net-user.h"
> >> +#include "vhost-net.h"
> >> +#include "virtio-net-user.h"
> >> +
> >> +static void vserver_new_vq_conn(int fd, void *data);
> >> +static void vserver_message_handler(int fd, void *dat);
> >> +struct vhost_net_device_ops const *ops;
> >> +
> >> +static struct vhost_server *g_vhost_server;
> >> +
> >> +static const char *vhost_message_str[VHOST_USER_MAX] = {
> >> + [VHOST_USER_NONE] = "VHOST_USER_NONE",
> >> + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
> >> + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
> >> + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
> >> + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
> >> + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
> >> + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
> >> + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
> >> + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
> >> + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
> >> + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
> >> + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
> >> + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
> >> + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
> >> + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR"
> >> +};
> >> +
> >> +/**
> >> + * Create a unix domain socket, bind to path and listen for connection.
> >> + * @return
> >> + * socket fd or -1 on failure
> >> + */
> >> +static int
> >> +uds_socket(const char *path)
> >> +{
> >> + struct sockaddr_un un;
> >> + int sockfd;
> >> + int ret;
> >> +
> >> + if (path == NULL)
> >> + return -1;
> >> +
> >> + sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
> >> + if (sockfd < 0)
> >> + return -1;
> >> + RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
> >> +
> >> + memset(&un, 0, sizeof(un));
> >> + un.sun_family = AF_UNIX;
> >> + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
> >> + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> >> + if (ret == -1)
> >> + goto err;
> >> + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> >> +
> >> + ret = listen(sockfd, 1);
> >> + if (ret == -1)
> >> + goto err;
> >> +
> >> + return sockfd;
> >> +
> >> +err:
> >> + close(sockfd);
> >> + return -1;
> >> +}
> >> +
> >> +/* return bytes# of read on success or negative val on failure. */
> >> +static int
> >> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> >> +{
> >> + struct iovec iov;
> >> + struct msghdr msgh = { 0 };
> >> + size_t fdsize = fd_num * sizeof(int);
> >> + char control[CMSG_SPACE(fdsize)];
> >> + struct cmsghdr *cmsg;
> >> + int ret;
> >> +
> >> + iov.iov_base = buf;
> >> + iov.iov_len = buflen;
> >> +
> >> + msgh.msg_iov = &iov;
> >> + msgh.msg_iovlen = 1;
> >> + msgh.msg_control = control;
> >> + msgh.msg_controllen = sizeof(control);
> >> +
> >> + ret = recvmsg(sockfd, &msgh, 0);
> >> + if (ret <= 0) {
> >> + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
> >> + return ret;
> >> + }
> >> +
> >> + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
> >> + RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n");
> >> + return -1;
> >> + }
> >> +
> >> + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
> >> + cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
> >> + if ((cmsg->cmsg_level == SOL_SOCKET) &&
> >> + (cmsg->cmsg_type == SCM_RIGHTS)) {
> >> + memcpy(fds, CMSG_DATA(cmsg), fdsize);
> >> + break;
> >> + }
> >> + }
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +/* return bytes# of read on success or negative val on failure. */
> >> +static int
> >> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> >> +{
> >> + int ret;
> >> +
> >> + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
> >> + msg->fds, VHOST_MEMORY_MAX_NREGIONS);
> >> + if (ret <= 0)
> >> + return ret;
> >> +
> >> + if (msg && msg->size) {
> >> + if (msg->size > sizeof(msg->payload)) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "invalid msg size: %d\n", msg->size);
> >> + return -1;
> >> + }
> >> + ret = read(sockfd, &msg->payload, msg->size);
> >> + if (ret <= 0)
> >> + return ret;
> >> + if (ret != (int)msg->size) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "read control message failed\n");
> >> + return -1;
> >> + }
> >> + }
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +static int
> >> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> >> +{
> >> +
> >> + struct iovec iov;
> >> + struct msghdr msgh = { 0 };
> >> + size_t fdsize = fd_num * sizeof(int);
> >> + char control[CMSG_SPACE(fdsize)];
> >> + struct cmsghdr *cmsg;
> >> + int ret;
> >> +
> >> + iov.iov_base = buf;
> >> + iov.iov_len = buflen;
> >> +
> >> + msgh.msg_iov = &iov;
> >> + msgh.msg_iovlen = 1;
> >> +
> >> + if (fds && fd_num > 0) {
> >> + msgh.msg_control = control;
> >> + msgh.msg_controllen = sizeof(control);
> >> + cmsg = CMSG_FIRSTHDR(&msgh);
> >> + cmsg->cmsg_len = CMSG_LEN(fdsize);
> >> + cmsg->cmsg_level = SOL_SOCKET;
> >> + cmsg->cmsg_type = SCM_RIGHTS;
> >> + memcpy(CMSG_DATA(cmsg), fds, fdsize);
> >> + } else {
> >> + msgh.msg_control = NULL;
> >> + msgh.msg_controllen = 0;
> >> + }
> >> +
> >> + do {
> >> + ret = sendmsg(sockfd, &msgh, 0);
> >> + } while (ret < 0 && errno == EINTR);
> >> +
> >> + if (ret < 0) {
> >> + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n");
> >> + return ret;
> >> + }
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +static int
> >> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
> >> +{
> >> + int ret;
> >> +
> >> + if (!msg)
> >> + return 0;
> >> +
> >> + msg->flags &= ~VHOST_USER_VERSION_MASK;
> >> + msg->flags |= VHOST_USER_VERSION;
> >> + msg->flags |= VHOST_USER_REPLY_MASK;
> >> +
> >> + ret = send_fd_message(sockfd, (char *)msg,
> >> + VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +/* call back when there is new virtio connection. */
> >> +static void
> >> +vserver_new_vq_conn(int fd, void *dat)
> >> +{
> >> + struct vhost_server *vserver = (struct vhost_server *)dat;
> >> + int conn_fd;
> >> + int fh;
> >> + struct vhost_device_ctx vdev_ctx = { 0 };
> >> +
> >> + conn_fd = accept(fd, NULL, NULL);
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "new virtio connection is %d\n", conn_fd);
> >> + if (conn_fd < 0)
> >> + return;
> >> +
> >> + fh = ops->new_device(vdev_ctx);
> >> + if (fh == -1) {
> >> + close(conn_fd);
> >> + return;
> >> + }
> >> + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
> >> +
> >> + fdset_add(&vserver->fdset,
> >> + conn_fd, vserver_message_handler, NULL, (void *)fh);
> >> +}
> >> +
> >> +/* callback when there is message on the connfd */
> >> +static void
> >> +vserver_message_handler(int connfd, void *dat)
> >> +{
> >> + struct vhost_device_ctx ctx;
> >> + uint32_t fh = (uint32_t)dat;
> >> + struct VhostUserMsg msg;
> >> + uint64_t features;
> >> + int ret;
> >> +
> >> + ctx.fh = fh;
> >> + ret = read_vhost_message(connfd, &msg);
> >> + if (ret < 0) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "vhost read message failed\n");
> >> +
> >> + close(connfd);
> >> + fdset_del(&g_vhost_server->fdset, connfd);
> >> + ops->destroy_device(ctx);
> >> +
> >> + return;
> >> + } else if (ret == 0) {
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "vhost peer closed\n");
> >> +
> >> + close(connfd);
> >> + fdset_del(&g_vhost_server->fdset, connfd);
> >> + ops->destroy_device(ctx);
> >> +
> >> + return;
> >> + }
> >> + if (msg.request > VHOST_USER_MAX) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "vhost read incorrect message\n");
> >> +
> >> + close(connfd);
> >> + fdset_del(&g_vhost_server->fdset, connfd);
> >> +
> >> + return;
> >> + }
> >> +
> >> + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> >> + vhost_message_str[msg.request]);
> >> + switch (msg.request) {
> >> + case VHOST_USER_GET_FEATURES:
> >> + ret = ops->get_features(ctx, &features);
> >> + msg.payload.u64 = features;
> >> + msg.size = sizeof(msg.payload.u64);
> >> + send_vhost_message(connfd, &msg);
> >
> > What if this fails (e.g. remote died)?
> > How will everything be cleaned up?
> >
> >> + break;
> >> + case VHOST_USER_SET_FEATURES:
> >> + features = msg.payload.u64;
> >> + ops->set_features(ctx, &features);
> >> + break;
> >> +
> >> + case VHOST_USER_SET_OWNER:
> >> + ops->set_owner(ctx);
> >> + break;
> >> + case VHOST_USER_RESET_OWNER:
> >> + ops->reset_owner(ctx);
> >> + break;
> >> +
> >> + case VHOST_USER_SET_MEM_TABLE:
> >> + user_set_mem_table(ctx, &msg);
> >> + break;
> >> +
> >> + case VHOST_USER_SET_LOG_BASE:
> >> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> >> + case VHOST_USER_SET_LOG_FD:
> >> + close(msg.fds[0]);
> >> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> >> + break;
> >> +
> >> + case VHOST_USER_SET_VRING_NUM:
> >> + ops->set_vring_num(ctx, &msg.payload.state);
> >> + break;
> >> + case VHOST_USER_SET_VRING_ADDR:
> >> + ops->set_vring_addr(ctx, &msg.payload.addr);
> >> + break;
> >> + case VHOST_USER_SET_VRING_BASE:
> >> + ops->set_vring_base(ctx, &msg.payload.state);
> >> + break;
> >> +
> >> + case VHOST_USER_GET_VRING_BASE:
> >> + ret = user_get_vring_base(ctx, &msg.payload.state);
> >> + msg.size = sizeof(msg.payload.state);
> >> + send_vhost_message(connfd, &msg);
> >> + break;
> >> +
> >> + case VHOST_USER_SET_VRING_KICK:
> >> + user_set_vring_kick(ctx, &msg);
> >> + break;
> >> + case VHOST_USER_SET_VRING_CALL:
> >> + user_set_vring_call(ctx, &msg);
> >> + break;
> >> +
> >> + case VHOST_USER_SET_VRING_ERR:
> >> + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
> >> + close(msg.fds[0]);
> >> + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
> >> + break;
> >> +
> >> + default:
> >> + break;
> >> +
> >> + }
> >> +}
> >> +
> >> +
> >> +/**
> >> + * Creates and initialise the vhost server.
> >> + */
> >> +int
> >> +rte_vhost_driver_register(const char *path)
> >> +{
> >> +
> >> + struct vhost_server *vserver;
> >> +
> >> + if (g_vhost_server != NULL)
> >> + return -1;
> >> +
> >> + vserver = calloc(sizeof(struct vhost_server), 1);
> >> + if (vserver == NULL)
> >> + return -1;
> >> +
> >> + fdset_init(&vserver->fdset);
> >> +
> >> + unlink(path);
> >> +
> >> + vserver->listenfd = uds_socket(path);
> >> + if (vserver->listenfd < 0) {
> >> + free(vserver);
> >> + return -1;
> >> + }
> >> + vserver->path = path;
> >> +
> >> + fdset_add(&vserver->fdset, vserver->listenfd,
> >> + vserver_new_vq_conn, NULL,
> >> + vserver);
> >> +
> >> + ops = get_virtio_net_callbacks();
> >> +
> >> + g_vhost_server = vserver;
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +
> >> +int
> >> +rte_vhost_driver_session_start(void)
> >> +{
> >> + fdset_event_dispatch(&g_vhost_server->fdset);
> >> + return 0;
> >> +}
> >> +
> >> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
> >> new file mode 100644
> >> index 0000000..7e6cda4
> >> --- /dev/null
> >> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
> >> @@ -0,0 +1,108 @@
> >> +/*-
> >> + * BSD LICENSE
> >> + *
> >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> >> + * All rights reserved.
> >> + *
> >> + * Redistribution and use in source and binary forms, with or without
> >> + * modification, are permitted provided that the following conditions
> >> + * are met:
> >> + *
> >> + * * Redistributions of source code must retain the above copyright
> >> + * notice, this list of conditions and the following disclaimer.
> >> + * * Redistributions in binary form must reproduce the above copyright
> >> + * notice, this list of conditions and the following disclaimer in
> >> + * the documentation and/or other materials provided with the
> >> + * distribution.
> >> + * * Neither the name of Intel Corporation nor the names of its
> >> + * contributors may be used to endorse or promote products derived
> >> + * from this software without specific prior written permission.
> >> + *
> >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >> + */
> >> +
> >> +#ifndef _VHOST_NET_USER_H
> >> +#define _VHOST_NET_USER_H
> >> +
> >> +#include <stdint.h>
> >> +#include <linux/vhost.h>
> >> +
> >> +#include "fd_man.h"
> >> +
> >> +struct vhost_server {
> >> + const char *path; /**< The path the uds is bind to. */
> >> + int listenfd; /**< The listener sockfd. */
> >> + struct fdset fdset; /**< The fd list this vhost server manages. */
> >> +};
> >> +
> >> +/* refer to hw/virtio/vhost-user.c */
> >> +
> >> +#define VHOST_MEMORY_MAX_NREGIONS 8
> >> +
> >> +typedef enum VhostUserRequest {
> >> + VHOST_USER_NONE = 0,
> >> + VHOST_USER_GET_FEATURES = 1,
> >> + VHOST_USER_SET_FEATURES = 2,
> >> + VHOST_USER_SET_OWNER = 3,
> >> + VHOST_USER_RESET_OWNER = 4,
> >> + VHOST_USER_SET_MEM_TABLE = 5,
> >> + VHOST_USER_SET_LOG_BASE = 6,
> >> + VHOST_USER_SET_LOG_FD = 7,
> >> + VHOST_USER_SET_VRING_NUM = 8,
> >> + VHOST_USER_SET_VRING_ADDR = 9,
> >> + VHOST_USER_SET_VRING_BASE = 10,
> >> + VHOST_USER_GET_VRING_BASE = 11,
> >> + VHOST_USER_SET_VRING_KICK = 12,
> >> + VHOST_USER_SET_VRING_CALL = 13,
> >> + VHOST_USER_SET_VRING_ERR = 14,
> >> + VHOST_USER_MAX
> >> +} VhostUserRequest;
> >> +
> >> +typedef struct VhostUserMemoryRegion {
> >> + uint64_t guest_phys_addr;
> >> + uint64_t memory_size;
> >> + uint64_t userspace_addr;
> >> + uint64_t mmap_offset;
> >> +} VhostUserMemoryRegion;
> >> +
> >> +typedef struct VhostUserMemory {
> >> + uint32_t nregions;
> >> + uint32_t padding;
> >> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> >> +} VhostUserMemory;
> >> +
> >> +typedef struct VhostUserMsg {
> >> + VhostUserRequest request;
> >> +
> >> +#define VHOST_USER_VERSION_MASK (0x3)
> >> +#define VHOST_USER_REPLY_MASK (0x1 << 2)
> >> + uint32_t flags;
> >> + uint32_t size; /* the following payload size */
> >> + union {
> >> +#define VHOST_USER_VRING_IDX_MASK (0xff)
> >> +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
> >> + uint64_t u64;
> >> + struct vhost_vring_state state;
> >> + struct vhost_vring_addr addr;
> >> + VhostUserMemory memory;
> >> + } payload;
> >> + int fds[VHOST_MEMORY_MAX_NREGIONS];
> >> +} __attribute((packed)) VhostUserMsg;
> >> +
> >> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
> >> +
> >> +/* The version of the protocol we support */
> >> +#define VHOST_USER_VERSION (0x1)
> >> +
> >> +/*****************************************************************************/
> >> +#endif
> >> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> >> new file mode 100644
> >> index 0000000..6601fcd
> >> --- /dev/null
> >> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> >> @@ -0,0 +1,205 @@
> >> +/*-
> >> + * BSD LICENSE
> >> + *
> >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> >> + * All rights reserved.
> >> + *
> >> + * Redistribution and use in source and binary forms, with or without
> >> + * modification, are permitted provided that the following conditions
> >> + * are met:
> >> + *
> >> + * * Redistributions of source code must retain the above copyright
> >> + * notice, this list of conditions and the following disclaimer.
> >> + * * Redistributions in binary form must reproduce the above copyright
> >> + * notice, this list of conditions and the following disclaimer in
> >> + * the documentation and/or other materials provided with the
> >> + * distribution.
> >> + * * Neither the name of Intel Corporation nor the names of its
> >> + * contributors may be used to endorse or promote products derived
> >> + * from this software without specific prior written permission.
> >> + *
> >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >> + */
> >> +
> >> +#include <stdint.h>
> >> +#include <stdio.h>
> >> +#include <stdlib.h>
> >> +#include <unistd.h>
> >> +#include <sys/mman.h>
> >> +
> >> +#include <rte_log.h>
> >> +
> >> +#include "virtio-net.h"
> >> +#include "virtio-net-user.h"
> >> +#include "vhost-net-user.h"
> >> +#include "vhost-net.h"
> >> +
> >> +int
> >> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> >> +{
> >> + unsigned int idx;
> >> + struct VhostUserMemory memory = pmsg->payload.memory;
> >> + struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
> >> + uint64_t mapped_address, base_address = 0;
> >> +
> >> + for (idx = 0; idx < memory.nregions; idx++) {
> >> + if (memory.regions[idx].guest_phys_addr == 0)
> >> + base_address = memory.regions[idx].userspace_addr;
> >> + }
> >> + if (base_address == 0) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "couldn't find the mem region whose GPA is 0.\n");
> >> + return -1;
> >> + }
> >> +
> >> + for (idx = 0; idx < memory.nregions; idx++) {
> >> + regions[idx].guest_phys_address =
> >> + memory.regions[idx].guest_phys_addr;
> >> + regions[idx].guest_phys_address_end =
> >> + memory.regions[idx].guest_phys_addr +
> >> + memory.regions[idx].memory_size;
> >> + regions[idx].memory_size = memory.regions[idx].memory_size;
> >> + regions[idx].userspace_address =
> >> + memory.regions[idx].userspace_addr;
> >> +
> >> + /* This is ugly */
> >> + mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
> >> + regions[idx].memory_size +
> >> + memory.regions[idx].mmap_offset,
> >> + PROT_READ | PROT_WRITE, MAP_SHARED,
> >> + pmsg->fds[idx],
> >> + 0);
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "mapped region %d to %p\n",
> >> + idx, (void *)mapped_address);
> >> +
> >> + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
> >> + RTE_LOG(ERR, VHOST_CONFIG,
> >> + "mmap qemu guest failed.\n");
> >> + return -1;
> >> + }
> >> +
> >> + mapped_address += memory.regions[idx].mmap_offset;
> >> +
> >> + regions[idx].address_offset = mapped_address -
> >> + regions[idx].guest_phys_address;
> >> + LOG_DEBUG(VHOST_CONFIG,
> >> + "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
> >> + idx,
> >> + (void *)(uintptr_t)regions[idx].guest_phys_address,
> >> + (void *)(uintptr_t)regions[idx].userspace_address,
> >> + regions[idx].memory_size);
> >> + }
> >> + ops->set_mem_table(ctx, regions, memory.nregions);
> >> + return 0;
> >> +}
> >> +
> >> +
> >> +static int
> >> +virtio_is_ready(struct virtio_net *dev)
> >> +{
> >> + struct vhost_virtqueue *rvq, *tvq;
> >> +
> >> + /* mq support in future.*/
> >> + rvq = dev->virtqueue[VIRTIO_RXQ];
> >> + tvq = dev->virtqueue[VIRTIO_TXQ];
> >> + if (rvq && tvq && rvq->desc && tvq->desc &&
> >> + (rvq->kickfd != (eventfd_t)-1) &&
> >> + (rvq->callfd != (eventfd_t)-1) &&
> >> + (tvq->kickfd != (eventfd_t)-1) &&
> >> + (tvq->callfd != (eventfd_t)-1)) {
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "virtio is now ready for processing.\n");
> >> + return 1;
> >> + }
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "virtio isn't ready for processing.\n");
> >> + return 0;
> >> +}
> >> +
> >> +void
> >> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> >> +{
> >> + struct vhost_vring_file file;
> >> +
> >> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> >> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
> >> + file.fd = -1;
> >> + else
> >> + file.fd = pmsg->fds[0];
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "vring call idx:%d file:%d\n", file.index, file.fd);
> >> + ops->set_vring_call(ctx, &file);
> >> +}
> >> +
> >> +
> >> +/*
> >> + * In vhost-user, when we receive kick message, will test whether virtio
> >> + * device is ready for packet processing.
> >> + */
> >> +void
> >> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> >> +{
> >> + struct vhost_vring_file file;
> >> + struct virtio_net *dev = get_device(ctx);
> >> +
> >> + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> >> + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
> >> + file.fd = -1;
> >> + else
> >> + file.fd = pmsg->fds[0];
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "vring kick idx:%d file:%d\n", file.index, file.fd);
> >> + ops->set_vring_kick(ctx, &file);
> >> +
> >> + if (virtio_is_ready(dev) &&
> >> + !(dev->flags & VIRTIO_DEV_RUNNING))
> >> + notify_ops->new_device(dev);
> >> +
> >> +}
> >> +
> >> +/*
> >> + * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
> >> + */
> >> +int
> >> +user_get_vring_base(struct vhost_device_ctx ctx,
> >> + struct vhost_vring_state *state)
> >> +{
> >> + struct virtio_net *dev = get_device(ctx);
> >> +
> >> + /* We have to stop the queue (virtio) if it is running. */
> >> + if (dev->flags & VIRTIO_DEV_RUNNING)
> >> + notify_ops->destroy_device(dev);
> >> +
> >> + /* Here we are safe to get the last used index */
> >> + ops->get_vring_base(ctx, state->index, state);
> >> +
> >> + RTE_LOG(INFO, VHOST_CONFIG,
> >> + "vring base idx:%d file:%d\n", state->index, state->num);
> >> + /*
> >> + * Based on current qemu vhost-user implementation, this message is
> >> + * sent and only sent in vhost_vring_stop.
> >> + * TODO: cleanup the vring, it isn't usable since here.
> >> + */
> >
> > Please don't tie yourself to the current qemu implementation. Please just
> > extend qemu to send explicit start/stop messages.
> > You'll need to negotiate the new capabilities.
> >
> >
> > Nikolay, it seems that the version field is only 2 bits.
> > How can we extend it cleanly?
>
> Will something like this do:
>
> #define VHOST_USER_VERSION_MASK_MAJ (0x3)
> #define VHOST_USER_REPLY_MASK (0x1<<2)
> #define VHOST_USER_VERSION_MASK_MIN (0xf<<3)
>
> The "major" part of the version will be increased in case of
> significant changes in the protocol, and the "minor" part in all other
> cases. I guess this will give us enough space for versioning.
>
> regards,
> Nikolay Nikolaev
I think it's not enough:
1. Which message do we use to pass it?
2. It's preferable to have something like feature bits
rather than version numbers. That makes it much easier to avoid
conflicts.
> >
> > Perhaps add a new GET_PROTOCOL message for exchanging vhost-user
> > specific bits; the remote should then set a high version bit to let qemu
> > know it's supported?
> >
> >
> >
> >
> >> + if (((int)dev->virtqueue[VIRTIO_RXQ]->callfd) >= 0) {
> >> + close(dev->virtqueue[VIRTIO_RXQ]->callfd);
> >> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> >> + }
> >> + if (((int)dev->virtqueue[VIRTIO_TXQ]->callfd) >= 0) {
> >> + close(dev->virtqueue[VIRTIO_TXQ]->callfd);
> >> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> >> + }
> >> +
> >> + return 0;
> >> +
> >> +}
> >> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
> >> new file mode 100644
> >> index 0000000..0f6a75a
> >> --- /dev/null
> >> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> >> @@ -0,0 +1,48 @@
> >> +/*-
> >> + * BSD LICENSE
> >> + *
> >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> >> + * All rights reserved.
> >> + *
> >> + * Redistribution and use in source and binary forms, with or without
> >> + * modification, are permitted provided that the following conditions
> >> + * are met:
> >> + *
> >> + * * Redistributions of source code must retain the above copyright
> >> + * notice, this list of conditions and the following disclaimer.
> >> + * * Redistributions in binary form must reproduce the above copyright
> >> + * notice, this list of conditions and the following disclaimer in
> >> + * the documentation and/or other materials provided with the
> >> + * distribution.
> >> + * * Neither the name of Intel Corporation nor the names of its
> >> + * contributors may be used to endorse or promote products derived
> >> + * from this software without specific prior written permission.
> >> + *
> >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >> + */
> >> +
> >> +#ifndef _VIRTIO_NET_USER_H
> >> +#define _VIRTIO_NET_USER_H
> >> +
> >> +#include "vhost-net.h"
> >> +#include "vhost-net-user.h"
> >> +
> >> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
> >> +
> >> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
> >> +
> >> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
> >> +
> >> +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
> >> +
> >> +#endif
> >> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> >> index 57a5801..c458ed9 100644
> >> --- a/lib/librte_vhost/virtio-net.c
> >> +++ b/lib/librte_vhost/virtio-net.c
> >> @@ -50,6 +50,7 @@
> >> #include <rte_virtio_net.h>
> >>
> >> #include "vhost-net.h"
> >> +#include "virtio-net.h"
> >>
> >> /*
> >> * Device linked list structure for configuration.
> >> @@ -60,7 +61,7 @@ struct virtio_net_config_ll {
> >> };
> >>
> >> /* device ops to add/remove device to/from data core. */
> >> -static struct virtio_net_device_ops const *notify_ops;
> >> +struct virtio_net_device_ops const *notify_ops;
> >> /* root address of the linked list of managed virtio devices */
> >> static struct virtio_net_config_ll *ll_root;
> >>
> >> @@ -88,8 +89,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
> >> if ((qemu_va >= region->userspace_address) &&
> >> (qemu_va <= region->userspace_address +
> >> region->memory_size)) {
> >> - vhost_va = dev->mem->mapped_address + qemu_va -
> >> - dev->mem->base_address;
> >> + vhost_va = qemu_va + region->guest_phys_address +
> >> + region->address_offset -
> >> + region->userspace_address;
> >> break;
> >> }
> >> }
> >> @@ -119,7 +121,7 @@ get_config_ll_entry(struct vhost_device_ctx ctx)
> >> * Searches the configuration core linked list and
> >> * retrieves the device if it exists.
> >> */
> >> -static struct virtio_net *
> >> +struct virtio_net *
> >> get_device(struct vhost_device_ctx ctx)
> >> {
> >> struct virtio_net_config_ll *ll_dev;
> >> @@ -256,6 +258,11 @@ init_device(struct virtio_net *dev)
> >> memset(dev->virtqueue[VIRTIO_RXQ], 0, sizeof(struct vhost_virtqueue));
> >> memset(dev->virtqueue[VIRTIO_TXQ], 0, sizeof(struct vhost_virtqueue));
> >>
> >> + dev->virtqueue[VIRTIO_RXQ]->kickfd = (eventfd_t)-1;
> >> + dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> >> + dev->virtqueue[VIRTIO_TXQ]->kickfd = (eventfd_t)-1;
> >> + dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> >> +
> >> /* Backends are set to -1 indicating an inactive device. */
> >> dev->virtqueue[VIRTIO_RXQ]->backend = VIRTIO_DEV_STOPPED;
> >> dev->virtqueue[VIRTIO_TXQ]->backend = VIRTIO_DEV_STOPPED;
> >> @@ -455,12 +462,6 @@ set_mem_table(struct vhost_device_ctx ctx,
> >> if (dev == NULL)
> >> return -1;
> >>
> >> - if (dev->mem) {
> >> - munmap((void *)(uintptr_t)dev->mem->mapped_address,
> >> - (size_t)dev->mem->mapped_size);
> >> - free(dev->mem);
> >> - }
> >> -
> >> /* Malloc the memory structure depending on the number of regions. */
> >> mem = calloc(1, sizeof(struct virtio_memory) +
> >> (sizeof(struct virtio_memory_regions) * nregions));
> >> @@ -624,7 +625,7 @@ set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> >> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
> >> vq = dev->virtqueue[file->index];
> >>
> >> - if (vq->kickfd)
> >> + if ((int)vq->kickfd >= 0)
> >> close((int)vq->kickfd);
> >>
> >> vq->kickfd = file->fd;
> >> @@ -650,8 +651,9 @@ set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file)
> >> /* file->index refers to the queue index. The txq is 1, rxq is 0. */
> >> vq = dev->virtqueue[file->index];
> >>
> >> - if (vq->callfd)
> >> + if ((int)vq->callfd >= 0)
> >> close((int)vq->callfd);
> >> +
> >> vq->callfd = file->fd;
> >>
> >> return 0;
> >> diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h
> >> new file mode 100644
> >> index 0000000..75fb57e
> >> --- /dev/null
> >> +++ b/lib/librte_vhost/virtio-net.h
> >> @@ -0,0 +1,43 @@
> >> +/*-
> >> + * BSD LICENSE
> >> + *
> >> + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> >> + * All rights reserved.
> >> + *
> >> + * Redistribution and use in source and binary forms, with or without
> >> + * modification, are permitted provided that the following conditions
> >> + * are met:
> >> + *
> >> + * * Redistributions of source code must retain the above copyright
> >> + * notice, this list of conditions and the following disclaimer.
> >> + * * Redistributions in binary form must reproduce the above copyright
> >> + * notice, this list of conditions and the following disclaimer in
> >> + * the documentation and/or other materials provided with the
> >> + * distribution.
> >> + * * Neither the name of Intel Corporation nor the names of its
> >> + * contributors may be used to endorse or promote products derived
> >> + * from this software without specific prior written permission.
> >> + *
> >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >> + */
> >> +
> >> +#ifndef _VIRTIO_NET_H
> >> +#define _VIRTIO_NET_H
> >> +
> >> +#include "vhost-net.h"
> >> +#include "rte_virtio_net.h"
> >> +
> >> +struct virtio_net_device_ops const *notify_ops;
> >> +struct virtio_net *get_device(struct vhost_device_ctx ctx);
> >> +
> >> +#endif
> >> --
> >> 1.8.1.4
> >>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2015-01-28 14:37 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <1422242440-28948-1-git-send-email-huawei.xie@intel.com>
[not found] ` <1422242440-28948-11-git-send-email-huawei.xie@intel.com>
2015-01-28 13:34 ` [Qemu-devel] [dpdk-dev] [RFC PATCH v2 10/14] vhost user support Michael S. Tsirkin
2015-01-28 14:27 ` Nikolay Nikolaev
2015-01-28 14:37 ` Michael S. Tsirkin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).