qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line
@ 2014-06-10 10:02 Nikolay Nikolaev
  2014-06-10 10:02 ` [Qemu-devel] [PATCH v10-fix 16/18] Add vhost-user protocol documentation Nikolay Nikolaev
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Nikolay Nikolaev @ 2014-06-10 10:02 UTC (permalink / raw)
  To: snabb-devel, qemu-devel; +Cc: a.motakis, luke, tech, n.nikolaev, mst

The supplied chardev id will be inspected for supported options. Only
a socket backend, with a set path (i.e. a Unix socket) and optionally
the server parameter set, will be allowed. Other options (nowait, telnet)
will make the chardev unusable and the netdev will not be initialised.

Additional checks for validity:
  - requires `-numa node,memdev=..`
  - requires `-device virtio-net-*`

The `vhostforce` option is used to force vhost-net when we deal with
non-MSIX guests.

Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
 hmp-commands.hx    |    4 +-
 hw/net/vhost_net.c |    4 ++
 net/hub.c          |    1 
 net/net.c          |    3 +
 net/vhost-user.c   |  115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 qapi-schema.json   |   19 ++++++++-
 qemu-options.hx    |   18 ++++++++
 7 files changed, 158 insertions(+), 6 deletions(-)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 2e462c0..bc9c032 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1206,7 +1206,7 @@ ETEXI
     {
         .name       = "host_net_add",
         .args_type  = "device:s,opts:s?",
-        .params     = "tap|user|socket|vde|netmap|dump [options]",
+        .params     = "tap|user|socket|vde|netmap|vhost-user|dump [options]",
         .help       = "add host VLAN client",
         .mhandler.cmd = net_host_device_add,
     },
@@ -1234,7 +1234,7 @@ ETEXI
     {
         .name       = "netdev_add",
         .args_type  = "netdev:O",
-        .params     = "[user|tap|socket|vde|bridge|hubport|netmap],id=str[,prop=value][,...]",
+        .params     = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]",
         .help       = "add host network device",
         .mhandler.cmd = hmp_netdev_add,
         .command_completion = netdev_add_completion,
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 5f06736..7ac7c21 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -15,6 +15,7 @@
 
 #include "net/net.h"
 #include "net/tap.h"
+#include "net/vhost-user.h"
 
 #include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
@@ -360,6 +361,9 @@ VHostNetState *get_vhost_net(NetClientState *nc)
     case NET_CLIENT_OPTIONS_KIND_TAP:
         vhost_net = tap_get_vhost_net(nc);
         break;
+    case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
+        vhost_net = vhost_user_get_vhost_net(nc);
+        break;
     default:
         break;
     }
diff --git a/net/hub.c b/net/hub.c
index 33a99c9..7e0f2d6 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -322,6 +322,7 @@ void net_hub_check_clients(void)
             case NET_CLIENT_OPTIONS_KIND_TAP:
             case NET_CLIENT_OPTIONS_KIND_SOCKET:
             case NET_CLIENT_OPTIONS_KIND_VDE:
+            case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
                 has_host_dev = 1;
                 break;
             default:
diff --git a/net/net.c b/net/net.c
index 0ff2e40..c91b67b 100644
--- a/net/net.c
+++ b/net/net.c
@@ -786,6 +786,7 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
         [NET_CLIENT_OPTIONS_KIND_BRIDGE]    = net_init_bridge,
 #endif
         [NET_CLIENT_OPTIONS_KIND_HUBPORT]   = net_init_hubport,
+        [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
 };
 
 
@@ -819,6 +820,7 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
         case NET_CLIENT_OPTIONS_KIND_BRIDGE:
 #endif
         case NET_CLIENT_OPTIONS_KIND_HUBPORT:
+        case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
             break;
 
         default:
@@ -907,6 +909,7 @@ static int net_host_check_device(const char *device)
 #ifdef CONFIG_VDE
                                        ,"vde"
 #endif
+                                       ,"vhost-user"
     };
     for (i = 0; i < ARRAY_SIZE(valid_param_list); i++) {
         if (!strncmp(valid_param_list[i], device,
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 4bdd19d..32b78fb 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -12,6 +12,7 @@
 #include "net/vhost_net.h"
 #include "net/vhost-user.h"
 #include "sysemu/char.h"
+#include "qemu/config-file.h"
 #include "qemu/error-report.h"
 
 typedef struct VhostUserState {
@@ -21,9 +22,17 @@ typedef struct VhostUserState {
     VHostNetState *vhost_net;
 } VhostUserState;
 
+typedef struct VhostUserChardevProps {
+    bool is_socket;
+    bool is_unix;
+    bool is_server;
+    bool has_unsupported;
+} VhostUserChardevProps;
+
 VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
 {
     VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
+    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
     return s->vhost_net;
 }
 
@@ -82,7 +91,7 @@ static bool vhost_user_has_ufo(NetClientState *nc)
 }
 
 static NetClientInfo net_vhost_user_info = {
-        .type = 0,
+        .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER,
         .size = sizeof(VhostUserState),
         .cleanup = vhost_user_cleanup,
         .has_vnet_hdr = vhost_user_has_vnet_hdr,
@@ -148,8 +157,108 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
     return 0;
 }
 
+static int net_vhost_chardev_opts(const char *name, const char *value,
+                                  void *opaque)
+{
+    VhostUserChardevProps *props = opaque;
+
+    if (strcmp(name, "backend") == 0 && strcmp(value, "socket") == 0) {
+        props->is_socket = true;
+    } else if (strcmp(name, "path") == 0) {
+        props->is_unix = true;
+    } else if (strcmp(name, "server") == 0) {
+        props->is_server = true;
+    } else {
+        error_report("vhost-user does not support a chardev"
+                     " with the following option:\n %s = %s",
+                     name, value);
+        props->has_unsupported = true;
+        return -1;
+    }
+    return 0;
+}
+
+static CharDriverState *net_vhost_parse_chardev(const NetdevVhostUserOptions *opts)
+{
+    CharDriverState *chr = qemu_chr_find(opts->chardev);
+    VhostUserChardevProps props;
+
+    if (chr == NULL) {
+        error_report("chardev \"%s\" not found", opts->chardev);
+        return NULL;
+    }
+
+    /* inspect chardev opts */
+    memset(&props, 0, sizeof(props));
+    qemu_opt_foreach(chr->opts, net_vhost_chardev_opts, &props, false);
+
+    if (!props.is_socket || !props.is_unix) {
+        error_report("chardev \"%s\" is not a unix socket",
+                     opts->chardev);
+        return NULL;
+    }
+
+    if (props.has_unsupported) {
+        error_report("chardev \"%s\" has an unsupported option",
+                opts->chardev);
+        return NULL;
+    }
+
+    qemu_chr_fe_claim_no_fail(chr);
+
+    return chr;
+}
+
+static int net_vhost_check_net(QemuOpts *opts, void *opaque)
+{
+    const char *name = opaque;
+    const char *driver, *netdev;
+    const char virtio_name[] = "virtio-net-";
+
+    driver = qemu_opt_get(opts, "driver");
+    netdev = qemu_opt_get(opts, "netdev");
+
+    if (!driver || !netdev) {
+        return 0;
+    }
+
+    if (strcmp(netdev, name) == 0 &&
+        strncmp(driver, virtio_name, strlen(virtio_name)) != 0) {
+        error_report("vhost-user requires frontend driver virtio-net-*");
+        return -1;
+    }
+
+    return 0;
+}
+
 int net_init_vhost_user(const NetClientOptions *opts, const char *name,
-                   NetClientState *peer)
+                        NetClientState *peer)
 {
-    return net_vhost_user_init(peer, "vhost_user", 0, 0, 0);
+    const NetdevVhostUserOptions *vhost_user_opts;
+    CharDriverState *chr;
+    bool vhostforce;
+
+    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+    vhost_user_opts = opts->vhost_user;
+
+    chr = net_vhost_parse_chardev(vhost_user_opts);
+    if (!chr) {
+        error_report("No suitable chardev found");
+        return -1;
+    }
+
+    /* verify net frontend */
+    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
+                          (char *)name, true) == -1) {
+        return -1;
+    }
+
+    /* vhost-force for non-MSIX */
+    if (vhost_user_opts->has_vhost_force) {
+        vhostforce = vhost_user_opts->vhost_force;
+    } else {
+        vhostforce = false;
+    }
+
+    return net_vhost_user_init(peer, "vhost_user", name, chr, vhostforce);
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index 7bc33ea..f062ce9 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3267,6 +3267,22 @@
     '*devname':    'str' } }
 
 ##
+# @NetdevVhostUserOptions
+#
+# Vhost-user network backend
+#
+# @chardev: name of a unix socket chardev
+#
+# @vhost-force: #optional vhost on for non-MSIX virtio guests (default: false).
+#
+# Since 2.1
+##
+{ 'type': 'NetdevVhostUserOptions',
+  'data': {
+    'chardev':        'str',
+    '*vhost-force':    'bool' } }
+
+##
 # @NetClientOptions
 #
 # A discriminated record of network device traits.
@@ -3284,7 +3300,8 @@
     'dump':     'NetdevDumpOptions',
     'bridge':   'NetdevBridgeOptions',
     'hubport':  'NetdevHubPortOptions',
-    'netmap':   'NetdevNetmapOptions' } }
+    'netmap':   'NetdevNetmapOptions',
+    'vhost-user': 'NetdevVhostUserOptions' } }
 
 ##
 # @NetLegacy
diff --git a/qemu-options.hx b/qemu-options.hx
index 5a4eff9..1ad2528 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1460,6 +1460,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
 #ifdef CONFIG_NETMAP
     "netmap|"
 #endif
+    "vhost-user|"
     "socket|"
     "hubport],id=str[,option][,option][,...]\n", QEMU_ARCH_ALL)
 STEXI
@@ -1791,6 +1792,23 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
 netdev.  @code{-net} and @code{-device} with parameter @option{vlan} create the
 required hub automatically.
 
+@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
+
+Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
+be a unix domain socket backed one. The vhost-user uses a specifically defined
+protocol to pass vhost ioctl replacement messages to an application on the other
+end of the socket. On non-MSIX guests, the feature can be forced with
+@var{vhostforce}.
+
+Example:
+@example
+qemu -m 512 -object memory-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
+     -numa node,memdev=mem \
+     -chardev socket,path=/path/to/socket \
+     -netdev type=vhost-user,id=net0,chardev=chr0 \
+     -device virtio-net-pci,netdev=net0
+@end example
+
 @item -net dump[,vlan=@var{n}][,file=@var{file}][,len=@var{len}]
 Dump network traffic on VLAN @var{n} to file @var{file} (@file{qemu-vlan0.pcap} by default).
 At most @var{len} bytes (64k by default) per packet are stored. The file format is

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH v10-fix 16/18] Add vhost-user protocol documentation
  2014-06-10 10:02 [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line Nikolay Nikolaev
@ 2014-06-10 10:02 ` Nikolay Nikolaev
  2014-06-10 10:22   ` Michael S. Tsirkin
  2014-06-10 10:03 ` [Qemu-devel] [PATCH v10-fix 18/18] Add qtest for vhost-user Nikolay Nikolaev
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Nikolay Nikolaev @ 2014-06-10 10:02 UTC (permalink / raw)
  To: snabb-devel, qemu-devel; +Cc: a.motakis, luke, tech, n.nikolaev, mst

This document describes the basic message format used by vhost-user
for communication over a unix domain socket. The protocol is based
on the existing ioctl interface used for the kernel version of vhost.

Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
 docs/specs/vhost-user.txt |  267 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 267 insertions(+)
 create mode 100644 docs/specs/vhost-user.txt

diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
new file mode 100644
index 0000000..808ae81
--- /dev/null
+++ b/docs/specs/vhost-user.txt
@@ -0,0 +1,267 @@
+Vhost-user Protocol
+===================
+
+Copyright (c) 2014 Virtual Open Systems Sarl.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.
+See the COPYING file in the top-level directory.
+===================
+
+This protocol is aiming to complement the ioctl interface used to control the
+vhost implementation in the Linux kernel. It implements the control plane needed
+to establish virtqueue sharing with a user space process on the same host. It
+uses communication over a Unix domain socket to share file descriptors in the
+ancillary data of the message.
+
+The protocol defines 2 sides of the communication, master and slave. Master is
+the application that shares its virtqueues, in our case QEMU. Slave is the
+consumer of the virtqueues.
+
+In the current implementation QEMU is the Master, and the Slave is intended to
+be a software Ethernet switch running in user space, such as Snabbswitch.
+
+Master and slave can be either a client (i.e. connecting) or server (listening)
+in the socket communication.
+
+Message Specification
+---------------------
+
+Note that all numbers are in the machine native byte order. A vhost-user message
+consists of 3 header fields and a payload:
+
+------------------------------------
+| request | flags | size | payload |
+------------------------------------
+
+ * Request: 32-bit type of the request
+ * Flags: 32-bit bit field:
+   - Lower 2 bits are the version (currently 0x01)
+   - Bit 2 is the reply flag - needs to be sent on each reply from the slave
+ * Size - 32-bit size of the payload
+
+
+Depending on the request type, payload can be:
+
+ * A single 64-bit integer
+   -------
+   | u64 |
+   -------
+
+   u64: a 64-bit unsigned integer
+
+ * A vring state description
+   ---------------
+  | index | num |
+  ---------------
+
+   Index: a 32-bit index
+   Num: a 32-bit number
+
+ * A vring address description
+   --------------------------------------------------------------
+   | index | flags | size | descriptor | used | available | log |
+   --------------------------------------------------------------
+
+   Index: a 32-bit vring index
+   Flags: a 32-bit vring flags
+   Descriptor: a 64-bit user address of the vring descriptor table
+   Used: a 64-bit user address of the vring used ring
+   Available: a 64-bit user address of the vring available ring
+   Log: a 64-bit guest address for logging
+
+ * Memory regions description
+   ---------------------------------------------------
+   | num regions | padding | region0 | ... | region7 |
+   ---------------------------------------------------
+
+   Num regions: a 32-bit number of regions
+   Padding: 32-bit
+
+   A region is:
+   ---------------------------------------
+   | guest address | size | user address |
+   ---------------------------------------
+
+   Guest address: a 64-bit guest address of the region
+   Size: a 64-bit size
+   User address: a 64-bit user address
+
+
+In QEMU the vhost-user message is implemented with the following struct:
+
+typedef struct VhostUserMsg {
+    VhostUserRequest request;
+    uint32_t flags;
+    uint32_t size;
+    union {
+        uint64_t u64;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        VhostUserMemory memory;
+    };
+} QEMU_PACKED VhostUserMsg;
+
+Communication
+-------------
+
+The protocol for vhost-user is based on the existing implementation of vhost
+for the Linux Kernel. Most messages that can be sent via the Unix domain socket
+implementing vhost-user have an equivalent ioctl to the kernel implementation.
+
+The communication consists of master sending message requests and slave sending
+message replies. Most of the requests don't require replies. Here is a list of
+the ones that do:
+
+ * VHOST_GET_FEATURES
+ * VHOST_GET_VRING_BASE
+
+There are several messages that the master sends with file descriptors passed
+in the ancillary data:
+
+ * VHOST_SET_MEM_TABLE
+ * VHOST_SET_LOG_FD
+ * VHOST_SET_VRING_KICK
+ * VHOST_SET_VRING_CALL
+ * VHOST_SET_VRING_ERR
+
+If Master is unable to send the full message or receives a wrong reply it will
+close the connection. An optional reconnection mechanism can be implemented.
+
+Message types
+-------------
+
+ * VHOST_USER_GET_FEATURES
+
+      Id: 2
+      Equivalent ioctl: VHOST_GET_FEATURES
+      Master payload: N/A
+      Slave payload: u64
+
+      Get from the underlying vhost implementation the features bitmask.
+
+ * VHOST_USER_SET_FEATURES
+
+      Id: 3
+      Ioctl: VHOST_SET_FEATURES
+      Master payload: u64
+
+      Enable features in the underlying vhost implementation using a bitmask.
+
+ * VHOST_USER_SET_OWNER
+
+      Id: 4
+      Equivalent ioctl: VHOST_SET_OWNER
+      Master payload: N/A
+
+      Issued when a new connection is established. It sets the current Master
+      as an owner of the session. This can be used on the Slave as a
+      "session start" flag.
+
+ * VHOST_USER_RESET_OWNER
+
+      Id: 5
+      Equivalent ioctl: VHOST_RESET_OWNER
+      Master payload: N/A
+
+      Issued when a new connection is about to be closed. The Master will no
+      longer own this connection (and will usually close it).
+
+ * VHOST_USER_SET_MEM_TABLE
+
+      Id: 6
+      Equivalent ioctl: VHOST_SET_MEM_TABLE
+      Master payload: memory regions description
+
+      Sets the memory map regions on the slave so it can translate the vring
+      addresses. In the ancillary data there is an array of file descriptors
+      for each memory mapped region. The size and ordering of the fds matches
+      the number and ordering of memory regions.
+
+ * VHOST_USER_SET_LOG_BASE
+
+      Id: 7
+      Equivalent ioctl: VHOST_SET_LOG_BASE
+      Master payload: u64
+
+      Sets the logging base address.
+
+ * VHOST_USER_SET_LOG_FD
+
+      Id: 8
+      Equivalent ioctl: VHOST_SET_LOG_FD
+      Master payload: N/A
+
+      Sets the logging file descriptor, which is passed as ancillary data.
+
+ * VHOST_USER_SET_VRING_NUM
+
+      Id: 9
+      Equivalent ioctl: VHOST_SET_VRING_NUM
+      Master payload: vring state description
+
+      Sets the number of vrings for this owner.
+
+ * VHOST_USER_SET_VRING_ADDR
+
+      Id: 10
+      Equivalent ioctl: VHOST_SET_VRING_ADDR
+      Master payload: vring address description
+      Slave payload: N/A
+
+      Sets the addresses of the different aspects of the vring.
+
+ * VHOST_USER_SET_VRING_BASE
+
+      Id: 11
+      Equivalent ioctl: VHOST_SET_VRING_BASE
+      Master payload: vring state description
+
+      Sets the base offset in the available vring.
+
+ * VHOST_USER_GET_VRING_BASE
+
+      Id: 12
+      Equivalent ioctl: VHOST_USER_GET_VRING_BASE
+      Master payload: vring state description
+      Slave payload: vring state description
+
+      Get the available vring base offset.
+
+ * VHOST_USER_SET_VRING_KICK
+
+      Id: 13
+      Equivalent ioctl: VHOST_SET_VRING_KICK
+      Master payload: u64
+
+      Set the event file descriptor for adding buffers to the vring. It
+      is passed in the ancillary data.
+      Bits (0-7) of the payload contain the vring index. Bit 8 is the
+      invalid FD flag. This flag is set when there is no file descriptor
+      in the ancillary data. This signals that polling should be used
+      instead of waiting for a kick.
+
+ * VHOST_USER_SET_VRING_CALL
+
+      Id: 14
+      Equivalent ioctl: VHOST_SET_VRING_CALL
+      Master payload: u64
+
+      Set the event file descriptor to signal when buffers are used. It
+      is passed in the ancillary data.
+      Bits (0-7) of the payload contain the vring index. Bit 8 is the
+      invalid FD flag. This flag is set when there is no file descriptor
+      in the ancillary data. This signals that polling will be used
+      instead of waiting for the call.
+
+ * VHOST_USER_SET_VRING_ERR
+
+      Id: 15
+      Equivalent ioctl: VHOST_SET_VRING_ERR
+      Master payload: u64
+
+      Set the event file descriptor to signal when error occurs. It
+      is passed in the ancillary data.
+      Bits (0-7) of the payload contain the vring index. Bit 8 is the
+      invalid FD flag. This flag is set when there is no file descriptor
+      in the ancillary data.
+

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Qemu-devel] [PATCH v10-fix 18/18] Add qtest for vhost-user
  2014-06-10 10:02 [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line Nikolay Nikolaev
  2014-06-10 10:02 ` [Qemu-devel] [PATCH v10-fix 16/18] Add vhost-user protocol documentation Nikolay Nikolaev
@ 2014-06-10 10:03 ` Nikolay Nikolaev
  2014-06-10 10:49   ` Michael S. Tsirkin
  2014-06-10 10:34 ` [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line Michael S. Tsirkin
  2014-06-10 12:11 ` [Qemu-devel] " Eric Blake
  3 siblings, 1 reply; 11+ messages in thread
From: Nikolay Nikolaev @ 2014-06-10 10:03 UTC (permalink / raw)
  To: snabb-devel, qemu-devel; +Cc: a.motakis, luke, tech, n.nikolaev, mst

This test creates a 'server' chardev to listen for vhost-user messages.
Once VHOST_USER_SET_MEM_TABLE is received it mmaps each received region,
and read 1k bytes from it. The read data is compared to data from readl.

The test requires hugetlbfs to be already mounted and writable. The mount
point defaults to '/hugetlbfs' and can be specified via the environment
variable QTEST_HUGETLBFS_PATH.

The rom pc-bios/pxe-virtio.rom is used to instantiate a virtio pcicontroller.

Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
 tests/Makefile          |    4 +
 tests/vhost-user-test.c |  312 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 316 insertions(+)
 create mode 100644 tests/vhost-user-test.c

diff --git a/tests/Makefile b/tests/Makefile
index 361bb7b..d1c399c 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -156,6 +156,7 @@ gcov-files-i386-y += hw/usb/hcd-ehci.c
 gcov-files-i386-y += hw/usb/hcd-uhci.c
 gcov-files-i386-y += hw/usb/dev-hid.c
 gcov-files-i386-y += hw/usb/dev-storage.c
+check-qtest-i386-y += tests/vhost-user-test$(EXESUF)
 check-qtest-x86_64-y = $(check-qtest-i386-y)
 gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
 gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@@ -322,9 +323,12 @@ tests/es1370-test$(EXESUF): tests/es1370-test.o
 tests/intel-hda-test$(EXESUF): tests/intel-hda-test.o
 tests/ioh3420-test$(EXESUF): tests/ioh3420-test.o
 tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-pc-obj-y)
+tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o libqemuutil.a libqemustub.a
 tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
 tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o libqemuutil.a libqemustub.a
 
+LIBS+= -lutil
+
 # QTest rules
 
 TARGETS=$(patsubst %-softmmu,%, $(filter %-softmmu,$(TARGET_DIRS)))
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
new file mode 100644
index 0000000..4bdfa8b
--- /dev/null
+++ b/tests/vhost-user-test.c
@@ -0,0 +1,312 @@
+/*
+ * QTest testcase for the vhost-user
+ *
+ * Copyright (c) 2014 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "libqtest.h"
+#include "qemu/option.h"
+#include "sysemu/char.h"
+#include "sysemu/sysemu.h"
+
+#include <glib.h>
+#include <linux/vhost.h>
+#include <sys/mman.h>
+#include <sys/vfs.h>
+#include <qemu/sockets.h>
+
+#define QEMU_CMD_ACCEL  " -machine accel=tcg"
+#define QEMU_CMD_MEM    " -m 512 -object memory-file,id=mem,size=512M,"\
+                        "mem-path=%s,share=on -numa node,memdev=mem"
+#define QEMU_CMD_CHR    " -chardev socket,id=chr0,path=%s"
+#define QEMU_CMD_NETDEV " -netdev vhost-user,id=net0,chardev=chr0,vhostforce"
+#define QEMU_CMD_NET    " -device virtio-net-pci,netdev=net0 "
+#define QEMU_CMD_ROM    " -option-rom ../pc-bios/pxe-virtio.rom"
+
+#define QEMU_CMD        QEMU_CMD_ACCEL QEMU_CMD_MEM QEMU_CMD_CHR \
+                        QEMU_CMD_NETDEV QEMU_CMD_NET QEMU_CMD_ROM
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+/*********** FROM hw/virtio/vhost-user.c *************************************/
+
+#define VHOST_MEMORY_MAX_NREGIONS    8
+
+typedef enum VhostUserRequest {
+    VHOST_USER_NONE = 0,
+    VHOST_USER_GET_FEATURES = 1,
+    VHOST_USER_SET_FEATURES = 2,
+    VHOST_USER_SET_OWNER = 3,
+    VHOST_USER_RESET_OWNER = 4,
+    VHOST_USER_SET_MEM_TABLE = 5,
+    VHOST_USER_SET_LOG_BASE = 6,
+    VHOST_USER_SET_LOG_FD = 7,
+    VHOST_USER_SET_VRING_NUM = 8,
+    VHOST_USER_SET_VRING_ADDR = 9,
+    VHOST_USER_SET_VRING_BASE = 10,
+    VHOST_USER_GET_VRING_BASE = 11,
+    VHOST_USER_SET_VRING_KICK = 12,
+    VHOST_USER_SET_VRING_CALL = 13,
+    VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+    uint64_t guest_phys_addr;
+    uint64_t memory_size;
+    uint64_t userspace_addr;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+    uint32_t nregions;
+    uint32_t padding;
+    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+    VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK     (0x3)
+#define VHOST_USER_REPLY_MASK       (0x1<<2)
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+    union {
+        uint64_t u64;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        VhostUserMemory memory;
+    };
+} QEMU_PACKED VhostUserMsg;
+
+static VhostUserMsg m __attribute__ ((unused));
+#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
+                            + sizeof(m.flags) \
+                            + sizeof(m.size))
+
+#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    (0x1)
+/*****************************************************************************/
+
+int fds_num = 0, fds[VHOST_MEMORY_MAX_NREGIONS];
+static VhostUserMemory memory;
+static GMutex data_mutex;
+static GCond data_cond;
+
+static void read_guest_mem(void)
+{
+    uint32_t *guest_mem;
+    gint64 end_time;
+    int i, j;
+
+    g_mutex_lock(&data_mutex);
+
+    end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
+    while (!fds_num) {
+        if (!g_cond_wait_until(&data_cond, &data_mutex, end_time)) {
+            /* timeout has passed */
+            g_assert(fds_num);
+            break;
+        }
+    }
+
+    /* check for sanity */
+    g_assert_cmpint(fds_num, >, 0);
+    g_assert_cmpint(fds_num, ==, memory.nregions);
+
+    /* iterate all regions */
+    for (i = 0; i < fds_num; i++) {
+
+        /* We'll check only the region statring at 0x0*/
+        if (memory.regions[i].guest_phys_addr != 0x0) {
+            continue;
+        }
+
+        g_assert_cmpint(memory.regions[i].memory_size, >, 1024);
+
+        guest_mem = mmap(0, memory.regions[i].memory_size,
+        PROT_READ | PROT_WRITE, MAP_SHARED, fds[i], 0);
+
+        for (j = 0; j < 256; j++) {
+            uint32_t a = readl(memory.regions[i].guest_phys_addr + j*4);
+            uint32_t b = guest_mem[j];
+
+            g_assert_cmpint(a, ==, b);
+        }
+
+        munmap(guest_mem, memory.regions[i].memory_size);
+    }
+
+    g_assert_cmpint(1, ==, 1);
+    g_mutex_unlock(&data_mutex);
+}
+
+static void *thread_function(void *data)
+{
+    GMainLoop *loop;
+    loop = g_main_loop_new(NULL, FALSE);
+    g_main_loop_run(loop);
+    return NULL;
+}
+
+static int chr_can_read(void *opaque)
+{
+    return VHOST_USER_HDR_SIZE;
+}
+
+static void chr_read(void *opaque, const uint8_t *buf, int size)
+{
+    CharDriverState *chr = opaque;
+    VhostUserMsg msg;
+    uint8_t *p = (uint8_t *) &msg;
+    int fd;
+
+    if (size != VHOST_USER_HDR_SIZE) {
+        g_test_message("Wrong message size received %d\n", size);
+        return;
+    }
+
+    memcpy(p, buf, VHOST_USER_HDR_SIZE);
+
+    if (msg.size) {
+        p += VHOST_USER_HDR_SIZE;
+        qemu_chr_fe_read_all(chr, p, msg.size);
+    }
+
+    switch (msg.request) {
+    case VHOST_USER_GET_FEATURES:
+        /* send back features to qemu */
+        msg.flags |= VHOST_USER_REPLY_MASK;
+        msg.size = sizeof(m.u64);
+        msg.u64 = 0;
+        p = (uint8_t *) &msg;
+        qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
+        break;
+
+    case VHOST_USER_GET_VRING_BASE:
+        /* send back vring base to qemu */
+        msg.flags |= VHOST_USER_REPLY_MASK;
+        msg.size = sizeof(m.state);
+        msg.state.num = 0;
+        p = (uint8_t *) &msg;
+        qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
+        break;
+
+    case VHOST_USER_SET_MEM_TABLE:
+        /* received the mem table */
+        memcpy(&memory, &msg.memory, sizeof(msg.memory));
+        fds_num = qemu_chr_fe_get_msgfds(chr, fds, sizeof(fds) / sizeof(int));
+
+        /* signal the test that it can continue */
+        g_cond_signal(&data_cond);
+        g_mutex_unlock(&data_mutex);
+        break;
+
+    case VHOST_USER_SET_VRING_KICK:
+    case VHOST_USER_SET_VRING_CALL:
+        /* consume the fd */
+        qemu_chr_fe_get_msgfds(chr, &fd, 1);
+        /*
+         * This is a non-blocking eventfd.
+         * The receive function forces it to be blocking,
+         * so revert it back to non-blocking.
+         */
+        qemu_set_nonblock(fd);
+        break;
+    default:
+        break;
+    }
+}
+
+static const char *init_hugepagefs(void)
+{
+    const char *path;
+    struct statfs fs;
+    int ret;
+
+    path = getenv("QTEST_HUGETLBFS_PATH");
+    if (!path) {
+        path = "/hugetlbfs";
+    }
+
+    if (access(path, R_OK | W_OK | X_OK)) {
+        g_test_message("access on path (%s): %s\n", path, strerror(errno));
+        return 0;
+    }
+
+    do {
+        ret = statfs(path, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+        g_test_message("statfs on path (%s): %s\n", path, strerror(errno));
+        return 0;
+    }
+
+    if (fs.f_type != HUGETLBFS_MAGIC) {
+        g_test_message("Warning: path not on HugeTLBFS: %s\n", path);
+        return 0;
+    }
+
+    return path;
+}
+
+int main(int argc, char **argv)
+{
+    QTestState *s = NULL;
+    CharDriverState *chr = NULL;
+    const char *hugefs = 0;
+    char *socket_path = 0;
+    char *qemu_cmd = 0;
+    char *chr_path = 0;
+    int ret;
+
+    g_test_init(&argc, &argv, NULL);
+
+    module_call_init(MODULE_INIT_QOM);
+
+    hugefs = init_hugepagefs();
+    if (!hugefs) {
+        return 0;
+    }
+
+    socket_path = g_strdup_printf("/tmp/vhost-%d.sock", getpid());
+
+    /* create char dev and add read handlers */
+    qemu_add_opts(&qemu_chardev_opts);
+    chr_path = g_strdup_printf("unix:%s,server,nowait", socket_path);
+    chr = qemu_chr_new("chr0", chr_path, NULL);
+    g_free(chr_path);
+    qemu_chr_add_handlers(chr, chr_can_read, chr_read, NULL, chr);
+
+    /* run the main loop thread so the chardev may operate */
+    g_mutex_init(&data_mutex);
+    g_cond_init(&data_cond);
+    g_mutex_lock(&data_mutex);
+    g_thread_new(NULL, thread_function, NULL);
+
+    qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path);
+    s = qtest_start(qemu_cmd);
+    g_free(qemu_cmd);
+
+    qtest_add_func("/vhost-user/read-guest-mem", read_guest_mem);
+
+    ret = g_test_run();
+
+    if (s) {
+        qtest_quit(s);
+    }
+
+    /* cleanup */
+    unlink(socket_path);
+    g_free(socket_path);
+    g_cond_clear(&data_cond);
+    g_mutex_clear(&data_mutex);
+
+    return ret;
+}

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH v10-fix 16/18] Add vhost-user protocol documentation
  2014-06-10 10:02 ` [Qemu-devel] [PATCH v10-fix 16/18] Add vhost-user protocol documentation Nikolay Nikolaev
@ 2014-06-10 10:22   ` Michael S. Tsirkin
  0 siblings, 0 replies; 11+ messages in thread
From: Michael S. Tsirkin @ 2014-06-10 10:22 UTC (permalink / raw)
  To: Nikolay Nikolaev; +Cc: a.motakis, luke, snabb-devel, qemu-devel, tech

On Tue, Jun 10, 2014 at 01:02:57PM +0300, Nikolay Nikolaev wrote:
> This document describes the basic message format used by vhost-user
> for communication over a unix domain socket. The protocol is based
> on the existing ioctl interface used for the kernel version of vhost.
> 
> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>

Adds a whitespace error:
/scm/qemu/.git/rebase-apply/patch:278: new blank line at EOF.
+
warning: 1 line adds whitespace errors.

I fixed it up in my tree.

> ---
>  docs/specs/vhost-user.txt |  267 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 267 insertions(+)
>  create mode 100644 docs/specs/vhost-user.txt
> 
> diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt
> new file mode 100644
> index 0000000..808ae81
> --- /dev/null
> +++ b/docs/specs/vhost-user.txt
> @@ -0,0 +1,267 @@
> +Vhost-user Protocol
> +===================
> +
> +Copyright (c) 2014 Virtual Open Systems Sarl.
> +
> +This work is licensed under the terms of the GNU GPL, version 2 or later.
> +See the COPYING file in the top-level directory.
> +===================
> +
> +This protocol is aiming to complement the ioctl interface used to control the
> +vhost implementation in the Linux kernel. It implements the control plane needed
> +to establish virtqueue sharing with a user space process on the same host. It
> +uses communication over a Unix domain socket to share file descriptors in the
> +ancillary data of the message.
> +
> +The protocol defines 2 sides of the communication, master and slave. Master is
> +the application that shares its virtqueues, in our case QEMU. Slave is the
> +consumer of the virtqueues.
> +
> +In the current implementation QEMU is the Master, and the Slave is intended to
> +be a software Ethernet switch running in user space, such as Snabbswitch.
> +
> +Master and slave can be either a client (i.e. connecting) or server (listening)
> +in the socket communication.
> +
> +Message Specification
> +---------------------
> +
> +Note that all numbers are in the machine native byte order. A vhost-user message
> +consists of 3 header fields and a payload:
> +
> +------------------------------------
> +| request | flags | size | payload |
> +------------------------------------
> +
> + * Request: 32-bit type of the request
> + * Flags: 32-bit bit field:
> +   - Lower 2 bits are the version (currently 0x01)
> +   - Bit 2 is the reply flag - needs to be sent on each reply from the slave
> + * Size - 32-bit size of the payload
> +
> +
> +Depending on the request type, payload can be:
> +
> + * A single 64-bit integer
> +   -------
> +   | u64 |
> +   -------
> +
> +   u64: a 64-bit unsigned integer
> +
> + * A vring state description
> +   ---------------
> +  | index | num |
> +  ---------------
> +
> +   Index: a 32-bit index
> +   Num: a 32-bit number
> +
> + * A vring address description
> +   --------------------------------------------------------------
> +   | index | flags | size | descriptor | used | available | log |
> +   --------------------------------------------------------------
> +
> +   Index: a 32-bit vring index
> +   Flags: a 32-bit vring flags
> +   Descriptor: a 64-bit user address of the vring descriptor table
> +   Used: a 64-bit user address of the vring used ring
> +   Available: a 64-bit user address of the vring available ring
> +   Log: a 64-bit guest address for logging
> +
> + * Memory regions description
> +   ---------------------------------------------------
> +   | num regions | padding | region0 | ... | region7 |
> +   ---------------------------------------------------
> +
> +   Num regions: a 32-bit number of regions
> +   Padding: 32-bit
> +
> +   A region is:
> +   ---------------------------------------
> +   | guest address | size | user address |
> +   ---------------------------------------
> +
> +   Guest address: a 64-bit guest address of the region
> +   Size: a 64-bit size
> +   User address: a 64-bit user address
> +
> +
> +In QEMU the vhost-user message is implemented with the following struct:
> +
> +typedef struct VhostUserMsg {
> +    VhostUserRequest request;
> +    uint32_t flags;
> +    uint32_t size;
> +    union {
> +        uint64_t u64;
> +        struct vhost_vring_state state;
> +        struct vhost_vring_addr addr;
> +        VhostUserMemory memory;
> +    };
> +} QEMU_PACKED VhostUserMsg;
> +
> +Communication
> +-------------
> +
> +The protocol for vhost-user is based on the existing implementation of vhost
> +for the Linux Kernel. Most messages that can be sent via the Unix domain socket
> +implementing vhost-user have an equivalent ioctl to the kernel implementation.
> +
> +The communication consists of master sending message requests and slave sending
> +message replies. Most of the requests don't require replies. Here is a list of
> +the ones that do:
> +
> + * VHOST_GET_FEATURES
> + * VHOST_GET_VRING_BASE
> +
> +There are several messages that the master sends with file descriptors passed
> +in the ancillary data:
> +
> + * VHOST_SET_MEM_TABLE
> + * VHOST_SET_LOG_FD
> + * VHOST_SET_VRING_KICK
> + * VHOST_SET_VRING_CALL
> + * VHOST_SET_VRING_ERR
> +
> +If Master is unable to send the full message or receives a wrong reply it will
> +close the connection. An optional reconnection mechanism can be implemented.
> +
> +Message types
> +-------------
> +
> + * VHOST_USER_GET_FEATURES
> +
> +      Id: 2
> +      Equivalent ioctl: VHOST_GET_FEATURES
> +      Master payload: N/A
> +      Slave payload: u64
> +
> +      Get from the underlying vhost implementation the features bitmask.
> +
> + * VHOST_USER_SET_FEATURES
> +
> +      Id: 3
> +      Ioctl: VHOST_SET_FEATURES
> +      Master payload: u64
> +
> +      Enable features in the underlying vhost implementation using a bitmask.
> +
> + * VHOST_USER_SET_OWNER
> +
> +      Id: 4
> +      Equivalent ioctl: VHOST_SET_OWNER
> +      Master payload: N/A
> +
> +      Issued when a new connection is established. It sets the current Master
> +      as an owner of the session. This can be used on the Slave as a
> +      "session start" flag.
> +
> + * VHOST_USER_RESET_OWNER
> +
> +      Id: 5
> +      Equivalent ioctl: VHOST_RESET_OWNER
> +      Master payload: N/A
> +
> +      Issued when a new connection is about to be closed. The Master will no
> +      longer own this connection (and will usually close it).
> +
> + * VHOST_USER_SET_MEM_TABLE
> +
> +      Id: 6
> +      Equivalent ioctl: VHOST_SET_MEM_TABLE
> +      Master payload: memory regions description
> +
> +      Sets the memory map regions on the slave so it can translate the vring
> +      addresses. In the ancillary data there is an array of file descriptors
> +      for each memory mapped region. The size and ordering of the fds matches
> +      the number and ordering of memory regions.
> +
> + * VHOST_USER_SET_LOG_BASE
> +
> +      Id: 7
> +      Equivalent ioctl: VHOST_SET_LOG_BASE
> +      Master payload: u64
> +
> +      Sets the logging base address.
> +
> + * VHOST_USER_SET_LOG_FD
> +
> +      Id: 8
> +      Equivalent ioctl: VHOST_SET_LOG_FD
> +      Master payload: N/A
> +
> +      Sets the logging file descriptor, which is passed as ancillary data.
> +
> + * VHOST_USER_SET_VRING_NUM
> +
> +      Id: 9
> +      Equivalent ioctl: VHOST_SET_VRING_NUM
> +      Master payload: vring state description
> +
> +      Sets the number of vrings for this owner.
> +
> + * VHOST_USER_SET_VRING_ADDR
> +
> +      Id: 10
> +      Equivalent ioctl: VHOST_SET_VRING_ADDR
> +      Master payload: vring address description
> +      Slave payload: N/A
> +
> +      Sets the addresses of the different aspects of the vring.
> +
> + * VHOST_USER_SET_VRING_BASE
> +
> +      Id: 11
> +      Equivalent ioctl: VHOST_SET_VRING_BASE
> +      Master payload: vring state description
> +
> +      Sets the base offset in the available vring.
> +
> + * VHOST_USER_GET_VRING_BASE
> +
> +      Id: 12
> +      Equivalent ioctl: VHOST_USER_GET_VRING_BASE
> +      Master payload: vring state description
> +      Slave payload: vring state description
> +
> +      Get the available vring base offset.
> +
> + * VHOST_USER_SET_VRING_KICK
> +
> +      Id: 13
> +      Equivalent ioctl: VHOST_SET_VRING_KICK
> +      Master payload: u64
> +
> +      Set the event file descriptor for adding buffers to the vring. It
> +      is passed in the ancillary data.
> +      Bits (0-7) of the payload contain the vring index. Bit 8 is the
> +      invalid FD flag. This flag is set when there is no file descriptor
> +      in the ancillary data. This signals that polling should be used
> +      instead of waiting for a kick.
> +
> + * VHOST_USER_SET_VRING_CALL
> +
> +      Id: 14
> +      Equivalent ioctl: VHOST_SET_VRING_CALL
> +      Master payload: u64
> +
> +      Set the event file descriptor to signal when buffers are used. It
> +      is passed in the ancillary data.
> +      Bits (0-7) of the payload contain the vring index. Bit 8 is the
> +      invalid FD flag. This flag is set when there is no file descriptor
> +      in the ancillary data. This signals that polling will be used
> +      instead of waiting for the call.
> +
> + * VHOST_USER_SET_VRING_ERR
> +
> +      Id: 15
> +      Equivalent ioctl: VHOST_SET_VRING_ERR
> +      Master payload: u64
> +
> +      Set the event file descriptor to signal when error occurs. It
> +      is passed in the ancillary data.
> +      Bits (0-7) of the payload contain the vring index. Bit 8 is the
> +      invalid FD flag. This flag is set when there is no file descriptor
> +      in the ancillary data.
> +

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line
  2014-06-10 10:02 [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line Nikolay Nikolaev
  2014-06-10 10:02 ` [Qemu-devel] [PATCH v10-fix 16/18] Add vhost-user protocol documentation Nikolay Nikolaev
  2014-06-10 10:03 ` [Qemu-devel] [PATCH v10-fix 18/18] Add qtest for vhost-user Nikolay Nikolaev
@ 2014-06-10 10:34 ` Michael S. Tsirkin
  2014-06-10 10:50   ` Michael S. Tsirkin
  2014-06-10 12:11 ` [Qemu-devel] " Eric Blake
  3 siblings, 1 reply; 11+ messages in thread
From: Michael S. Tsirkin @ 2014-06-10 10:34 UTC (permalink / raw)
  To: Nikolay Nikolaev; +Cc: a.motakis, luke, snabb-devel, qemu-devel, tech

On Tue, Jun 10, 2014 at 01:02:16PM +0300, Nikolay Nikolaev wrote:
> The supplied chardev id will be inspected for supported options. Only
> a socket backend, with a set path (i.e. a Unix socket) and optionally
> the server parameter set, will be allowed. Other options (nowait, telnet)
> will make the chardev unusable and the netdev will not be initialised.
> 
> Additional checks for validity:
>   - requires `-numa node,memdev=..`
>   - requires `-device virtio-net-*`
> 
> The `vhostforce` option is used to force vhost-net when we deal with
> non-MSIX guests.
> 
> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> ---
>
Applied, thanks!
In the future pls just number them 0-18.
Also in the future, pls include the changelog here after ---

>  hmp-commands.hx    |    4 +-
>  hw/net/vhost_net.c |    4 ++
>  net/hub.c          |    1 
>  net/net.c          |    3 +
>  net/vhost-user.c   |  115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  qapi-schema.json   |   19 ++++++++-
>  qemu-options.hx    |   18 ++++++++
>  7 files changed, 158 insertions(+), 6 deletions(-)
> 
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index 2e462c0..bc9c032 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1206,7 +1206,7 @@ ETEXI
>      {
>          .name       = "host_net_add",
>          .args_type  = "device:s,opts:s?",
> -        .params     = "tap|user|socket|vde|netmap|dump [options]",
> +        .params     = "tap|user|socket|vde|netmap|vhost-user|dump [options]",
>          .help       = "add host VLAN client",
>          .mhandler.cmd = net_host_device_add,
>      },
> @@ -1234,7 +1234,7 @@ ETEXI
>      {
>          .name       = "netdev_add",
>          .args_type  = "netdev:O",
> -        .params     = "[user|tap|socket|vde|bridge|hubport|netmap],id=str[,prop=value][,...]",
> +        .params     = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]",
>          .help       = "add host network device",
>          .mhandler.cmd = hmp_netdev_add,
>          .command_completion = netdev_add_completion,
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 5f06736..7ac7c21 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -15,6 +15,7 @@
>  
>  #include "net/net.h"
>  #include "net/tap.h"
> +#include "net/vhost-user.h"
>  
>  #include "hw/virtio/virtio-net.h"
>  #include "net/vhost_net.h"
> @@ -360,6 +361,9 @@ VHostNetState *get_vhost_net(NetClientState *nc)
>      case NET_CLIENT_OPTIONS_KIND_TAP:
>          vhost_net = tap_get_vhost_net(nc);
>          break;
> +    case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> +        vhost_net = vhost_user_get_vhost_net(nc);
> +        break;
>      default:
>          break;
>      }
> diff --git a/net/hub.c b/net/hub.c
> index 33a99c9..7e0f2d6 100644
> --- a/net/hub.c
> +++ b/net/hub.c
> @@ -322,6 +322,7 @@ void net_hub_check_clients(void)
>              case NET_CLIENT_OPTIONS_KIND_TAP:
>              case NET_CLIENT_OPTIONS_KIND_SOCKET:
>              case NET_CLIENT_OPTIONS_KIND_VDE:
> +            case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
>                  has_host_dev = 1;
>                  break;
>              default:
> diff --git a/net/net.c b/net/net.c
> index 0ff2e40..c91b67b 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -786,6 +786,7 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
>          [NET_CLIENT_OPTIONS_KIND_BRIDGE]    = net_init_bridge,
>  #endif
>          [NET_CLIENT_OPTIONS_KIND_HUBPORT]   = net_init_hubport,
> +        [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
>  };
>  
>  
> @@ -819,6 +820,7 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
>          case NET_CLIENT_OPTIONS_KIND_BRIDGE:
>  #endif
>          case NET_CLIENT_OPTIONS_KIND_HUBPORT:
> +        case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
>              break;
>  
>          default:
> @@ -907,6 +909,7 @@ static int net_host_check_device(const char *device)
>  #ifdef CONFIG_VDE
>                                         ,"vde"
>  #endif
> +                                       ,"vhost-user"
>      };
>      for (i = 0; i < ARRAY_SIZE(valid_param_list); i++) {
>          if (!strncmp(valid_param_list[i], device,
> diff --git a/net/vhost-user.c b/net/vhost-user.c
> index 4bdd19d..32b78fb 100644
> --- a/net/vhost-user.c
> +++ b/net/vhost-user.c
> @@ -12,6 +12,7 @@
>  #include "net/vhost_net.h"
>  #include "net/vhost-user.h"
>  #include "sysemu/char.h"
> +#include "qemu/config-file.h"
>  #include "qemu/error-report.h"
>  
>  typedef struct VhostUserState {
> @@ -21,9 +22,17 @@ typedef struct VhostUserState {
>      VHostNetState *vhost_net;
>  } VhostUserState;
>  
> +typedef struct VhostUserChardevProps {
> +    bool is_socket;
> +    bool is_unix;
> +    bool is_server;
> +    bool has_unsupported;
> +} VhostUserChardevProps;
> +
>  VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
>  {
>      VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> +    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
>      return s->vhost_net;
>  }
>  
> @@ -82,7 +91,7 @@ static bool vhost_user_has_ufo(NetClientState *nc)
>  }
>  
>  static NetClientInfo net_vhost_user_info = {
> -        .type = 0,
> +        .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER,
>          .size = sizeof(VhostUserState),
>          .cleanup = vhost_user_cleanup,
>          .has_vnet_hdr = vhost_user_has_vnet_hdr,
> @@ -148,8 +157,108 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
>      return 0;
>  }
>  
> +static int net_vhost_chardev_opts(const char *name, const char *value,
> +                                  void *opaque)
> +{
> +    VhostUserChardevProps *props = opaque;
> +
> +    if (strcmp(name, "backend") == 0 && strcmp(value, "socket") == 0) {
> +        props->is_socket = true;
> +    } else if (strcmp(name, "path") == 0) {
> +        props->is_unix = true;
> +    } else if (strcmp(name, "server") == 0) {
> +        props->is_server = true;
> +    } else {
> +        error_report("vhost-user does not support a chardev"
> +                     " with the following option:\n %s = %s",
> +                     name, value);
> +        props->has_unsupported = true;
> +        return -1;
> +    }
> +    return 0;
> +}
> +
> +static CharDriverState *net_vhost_parse_chardev(const NetdevVhostUserOptions *opts)
> +{
> +    CharDriverState *chr = qemu_chr_find(opts->chardev);
> +    VhostUserChardevProps props;
> +
> +    if (chr == NULL) {
> +        error_report("chardev \"%s\" not found", opts->chardev);
> +        return NULL;
> +    }
> +
> +    /* inspect chardev opts */
> +    memset(&props, 0, sizeof(props));
> +    qemu_opt_foreach(chr->opts, net_vhost_chardev_opts, &props, false);
> +
> +    if (!props.is_socket || !props.is_unix) {
> +        error_report("chardev \"%s\" is not a unix socket",
> +                     opts->chardev);
> +        return NULL;
> +    }
> +
> +    if (props.has_unsupported) {
> +        error_report("chardev \"%s\" has an unsupported option",
> +                opts->chardev);
> +        return NULL;
> +    }
> +
> +    qemu_chr_fe_claim_no_fail(chr);
> +
> +    return chr;
> +}
> +
> +static int net_vhost_check_net(QemuOpts *opts, void *opaque)
> +{
> +    const char *name = opaque;
> +    const char *driver, *netdev;
> +    const char virtio_name[] = "virtio-net-";
> +
> +    driver = qemu_opt_get(opts, "driver");
> +    netdev = qemu_opt_get(opts, "netdev");
> +
> +    if (!driver || !netdev) {
> +        return 0;
> +    }
> +
> +    if (strcmp(netdev, name) == 0 &&
> +        strncmp(driver, virtio_name, strlen(virtio_name)) != 0) {
> +        error_report("vhost-user requires frontend driver virtio-net-*");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
>  int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> -                   NetClientState *peer)
> +                        NetClientState *peer)
>  {
> -    return net_vhost_user_init(peer, "vhost_user", 0, 0, 0);
> +    const NetdevVhostUserOptions *vhost_user_opts;
> +    CharDriverState *chr;
> +    bool vhostforce;
> +
> +    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
> +    vhost_user_opts = opts->vhost_user;
> +
> +    chr = net_vhost_parse_chardev(vhost_user_opts);
> +    if (!chr) {
> +        error_report("No suitable chardev found");
> +        return -1;
> +    }
> +
> +    /* verify net frontend */
> +    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
> +                          (char *)name, true) == -1) {
> +        return -1;
> +    }
> +
> +    /* vhost-force for non-MSIX */
> +    if (vhost_user_opts->has_vhost_force) {
> +        vhostforce = vhost_user_opts->vhost_force;
> +    } else {
> +        vhostforce = false;
> +    }
> +
> +    return net_vhost_user_init(peer, "vhost_user", name, chr, vhostforce);
>  }
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 7bc33ea..f062ce9 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -3267,6 +3267,22 @@
>      '*devname':    'str' } }
>  
>  ##
> +# @NetdevVhostUserOptions
> +#
> +# Vhost-user network backend
> +#
> +# @chardev: name of a unix socket chardev
> +#
> +# @vhost-force: #optional vhost on for non-MSIX virtio guests (default: false).
> +#
> +# Since 2.1
> +##
> +{ 'type': 'NetdevVhostUserOptions',
> +  'data': {
> +    'chardev':        'str',
> +    '*vhost-force':    'bool' } }
> +
> +##
>  # @NetClientOptions
>  #
>  # A discriminated record of network device traits.
> @@ -3284,7 +3300,8 @@
>      'dump':     'NetdevDumpOptions',
>      'bridge':   'NetdevBridgeOptions',
>      'hubport':  'NetdevHubPortOptions',
> -    'netmap':   'NetdevNetmapOptions' } }
> +    'netmap':   'NetdevNetmapOptions',
> +    'vhost-user': 'NetdevVhostUserOptions' } }
>  
>  ##
>  # @NetLegacy
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 5a4eff9..1ad2528 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -1460,6 +1460,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
>  #ifdef CONFIG_NETMAP
>      "netmap|"
>  #endif
> +    "vhost-user|"
>      "socket|"
>      "hubport],id=str[,option][,option][,...]\n", QEMU_ARCH_ALL)
>  STEXI
> @@ -1791,6 +1792,23 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
>  netdev.  @code{-net} and @code{-device} with parameter @option{vlan} create the
>  required hub automatically.
>  
> +@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> +
> +Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
> +be a unix domain socket backed one. The vhost-user uses a specifically defined
> +protocol to pass vhost ioctl replacement messages to an application on the other
> +end of the socket. On non-MSIX guests, the feature can be forced with
> +@var{vhostforce}.
> +
> +Example:
> +@example
> +qemu -m 512 -object memory-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
> +     -numa node,memdev=mem \
> +     -chardev socket,path=/path/to/socket \
> +     -netdev type=vhost-user,id=net0,chardev=chr0 \
> +     -device virtio-net-pci,netdev=net0
> +@end example
> +
>  @item -net dump[,vlan=@var{n}][,file=@var{file}][,len=@var{len}]
>  Dump network traffic on VLAN @var{n} to file @var{file} (@file{qemu-vlan0.pcap} by default).
>  At most @var{len} bytes (64k by default) per packet are stored. The file format is

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH v10-fix 18/18] Add qtest for vhost-user
  2014-06-10 10:03 ` [Qemu-devel] [PATCH v10-fix 18/18] Add qtest for vhost-user Nikolay Nikolaev
@ 2014-06-10 10:49   ` Michael S. Tsirkin
  0 siblings, 0 replies; 11+ messages in thread
From: Michael S. Tsirkin @ 2014-06-10 10:49 UTC (permalink / raw)
  To: Nikolay Nikolaev; +Cc: a.motakis, luke, snabb-devel, qemu-devel, tech

On Tue, Jun 10, 2014 at 01:03:23PM +0300, Nikolay Nikolaev wrote:
> This test creates a 'server' chardev to listen for vhost-user messages.
> Once VHOST_USER_SET_MEM_TABLE is received it mmaps each received region,
> and read 1k bytes from it. The read data is compared to data from readl.
> 
> The test requires hugetlbfs to be already mounted and writable. The mount
> point defaults to '/hugetlbfs' and can be specified via the environment
> variable QTEST_HUGETLBFS_PATH.
> 
> The rom pc-bios/pxe-virtio.rom is used to instantiate a virtio pcicontroller.
> 
> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> ---
>  tests/Makefile          |    4 +
>  tests/vhost-user-test.c |  312 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 316 insertions(+)
>  create mode 100644 tests/vhost-user-test.c
> 
> diff --git a/tests/Makefile b/tests/Makefile
> index 361bb7b..d1c399c 100644
> --- a/tests/Makefile
> +++ b/tests/Makefile
> @@ -156,6 +156,7 @@ gcov-files-i386-y += hw/usb/hcd-ehci.c
>  gcov-files-i386-y += hw/usb/hcd-uhci.c
>  gcov-files-i386-y += hw/usb/dev-hid.c
>  gcov-files-i386-y += hw/usb/dev-storage.c
> +check-qtest-i386-y += tests/vhost-user-test$(EXESUF)
>  check-qtest-x86_64-y = $(check-qtest-i386-y)
>  gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
>  gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
> @@ -322,9 +323,12 @@ tests/es1370-test$(EXESUF): tests/es1370-test.o
>  tests/intel-hda-test$(EXESUF): tests/intel-hda-test.o
>  tests/ioh3420-test$(EXESUF): tests/ioh3420-test.o
>  tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-pc-obj-y)
> +tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o libqemuutil.a libqemustub.a
>  tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
>  tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o libqemuutil.a libqemustub.a
>  
> +LIBS+= -lutil
> +
>  # QTest rules
>  
>  TARGETS=$(patsubst %-softmmu,%, $(filter %-softmmu,$(TARGET_DIRS)))
> diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
> new file mode 100644
> index 0000000..4bdfa8b
> --- /dev/null
> +++ b/tests/vhost-user-test.c
> @@ -0,0 +1,312 @@
> +/*
> + * QTest testcase for the vhost-user
> + *
> + * Copyright (c) 2014 Virtual Open Systems Sarl.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "libqtest.h"
> +#include "qemu/option.h"
> +#include "sysemu/char.h"
> +#include "sysemu/sysemu.h"
> +
> +#include <glib.h>
> +#include <linux/vhost.h>
> +#include <sys/mman.h>
> +#include <sys/vfs.h>
> +#include <qemu/sockets.h>
> +
> +#define QEMU_CMD_ACCEL  " -machine accel=tcg"
> +#define QEMU_CMD_MEM    " -m 512 -object memory-file,id=mem,size=512M,"\
> +                        "mem-path=%s,share=on -numa node,memdev=mem"
> +#define QEMU_CMD_CHR    " -chardev socket,id=chr0,path=%s"
> +#define QEMU_CMD_NETDEV " -netdev vhost-user,id=net0,chardev=chr0,vhostforce"
> +#define QEMU_CMD_NET    " -device virtio-net-pci,netdev=net0 "
> +#define QEMU_CMD_ROM    " -option-rom ../pc-bios/pxe-virtio.rom"
> +
> +#define QEMU_CMD        QEMU_CMD_ACCEL QEMU_CMD_MEM QEMU_CMD_CHR \
> +                        QEMU_CMD_NETDEV QEMU_CMD_NET QEMU_CMD_ROM
> +
> +#define HUGETLBFS_MAGIC       0x958458f6
> +
> +/*********** FROM hw/virtio/vhost-user.c *************************************/
> +
> +#define VHOST_MEMORY_MAX_NREGIONS    8
> +
> +typedef enum VhostUserRequest {
> +    VHOST_USER_NONE = 0,
> +    VHOST_USER_GET_FEATURES = 1,
> +    VHOST_USER_SET_FEATURES = 2,
> +    VHOST_USER_SET_OWNER = 3,
> +    VHOST_USER_RESET_OWNER = 4,
> +    VHOST_USER_SET_MEM_TABLE = 5,
> +    VHOST_USER_SET_LOG_BASE = 6,
> +    VHOST_USER_SET_LOG_FD = 7,
> +    VHOST_USER_SET_VRING_NUM = 8,
> +    VHOST_USER_SET_VRING_ADDR = 9,
> +    VHOST_USER_SET_VRING_BASE = 10,
> +    VHOST_USER_GET_VRING_BASE = 11,
> +    VHOST_USER_SET_VRING_KICK = 12,
> +    VHOST_USER_SET_VRING_CALL = 13,
> +    VHOST_USER_SET_VRING_ERR = 14,
> +    VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +    uint64_t guest_phys_addr;
> +    uint64_t memory_size;
> +    uint64_t userspace_addr;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +    uint32_t nregions;
> +    uint32_t padding;
> +    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserMsg {
> +    VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK     (0x3)
> +#define VHOST_USER_REPLY_MASK       (0x1<<2)
> +    uint32_t flags;
> +    uint32_t size; /* the following payload size */
> +    union {
> +        uint64_t u64;
> +        struct vhost_vring_state state;
> +        struct vhost_vring_addr addr;
> +        VhostUserMemory memory;
> +    };
> +} QEMU_PACKED VhostUserMsg;
> +
> +static VhostUserMsg m __attribute__ ((unused));
> +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
> +                            + sizeof(m.flags) \
> +                            + sizeof(m.size))
> +
> +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    (0x1)
> +/*****************************************************************************/
> +
> +int fds_num = 0, fds[VHOST_MEMORY_MAX_NREGIONS];
> +static VhostUserMemory memory;
> +static GMutex data_mutex;
> +static GCond data_cond;
> +
> +static void read_guest_mem(void)
> +{
> +    uint32_t *guest_mem;
> +    gint64 end_time;
> +    int i, j;
> +
> +    g_mutex_lock(&data_mutex);
> +
> +    end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
> +    while (!fds_num) {
> +        if (!g_cond_wait_until(&data_cond, &data_mutex, end_time)) {
> +            /* timeout has passed */
> +            g_assert(fds_num);
> +            break;
> +        }
> +    }
> +
> +    /* check for sanity */
> +    g_assert_cmpint(fds_num, >, 0);
> +    g_assert_cmpint(fds_num, ==, memory.nregions);
> +
> +    /* iterate all regions */
> +    for (i = 0; i < fds_num; i++) {
> +
> +        /* We'll check only the region statring at 0x0*/
> +        if (memory.regions[i].guest_phys_addr != 0x0) {
> +            continue;
> +        }
> +
> +        g_assert_cmpint(memory.regions[i].memory_size, >, 1024);
> +
> +        guest_mem = mmap(0, memory.regions[i].memory_size,
> +        PROT_READ | PROT_WRITE, MAP_SHARED, fds[i], 0);
> +
> +        for (j = 0; j < 256; j++) {
> +            uint32_t a = readl(memory.regions[i].guest_phys_addr + j*4);
> +            uint32_t b = guest_mem[j];
> +
> +            g_assert_cmpint(a, ==, b);
> +        }
> +
> +        munmap(guest_mem, memory.regions[i].memory_size);
> +    }
> +
> +    g_assert_cmpint(1, ==, 1);
> +    g_mutex_unlock(&data_mutex);
> +}
> +
> +static void *thread_function(void *data)
> +{
> +    GMainLoop *loop;
> +    loop = g_main_loop_new(NULL, FALSE);
> +    g_main_loop_run(loop);
> +    return NULL;
> +}
> +
> +static int chr_can_read(void *opaque)
> +{
> +    return VHOST_USER_HDR_SIZE;
> +}
> +
> +static void chr_read(void *opaque, const uint8_t *buf, int size)
> +{
> +    CharDriverState *chr = opaque;
> +    VhostUserMsg msg;
> +    uint8_t *p = (uint8_t *) &msg;
> +    int fd;
> +
> +    if (size != VHOST_USER_HDR_SIZE) {
> +        g_test_message("Wrong message size received %d\n", size);
> +        return;
> +    }
> +
> +    memcpy(p, buf, VHOST_USER_HDR_SIZE);
> +
> +    if (msg.size) {
> +        p += VHOST_USER_HDR_SIZE;
> +        qemu_chr_fe_read_all(chr, p, msg.size);
> +    }
> +
> +    switch (msg.request) {
> +    case VHOST_USER_GET_FEATURES:
> +        /* send back features to qemu */
> +        msg.flags |= VHOST_USER_REPLY_MASK;
> +        msg.size = sizeof(m.u64);
> +        msg.u64 = 0;
> +        p = (uint8_t *) &msg;
> +        qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
> +        break;
> +
> +    case VHOST_USER_GET_VRING_BASE:
> +        /* send back vring base to qemu */
> +        msg.flags |= VHOST_USER_REPLY_MASK;
> +        msg.size = sizeof(m.state);
> +        msg.state.num = 0;
> +        p = (uint8_t *) &msg;
> +        qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size);
> +        break;
> +
> +    case VHOST_USER_SET_MEM_TABLE:
> +        /* received the mem table */
> +        memcpy(&memory, &msg.memory, sizeof(msg.memory));
> +        fds_num = qemu_chr_fe_get_msgfds(chr, fds, sizeof(fds) / sizeof(int));
> +
> +        /* signal the test that it can continue */
> +        g_cond_signal(&data_cond);
> +        g_mutex_unlock(&data_mutex);
> +        break;
> +
> +    case VHOST_USER_SET_VRING_KICK:
> +    case VHOST_USER_SET_VRING_CALL:
> +        /* consume the fd */
> +        qemu_chr_fe_get_msgfds(chr, &fd, 1);
> +        /*
> +         * This is a non-blocking eventfd.
> +         * The receive function forces it to be blocking,
> +         * so revert it back to non-blocking.
> +         */
> +        qemu_set_nonblock(fd);
> +        break;
> +    default:
> +        break;
> +    }
> +}
> +
> +static const char *init_hugepagefs(void)
> +{
> +    const char *path;
> +    struct statfs fs;
> +    int ret;
> +
> +    path = getenv("QTEST_HUGETLBFS_PATH");
> +    if (!path) {
> +        path = "/hugetlbfs";
> +    }
> +
> +    if (access(path, R_OK | W_OK | X_OK)) {
> +        g_test_message("access on path (%s): %s\n", path, strerror(errno));
> +        return 0;
> +    }
> +
> +    do {
> +        ret = statfs(path, &fs);
> +    } while (ret != 0 && errno == EINTR);
> +
> +    if (ret != 0) {
> +        g_test_message("statfs on path (%s): %s\n", path, strerror(errno));
> +        return 0;
> +    }
> +
> +    if (fs.f_type != HUGETLBFS_MAGIC) {
> +        g_test_message("Warning: path not on HugeTLBFS: %s\n", path);
> +        return 0;
> +    }
> +
> +    return path;
> +}


I fixed above to return NULL on error.

> +
> +int main(int argc, char **argv)
> +{
> +    QTestState *s = NULL;
> +    CharDriverState *chr = NULL;
> +    const char *hugefs = 0;
> +    char *socket_path = 0;
> +    char *qemu_cmd = 0;
> +    char *chr_path = 0;
> +    int ret;
> +
> +    g_test_init(&argc, &argv, NULL);
> +
> +    module_call_init(MODULE_INIT_QOM);
> +
> +    hugefs = init_hugepagefs();
> +    if (!hugefs) {
> +        return 0;
> +    }
> +
> +    socket_path = g_strdup_printf("/tmp/vhost-%d.sock", getpid());
> +
> +    /* create char dev and add read handlers */
> +    qemu_add_opts(&qemu_chardev_opts);
> +    chr_path = g_strdup_printf("unix:%s,server,nowait", socket_path);
> +    chr = qemu_chr_new("chr0", chr_path, NULL);
> +    g_free(chr_path);
> +    qemu_chr_add_handlers(chr, chr_can_read, chr_read, NULL, chr);
> +
> +    /* run the main loop thread so the chardev may operate */
> +    g_mutex_init(&data_mutex);
> +    g_cond_init(&data_cond);
> +    g_mutex_lock(&data_mutex);
> +    g_thread_new(NULL, thread_function, NULL);
> +
> +    qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path);
> +    s = qtest_start(qemu_cmd);
> +    g_free(qemu_cmd);
> +
> +    qtest_add_func("/vhost-user/read-guest-mem", read_guest_mem);
> +
> +    ret = g_test_run();
> +
> +    if (s) {
> +        qtest_quit(s);
> +    }
> +
> +    /* cleanup */
> +    unlink(socket_path);
> +    g_free(socket_path);
> +    g_cond_clear(&data_cond);
> +    g_mutex_clear(&data_mutex);
> +
> +    return ret;
> +}

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line
  2014-06-10 10:34 ` [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line Michael S. Tsirkin
@ 2014-06-10 10:50   ` Michael S. Tsirkin
  2014-06-10 11:04     ` [Qemu-devel] [snabb-devel] " Nikolay Nikolaev
  0 siblings, 1 reply; 11+ messages in thread
From: Michael S. Tsirkin @ 2014-06-10 10:50 UTC (permalink / raw)
  To: Nikolay Nikolaev; +Cc: a.motakis, luke, snabb-devel, qemu-devel, tech

On Tue, Jun 10, 2014 at 01:34:13PM +0300, Michael S. Tsirkin wrote:
> On Tue, Jun 10, 2014 at 01:02:16PM +0300, Nikolay Nikolaev wrote:
> > The supplied chardev id will be inspected for supported options. Only
> > a socket backend, with a set path (i.e. a Unix socket) and optionally
> > the server parameter set, will be allowed. Other options (nowait, telnet)
> > will make the chardev unusable and the netdev will not be initialised.
> > 
> > Additional checks for validity:
> >   - requires `-numa node,memdev=..`
> >   - requires `-device virtio-net-*`
> > 
> > The `vhostforce` option is used to force vhost-net when we deal with
> > non-MSIX guests.
> > 
> > Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > ---
> >
> Applied, thanks!
> In the future pls just number them 0-18.
> Also in the future, pls include the changelog here after ---

Please check out and test the vhost branch, if everything is
in order let me know and I'll merge this upstream.

> >  hw/net/vhost_net.c |    4 ++
> >  net/hub.c          |    1 
> >  net/net.c          |    3 +
> >  net/vhost-user.c   |  115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> >  qapi-schema.json   |   19 ++++++++-
> >  qemu-options.hx    |   18 ++++++++
> >  7 files changed, 158 insertions(+), 6 deletions(-)
> > 
> > diff --git a/hmp-commands.hx b/hmp-commands.hx
> > index 2e462c0..bc9c032 100644
> > --- a/hmp-commands.hx
> > +++ b/hmp-commands.hx
> > @@ -1206,7 +1206,7 @@ ETEXI
> >      {
> >          .name       = "host_net_add",
> >          .args_type  = "device:s,opts:s?",
> > -        .params     = "tap|user|socket|vde|netmap|dump [options]",
> > +        .params     = "tap|user|socket|vde|netmap|vhost-user|dump [options]",
> >          .help       = "add host VLAN client",
> >          .mhandler.cmd = net_host_device_add,
> >      },
> > @@ -1234,7 +1234,7 @@ ETEXI
> >      {
> >          .name       = "netdev_add",
> >          .args_type  = "netdev:O",
> > -        .params     = "[user|tap|socket|vde|bridge|hubport|netmap],id=str[,prop=value][,...]",
> > +        .params     = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]",
> >          .help       = "add host network device",
> >          .mhandler.cmd = hmp_netdev_add,
> >          .command_completion = netdev_add_completion,
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 5f06736..7ac7c21 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -15,6 +15,7 @@
> >  
> >  #include "net/net.h"
> >  #include "net/tap.h"
> > +#include "net/vhost-user.h"
> >  
> >  #include "hw/virtio/virtio-net.h"
> >  #include "net/vhost_net.h"
> > @@ -360,6 +361,9 @@ VHostNetState *get_vhost_net(NetClientState *nc)
> >      case NET_CLIENT_OPTIONS_KIND_TAP:
> >          vhost_net = tap_get_vhost_net(nc);
> >          break;
> > +    case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> > +        vhost_net = vhost_user_get_vhost_net(nc);
> > +        break;
> >      default:
> >          break;
> >      }
> > diff --git a/net/hub.c b/net/hub.c
> > index 33a99c9..7e0f2d6 100644
> > --- a/net/hub.c
> > +++ b/net/hub.c
> > @@ -322,6 +322,7 @@ void net_hub_check_clients(void)
> >              case NET_CLIENT_OPTIONS_KIND_TAP:
> >              case NET_CLIENT_OPTIONS_KIND_SOCKET:
> >              case NET_CLIENT_OPTIONS_KIND_VDE:
> > +            case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> >                  has_host_dev = 1;
> >                  break;
> >              default:
> > diff --git a/net/net.c b/net/net.c
> > index 0ff2e40..c91b67b 100644
> > --- a/net/net.c
> > +++ b/net/net.c
> > @@ -786,6 +786,7 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
> >          [NET_CLIENT_OPTIONS_KIND_BRIDGE]    = net_init_bridge,
> >  #endif
> >          [NET_CLIENT_OPTIONS_KIND_HUBPORT]   = net_init_hubport,
> > +        [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
> >  };
> >  
> >  
> > @@ -819,6 +820,7 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
> >          case NET_CLIENT_OPTIONS_KIND_BRIDGE:
> >  #endif
> >          case NET_CLIENT_OPTIONS_KIND_HUBPORT:
> > +        case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> >              break;
> >  
> >          default:
> > @@ -907,6 +909,7 @@ static int net_host_check_device(const char *device)
> >  #ifdef CONFIG_VDE
> >                                         ,"vde"
> >  #endif
> > +                                       ,"vhost-user"
> >      };
> >      for (i = 0; i < ARRAY_SIZE(valid_param_list); i++) {
> >          if (!strncmp(valid_param_list[i], device,
> > diff --git a/net/vhost-user.c b/net/vhost-user.c
> > index 4bdd19d..32b78fb 100644
> > --- a/net/vhost-user.c
> > +++ b/net/vhost-user.c
> > @@ -12,6 +12,7 @@
> >  #include "net/vhost_net.h"
> >  #include "net/vhost-user.h"
> >  #include "sysemu/char.h"
> > +#include "qemu/config-file.h"
> >  #include "qemu/error-report.h"
> >  
> >  typedef struct VhostUserState {
> > @@ -21,9 +22,17 @@ typedef struct VhostUserState {
> >      VHostNetState *vhost_net;
> >  } VhostUserState;
> >  
> > +typedef struct VhostUserChardevProps {
> > +    bool is_socket;
> > +    bool is_unix;
> > +    bool is_server;
> > +    bool has_unsupported;
> > +} VhostUserChardevProps;
> > +
> >  VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
> >  {
> >      VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> > +    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
> >      return s->vhost_net;
> >  }
> >  
> > @@ -82,7 +91,7 @@ static bool vhost_user_has_ufo(NetClientState *nc)
> >  }
> >  
> >  static NetClientInfo net_vhost_user_info = {
> > -        .type = 0,
> > +        .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER,
> >          .size = sizeof(VhostUserState),
> >          .cleanup = vhost_user_cleanup,
> >          .has_vnet_hdr = vhost_user_has_vnet_hdr,
> > @@ -148,8 +157,108 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
> >      return 0;
> >  }
> >  
> > +static int net_vhost_chardev_opts(const char *name, const char *value,
> > +                                  void *opaque)
> > +{
> > +    VhostUserChardevProps *props = opaque;
> > +
> > +    if (strcmp(name, "backend") == 0 && strcmp(value, "socket") == 0) {
> > +        props->is_socket = true;
> > +    } else if (strcmp(name, "path") == 0) {
> > +        props->is_unix = true;
> > +    } else if (strcmp(name, "server") == 0) {
> > +        props->is_server = true;
> > +    } else {
> > +        error_report("vhost-user does not support a chardev"
> > +                     " with the following option:\n %s = %s",
> > +                     name, value);
> > +        props->has_unsupported = true;
> > +        return -1;
> > +    }
> > +    return 0;
> > +}
> > +
> > +static CharDriverState *net_vhost_parse_chardev(const NetdevVhostUserOptions *opts)
> > +{
> > +    CharDriverState *chr = qemu_chr_find(opts->chardev);
> > +    VhostUserChardevProps props;
> > +
> > +    if (chr == NULL) {
> > +        error_report("chardev \"%s\" not found", opts->chardev);
> > +        return NULL;
> > +    }
> > +
> > +    /* inspect chardev opts */
> > +    memset(&props, 0, sizeof(props));
> > +    qemu_opt_foreach(chr->opts, net_vhost_chardev_opts, &props, false);
> > +
> > +    if (!props.is_socket || !props.is_unix) {
> > +        error_report("chardev \"%s\" is not a unix socket",
> > +                     opts->chardev);
> > +        return NULL;
> > +    }
> > +
> > +    if (props.has_unsupported) {
> > +        error_report("chardev \"%s\" has an unsupported option",
> > +                opts->chardev);
> > +        return NULL;
> > +    }
> > +
> > +    qemu_chr_fe_claim_no_fail(chr);
> > +
> > +    return chr;
> > +}
> > +
> > +static int net_vhost_check_net(QemuOpts *opts, void *opaque)
> > +{
> > +    const char *name = opaque;
> > +    const char *driver, *netdev;
> > +    const char virtio_name[] = "virtio-net-";
> > +
> > +    driver = qemu_opt_get(opts, "driver");
> > +    netdev = qemu_opt_get(opts, "netdev");
> > +
> > +    if (!driver || !netdev) {
> > +        return 0;
> > +    }
> > +
> > +    if (strcmp(netdev, name) == 0 &&
> > +        strncmp(driver, virtio_name, strlen(virtio_name)) != 0) {
> > +        error_report("vhost-user requires frontend driver virtio-net-*");
> > +        return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> > -                   NetClientState *peer)
> > +                        NetClientState *peer)
> >  {
> > -    return net_vhost_user_init(peer, "vhost_user", 0, 0, 0);
> > +    const NetdevVhostUserOptions *vhost_user_opts;
> > +    CharDriverState *chr;
> > +    bool vhostforce;
> > +
> > +    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
> > +    vhost_user_opts = opts->vhost_user;
> > +
> > +    chr = net_vhost_parse_chardev(vhost_user_opts);
> > +    if (!chr) {
> > +        error_report("No suitable chardev found");
> > +        return -1;
> > +    }
> > +
> > +    /* verify net frontend */
> > +    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
> > +                          (char *)name, true) == -1) {
> > +        return -1;
> > +    }
> > +
> > +    /* vhost-force for non-MSIX */
> > +    if (vhost_user_opts->has_vhost_force) {
> > +        vhostforce = vhost_user_opts->vhost_force;
> > +    } else {
> > +        vhostforce = false;
> > +    }
> > +
> > +    return net_vhost_user_init(peer, "vhost_user", name, chr, vhostforce);
> >  }
> > diff --git a/qapi-schema.json b/qapi-schema.json
> > index 7bc33ea..f062ce9 100644
> > --- a/qapi-schema.json
> > +++ b/qapi-schema.json
> > @@ -3267,6 +3267,22 @@
> >      '*devname':    'str' } }
> >  
> >  ##
> > +# @NetdevVhostUserOptions
> > +#
> > +# Vhost-user network backend
> > +#
> > +# @chardev: name of a unix socket chardev
> > +#
> > +# @vhost-force: #optional vhost on for non-MSIX virtio guests (default: false).
> > +#
> > +# Since 2.1
> > +##
> > +{ 'type': 'NetdevVhostUserOptions',
> > +  'data': {
> > +    'chardev':        'str',
> > +    '*vhost-force':    'bool' } }
> > +
> > +##
> >  # @NetClientOptions
> >  #
> >  # A discriminated record of network device traits.
> > @@ -3284,7 +3300,8 @@
> >      'dump':     'NetdevDumpOptions',
> >      'bridge':   'NetdevBridgeOptions',
> >      'hubport':  'NetdevHubPortOptions',
> > -    'netmap':   'NetdevNetmapOptions' } }
> > +    'netmap':   'NetdevNetmapOptions',
> > +    'vhost-user': 'NetdevVhostUserOptions' } }
> >  
> >  ##
> >  # @NetLegacy
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index 5a4eff9..1ad2528 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -1460,6 +1460,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
> >  #ifdef CONFIG_NETMAP
> >      "netmap|"
> >  #endif
> > +    "vhost-user|"
> >      "socket|"
> >      "hubport],id=str[,option][,option][,...]\n", QEMU_ARCH_ALL)
> >  STEXI
> > @@ -1791,6 +1792,23 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
> >  netdev.  @code{-net} and @code{-device} with parameter @option{vlan} create the
> >  required hub automatically.
> >  
> > +@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > +
> > +Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
> > +be a unix domain socket backed one. The vhost-user uses a specifically defined
> > +protocol to pass vhost ioctl replacement messages to an application on the other
> > +end of the socket. On non-MSIX guests, the feature can be forced with
> > +@var{vhostforce}.
> > +
> > +Example:
> > +@example
> > +qemu -m 512 -object memory-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
> > +     -numa node,memdev=mem \
> > +     -chardev socket,path=/path/to/socket \
> > +     -netdev type=vhost-user,id=net0,chardev=chr0 \
> > +     -device virtio-net-pci,netdev=net0
> > +@end example
> > +
> >  @item -net dump[,vlan=@var{n}][,file=@var{file}][,len=@var{len}]
> >  Dump network traffic on VLAN @var{n} to file @var{file} (@file{qemu-vlan0.pcap} by default).
> >  At most @var{len} bytes (64k by default) per packet are stored. The file format is

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [snabb-devel] Re: [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line
  2014-06-10 10:50   ` Michael S. Tsirkin
@ 2014-06-10 11:04     ` Nikolay Nikolaev
  0 siblings, 0 replies; 11+ messages in thread
From: Nikolay Nikolaev @ 2014-06-10 11:04 UTC (permalink / raw)
  To: snabb-devel@googlegroups.com
  Cc: Antonios Motakis, Luke Gorrie, VirtualOpenSystems Technical Team,
	qemu-devel

Hello,


On Tue, Jun 10, 2014 at 1:50 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Tue, Jun 10, 2014 at 01:34:13PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Jun 10, 2014 at 01:02:16PM +0300, Nikolay Nikolaev wrote:
> > > The supplied chardev id will be inspected for supported options. Only
> > > a socket backend, with a set path (i.e. a Unix socket) and optionally
> > > the server parameter set, will be allowed. Other options (nowait, telnet)
> > > will make the chardev unusable and the netdev will not be initialised.
> > >
> > > Additional checks for validity:
> > >   - requires `-numa node,memdev=..`
> > >   - requires `-device virtio-net-*`
> > >
> > > The `vhostforce` option is used to force vhost-net when we deal with
> > > non-MSIX guests.
> > >
> > > Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> > > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > > ---
> > >
> > Applied, thanks!
> > In the future pls just number them 0-18.
> > Also in the future, pls include the changelog here after ---
>
> Please check out and test the vhost branch, if everything is
> in order let me know and I'll merge this upstream.
>
This patchseries requires the NUMA patches (I see v4 is underway). It
is not working from your tree.
I wrote yesterday in reply to your question - memdev is not enough. We
want to enable memory sharing on HUGETLBFS and this is not available
on this branch. In effect vhost-user is compiled but it can not be
used. Probably you were misled by mentioning 'memdev' in the
changelog, while it shoudl have been NUMA.


>
> > >  hw/net/vhost_net.c |    4 ++
> > >  net/hub.c          |    1
> > >  net/net.c          |    3 +
> > >  net/vhost-user.c   |  115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> > >  qapi-schema.json   |   19 ++++++++-
> > >  qemu-options.hx    |   18 ++++++++
> > >  7 files changed, 158 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/hmp-commands.hx b/hmp-commands.hx
> > > index 2e462c0..bc9c032 100644
> > > --- a/hmp-commands.hx
> > > +++ b/hmp-commands.hx
> > > @@ -1206,7 +1206,7 @@ ETEXI
> > >      {
> > >          .name       = "host_net_add",
> > >          .args_type  = "device:s,opts:s?",
> > > -        .params     = "tap|user|socket|vde|netmap|dump [options]",
> > > +        .params     = "tap|user|socket|vde|netmap|vhost-user|dump [options]",
> > >          .help       = "add host VLAN client",
> > >          .mhandler.cmd = net_host_device_add,
> > >      },
> > > @@ -1234,7 +1234,7 @@ ETEXI
> > >      {
> > >          .name       = "netdev_add",
> > >          .args_type  = "netdev:O",
> > > -        .params     = "[user|tap|socket|vde|bridge|hubport|netmap],id=str[,prop=value][,...]",
> > > +        .params     = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]",
> > >          .help       = "add host network device",
> > >          .mhandler.cmd = hmp_netdev_add,
> > >          .command_completion = netdev_add_completion,
> > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > index 5f06736..7ac7c21 100644
> > > --- a/hw/net/vhost_net.c
> > > +++ b/hw/net/vhost_net.c
> > > @@ -15,6 +15,7 @@
> > >
> > >  #include "net/net.h"
> > >  #include "net/tap.h"
> > > +#include "net/vhost-user.h"
> > >
> > >  #include "hw/virtio/virtio-net.h"
> > >  #include "net/vhost_net.h"
> > > @@ -360,6 +361,9 @@ VHostNetState *get_vhost_net(NetClientState *nc)
> > >      case NET_CLIENT_OPTIONS_KIND_TAP:
> > >          vhost_net = tap_get_vhost_net(nc);
> > >          break;
> > > +    case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> > > +        vhost_net = vhost_user_get_vhost_net(nc);
> > > +        break;
> > >      default:
> > >          break;
> > >      }
> > > diff --git a/net/hub.c b/net/hub.c
> > > index 33a99c9..7e0f2d6 100644
> > > --- a/net/hub.c
> > > +++ b/net/hub.c
> > > @@ -322,6 +322,7 @@ void net_hub_check_clients(void)
> > >              case NET_CLIENT_OPTIONS_KIND_TAP:
> > >              case NET_CLIENT_OPTIONS_KIND_SOCKET:
> > >              case NET_CLIENT_OPTIONS_KIND_VDE:
> > > +            case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> > >                  has_host_dev = 1;
> > >                  break;
> > >              default:
> > > diff --git a/net/net.c b/net/net.c
> > > index 0ff2e40..c91b67b 100644
> > > --- a/net/net.c
> > > +++ b/net/net.c
> > > @@ -786,6 +786,7 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
> > >          [NET_CLIENT_OPTIONS_KIND_BRIDGE]    = net_init_bridge,
> > >  #endif
> > >          [NET_CLIENT_OPTIONS_KIND_HUBPORT]   = net_init_hubport,
> > > +        [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
> > >  };
> > >
> > >
> > > @@ -819,6 +820,7 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
> > >          case NET_CLIENT_OPTIONS_KIND_BRIDGE:
> > >  #endif
> > >          case NET_CLIENT_OPTIONS_KIND_HUBPORT:
> > > +        case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> > >              break;
> > >
> > >          default:
> > > @@ -907,6 +909,7 @@ static int net_host_check_device(const char *device)
> > >  #ifdef CONFIG_VDE
> > >                                         ,"vde"
> > >  #endif
> > > +                                       ,"vhost-user"
> > >      };
> > >      for (i = 0; i < ARRAY_SIZE(valid_param_list); i++) {
> > >          if (!strncmp(valid_param_list[i], device,
> > > diff --git a/net/vhost-user.c b/net/vhost-user.c
> > > index 4bdd19d..32b78fb 100644
> > > --- a/net/vhost-user.c
> > > +++ b/net/vhost-user.c
> > > @@ -12,6 +12,7 @@
> > >  #include "net/vhost_net.h"
> > >  #include "net/vhost-user.h"
> > >  #include "sysemu/char.h"
> > > +#include "qemu/config-file.h"
> > >  #include "qemu/error-report.h"
> > >
> > >  typedef struct VhostUserState {
> > > @@ -21,9 +22,17 @@ typedef struct VhostUserState {
> > >      VHostNetState *vhost_net;
> > >  } VhostUserState;
> > >
> > > +typedef struct VhostUserChardevProps {
> > > +    bool is_socket;
> > > +    bool is_unix;
> > > +    bool is_server;
> > > +    bool has_unsupported;
> > > +} VhostUserChardevProps;
> > > +
> > >  VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
> > >  {
> > >      VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> > > +    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
> > >      return s->vhost_net;
> > >  }
> > >
> > > @@ -82,7 +91,7 @@ static bool vhost_user_has_ufo(NetClientState *nc)
> > >  }
> > >
> > >  static NetClientInfo net_vhost_user_info = {
> > > -        .type = 0,
> > > +        .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER,
> > >          .size = sizeof(VhostUserState),
> > >          .cleanup = vhost_user_cleanup,
> > >          .has_vnet_hdr = vhost_user_has_vnet_hdr,
> > > @@ -148,8 +157,108 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
> > >      return 0;
> > >  }
> > >
> > > +static int net_vhost_chardev_opts(const char *name, const char *value,
> > > +                                  void *opaque)
> > > +{
> > > +    VhostUserChardevProps *props = opaque;
> > > +
> > > +    if (strcmp(name, "backend") == 0 && strcmp(value, "socket") == 0) {
> > > +        props->is_socket = true;
> > > +    } else if (strcmp(name, "path") == 0) {
> > > +        props->is_unix = true;
> > > +    } else if (strcmp(name, "server") == 0) {
> > > +        props->is_server = true;
> > > +    } else {
> > > +        error_report("vhost-user does not support a chardev"
> > > +                     " with the following option:\n %s = %s",
> > > +                     name, value);
> > > +        props->has_unsupported = true;
> > > +        return -1;
> > > +    }
> > > +    return 0;
> > > +}
> > > +
> > > +static CharDriverState *net_vhost_parse_chardev(const NetdevVhostUserOptions *opts)
> > > +{
> > > +    CharDriverState *chr = qemu_chr_find(opts->chardev);
> > > +    VhostUserChardevProps props;
> > > +
> > > +    if (chr == NULL) {
> > > +        error_report("chardev \"%s\" not found", opts->chardev);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    /* inspect chardev opts */
> > > +    memset(&props, 0, sizeof(props));
> > > +    qemu_opt_foreach(chr->opts, net_vhost_chardev_opts, &props, false);
> > > +
> > > +    if (!props.is_socket || !props.is_unix) {
> > > +        error_report("chardev \"%s\" is not a unix socket",
> > > +                     opts->chardev);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    if (props.has_unsupported) {
> > > +        error_report("chardev \"%s\" has an unsupported option",
> > > +                opts->chardev);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    qemu_chr_fe_claim_no_fail(chr);
> > > +
> > > +    return chr;
> > > +}
> > > +
> > > +static int net_vhost_check_net(QemuOpts *opts, void *opaque)
> > > +{
> > > +    const char *name = opaque;
> > > +    const char *driver, *netdev;
> > > +    const char virtio_name[] = "virtio-net-";
> > > +
> > > +    driver = qemu_opt_get(opts, "driver");
> > > +    netdev = qemu_opt_get(opts, "netdev");
> > > +
> > > +    if (!driver || !netdev) {
> > > +        return 0;
> > > +    }
> > > +
> > > +    if (strcmp(netdev, name) == 0 &&
> > > +        strncmp(driver, virtio_name, strlen(virtio_name)) != 0) {
> > > +        error_report("vhost-user requires frontend driver virtio-net-*");
> > > +        return -1;
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > >  int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> > > -                   NetClientState *peer)
> > > +                        NetClientState *peer)
> > >  {
> > > -    return net_vhost_user_init(peer, "vhost_user", 0, 0, 0);
> > > +    const NetdevVhostUserOptions *vhost_user_opts;
> > > +    CharDriverState *chr;
> > > +    bool vhostforce;
> > > +
> > > +    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
> > > +    vhost_user_opts = opts->vhost_user;
> > > +
> > > +    chr = net_vhost_parse_chardev(vhost_user_opts);
> > > +    if (!chr) {
> > > +        error_report("No suitable chardev found");
> > > +        return -1;
> > > +    }
> > > +
> > > +    /* verify net frontend */
> > > +    if (qemu_opts_foreach(qemu_find_opts("device"), net_vhost_check_net,
> > > +                          (char *)name, true) == -1) {
> > > +        return -1;
> > > +    }
> > > +
> > > +    /* vhost-force for non-MSIX */
> > > +    if (vhost_user_opts->has_vhost_force) {
> > > +        vhostforce = vhost_user_opts->vhost_force;
> > > +    } else {
> > > +        vhostforce = false;
> > > +    }
> > > +
> > > +    return net_vhost_user_init(peer, "vhost_user", name, chr, vhostforce);
> > >  }
> > > diff --git a/qapi-schema.json b/qapi-schema.json
> > > index 7bc33ea..f062ce9 100644
> > > --- a/qapi-schema.json
> > > +++ b/qapi-schema.json
> > > @@ -3267,6 +3267,22 @@
> > >      '*devname':    'str' } }
> > >
> > >  ##
> > > +# @NetdevVhostUserOptions
> > > +#
> > > +# Vhost-user network backend
> > > +#
> > > +# @chardev: name of a unix socket chardev
> > > +#
> > > +# @vhost-force: #optional vhost on for non-MSIX virtio guests (default: false).
> > > +#
> > > +# Since 2.1
> > > +##
> > > +{ 'type': 'NetdevVhostUserOptions',
> > > +  'data': {
> > > +    'chardev':        'str',
> > > +    '*vhost-force':    'bool' } }
> > > +
> > > +##
> > >  # @NetClientOptions
> > >  #
> > >  # A discriminated record of network device traits.
> > > @@ -3284,7 +3300,8 @@
> > >      'dump':     'NetdevDumpOptions',
> > >      'bridge':   'NetdevBridgeOptions',
> > >      'hubport':  'NetdevHubPortOptions',
> > > -    'netmap':   'NetdevNetmapOptions' } }
> > > +    'netmap':   'NetdevNetmapOptions',
> > > +    'vhost-user': 'NetdevVhostUserOptions' } }
> > >
> > >  ##
> > >  # @NetLegacy
> > > diff --git a/qemu-options.hx b/qemu-options.hx
> > > index 5a4eff9..1ad2528 100644
> > > --- a/qemu-options.hx
> > > +++ b/qemu-options.hx
> > > @@ -1460,6 +1460,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
> > >  #ifdef CONFIG_NETMAP
> > >      "netmap|"
> > >  #endif
> > > +    "vhost-user|"
> > >      "socket|"
> > >      "hubport],id=str[,option][,option][,...]\n", QEMU_ARCH_ALL)
> > >  STEXI
> > > @@ -1791,6 +1792,23 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
> > >  netdev.  @code{-net} and @code{-device} with parameter @option{vlan} create the
> > >  required hub automatically.
> > >
> > > +@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
> > > +
> > > +Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
> > > +be a unix domain socket backed one. The vhost-user uses a specifically defined
> > > +protocol to pass vhost ioctl replacement messages to an application on the other
> > > +end of the socket. On non-MSIX guests, the feature can be forced with
> > > +@var{vhostforce}.
> > > +
> > > +Example:
> > > +@example
> > > +qemu -m 512 -object memory-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
> > > +     -numa node,memdev=mem \
Here we give and example and rely on NUMA,memdev, memory-file and share=on.

> > > +     -chardev socket,path=/path/to/socket \
> > > +     -netdev type=vhost-user,id=net0,chardev=chr0 \
> > > +     -device virtio-net-pci,netdev=net0
> > > +@end example
> > > +
> > >  @item -net dump[,vlan=@var{n}][,file=@var{file}][,len=@var{len}]
> > >  Dump network traffic on VLAN @var{n} to file @var{file} (@file{qemu-vlan0.pcap} by default).
> > >  At most @var{len} bytes (64k by default) per packet are stored. The file format is
>
> --
> You received this message because you are subscribed to the Google Groups "Snabb Switch development" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to snabb-devel+unsubscribe@googlegroups.com.
> To post to this group, send an email to snabb-devel@googlegroups.com.
> Visit this group at http://groups.google.com/group/snabb-devel.

Once again, I'm sorry if you were misled by seeing 'memdev' in the
changelog, it's a confusion on our side.
Please advice how to proceed.

regards,
Nikolay Nikolaev

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line
  2014-06-10 10:02 [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line Nikolay Nikolaev
                   ` (2 preceding siblings ...)
  2014-06-10 10:34 ` [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line Michael S. Tsirkin
@ 2014-06-10 12:11 ` Eric Blake
  2014-06-10 13:03   ` Michael S. Tsirkin
  3 siblings, 1 reply; 11+ messages in thread
From: Eric Blake @ 2014-06-10 12:11 UTC (permalink / raw)
  To: Nikolay Nikolaev, snabb-devel, qemu-devel; +Cc: a.motakis, luke, tech, mst

[-- Attachment #1: Type: text/plain, Size: 3358 bytes --]

On 06/10/2014 04:02 AM, Nikolay Nikolaev wrote:
> The supplied chardev id will be inspected for supported options. Only
> a socket backend, with a set path (i.e. a Unix socket) and optionally
> the server parameter set, will be allowed. Other options (nowait, telnet)
> will make the chardev unusable and the netdev will not be initialised.
> 
> Additional checks for validity:
>   - requires `-numa node,memdev=..`
>   - requires `-device virtio-net-*`
> 
> The `vhostforce` option is used to force vhost-net when we deal with
> non-MSIX guests.

Here you call it vhostforce...[1]

>  
> +static int net_vhost_chardev_opts(const char *name, const char *value,
> +                                  void *opaque)
> +{
> +    VhostUserChardevProps *props = opaque;
> +
> +    if (strcmp(name, "backend") == 0 && strcmp(value, "socket") == 0) {
> +        props->is_socket = true;
> +    } else if (strcmp(name, "path") == 0) {
> +        props->is_unix = true;
> +    } else if (strcmp(name, "server") == 0) {
> +        props->is_server = true;
> +    } else {
> +        error_report("vhost-user does not support a chardev"
> +                     " with the following option:\n %s = %s",
> +                     name, value);
> +        props->has_unsupported = true;
> +        return -1;

Here you reported an error...[2]

> +    }
> +    return 0;
> +}
> +
> +static CharDriverState *net_vhost_parse_chardev(const NetdevVhostUserOptions *opts)
> +{
> +    CharDriverState *chr = qemu_chr_find(opts->chardev);
> +    VhostUserChardevProps props;
> +
> +    if (chr == NULL) {
> +        error_report("chardev \"%s\" not found", opts->chardev);
> +        return NULL;
> +    }
> +
> +    /* inspect chardev opts */
> +    memset(&props, 0, sizeof(props));
> +    qemu_opt_foreach(chr->opts, net_vhost_chardev_opts, &props, false);
> +
> +    if (!props.is_socket || !props.is_unix) {
> +        error_report("chardev \"%s\" is not a unix socket",
> +                     opts->chardev);
> +        return NULL;
> +    }
> +
> +    if (props.has_unsupported) {
> +        error_report("chardev \"%s\" has an unsupported option",
> +                opts->chardev);
> +        return NULL;

[2]...and again another error.  One report is sufficient.  For that
matter, I highly doubt you even need a has_unsupported member - since
you always error out early before allowing the device to be created, it
is just redundant information and will always be false for any device
that gets past creation without reporting an error.


> +##
> +{ 'type': 'NetdevVhostUserOptions',
> +  'data': {
> +    'chardev':        'str',
> +    '*vhost-force':    'bool' } }

[1]...and here you call it vhost-force...


> +Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
> +be a unix domain socket backed one. The vhost-user uses a specifically defined
> +protocol to pass vhost ioctl replacement messages to an application on the other
> +end of the socket. On non-MSIX guests, the feature can be forced with
> +@var{vhostforce}.

[1]...yet document it as vhostforce.

If this has already been applied, then you need to submit a followup patch.

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line
  2014-06-10 12:11 ` [Qemu-devel] " Eric Blake
@ 2014-06-10 13:03   ` Michael S. Tsirkin
  2014-06-10 13:10     ` Eric Blake
  0 siblings, 1 reply; 11+ messages in thread
From: Michael S. Tsirkin @ 2014-06-10 13:03 UTC (permalink / raw)
  To: Eric Blake
  Cc: snabb-devel, qemu-devel, Nikolay Nikolaev, luke, a.motakis, tech

On Tue, Jun 10, 2014 at 06:11:16AM -0600, Eric Blake wrote:
> On 06/10/2014 04:02 AM, Nikolay Nikolaev wrote:
> > The supplied chardev id will be inspected for supported options. Only
> > a socket backend, with a set path (i.e. a Unix socket) and optionally
> > the server parameter set, will be allowed. Other options (nowait, telnet)
> > will make the chardev unusable and the netdev will not be initialised.
> > 
> > Additional checks for validity:
> >   - requires `-numa node,memdev=..`
> >   - requires `-device virtio-net-*`
> > 
> > The `vhostforce` option is used to force vhost-net when we deal with
> > non-MSIX guests.
> 
> Here you call it vhostforce...[1]
> 
> >  
> > +static int net_vhost_chardev_opts(const char *name, const char *value,
> > +                                  void *opaque)
> > +{
> > +    VhostUserChardevProps *props = opaque;
> > +
> > +    if (strcmp(name, "backend") == 0 && strcmp(value, "socket") == 0) {
> > +        props->is_socket = true;
> > +    } else if (strcmp(name, "path") == 0) {
> > +        props->is_unix = true;
> > +    } else if (strcmp(name, "server") == 0) {
> > +        props->is_server = true;
> > +    } else {
> > +        error_report("vhost-user does not support a chardev"
> > +                     " with the following option:\n %s = %s",
> > +                     name, value);
> > +        props->has_unsupported = true;
> > +        return -1;
> 
> Here you reported an error...[2]
> 
> > +    }
> > +    return 0;
> > +}
> > +
> > +static CharDriverState *net_vhost_parse_chardev(const NetdevVhostUserOptions *opts)
> > +{
> > +    CharDriverState *chr = qemu_chr_find(opts->chardev);
> > +    VhostUserChardevProps props;
> > +
> > +    if (chr == NULL) {
> > +        error_report("chardev \"%s\" not found", opts->chardev);
> > +        return NULL;
> > +    }
> > +
> > +    /* inspect chardev opts */
> > +    memset(&props, 0, sizeof(props));
> > +    qemu_opt_foreach(chr->opts, net_vhost_chardev_opts, &props, false);
> > +
> > +    if (!props.is_socket || !props.is_unix) {
> > +        error_report("chardev \"%s\" is not a unix socket",
> > +                     opts->chardev);
> > +        return NULL;
> > +    }
> > +
> > +    if (props.has_unsupported) {
> > +        error_report("chardev \"%s\" has an unsupported option",
> > +                opts->chardev);
> > +        return NULL;
> 
> [2]...and again another error.  One report is sufficient.  For that
> matter, I highly doubt you even need a has_unsupported member - since
> you always error out early before allowing the device to be created, it
> is just redundant information and will always be false for any device
> that gets past creation without reporting an error.
> 
> 
> > +##
> > +{ 'type': 'NetdevVhostUserOptions',
> > +  'data': {
> > +    'chardev':        'str',
> > +    '*vhost-force':    'bool' } }
> 
> [1]...and here you call it vhost-force...
> 

Should be vhostforce, consistent with tap.

> > +Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
> > +be a unix domain socket backed one. The vhost-user uses a specifically defined
> > +protocol to pass vhost ioctl replacement messages to an application on the other
> > +end of the socket. On non-MSIX guests, the feature can be forced with
> > +@var{vhostforce}.
> 
> [1]...yet document it as vhostforce.
> 
> If this has already been applied, then you need to submit a followup patch.


Pls do, use fixup! "original subject" for clarity.
> -- 
> Eric Blake   eblake redhat com    +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line
  2014-06-10 13:03   ` Michael S. Tsirkin
@ 2014-06-10 13:10     ` Eric Blake
  0 siblings, 0 replies; 11+ messages in thread
From: Eric Blake @ 2014-06-10 13:10 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: snabb-devel, qemu-devel, Nikolay Nikolaev, luke, a.motakis, tech

[-- Attachment #1: Type: text/plain, Size: 1018 bytes --]

On 06/10/2014 07:03 AM, Michael S. Tsirkin wrote:
> On Tue, Jun 10, 2014 at 06:11:16AM -0600, Eric Blake wrote:
>> On 06/10/2014 04:02 AM, Nikolay Nikolaev wrote:
>>> The supplied chardev id will be inspected for supported options. Only
>>> a socket backend, with a set path (i.e. a Unix socket) and optionally
>>> the server parameter set, will be allowed. Other options (nowait, telnet)
>>> will make the chardev unusable and the netdev will not be initialised.
>>>

>>> +##
>>> +{ 'type': 'NetdevVhostUserOptions',
>>> +  'data': {
>>> +    'chardev':        'str',
>>> +    '*vhost-force':    'bool' } }
>>
>> [1]...and here you call it vhost-force...
>>
> 
> Should be vhostforce, consistent with tap.

Oh, good point - no need to make the user have to guess which of two
spellings to use.  Sorry for not noticing that earlier when I was
bikeshedding in my request to add a '-'.

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2014-06-10 13:10 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-06-10 10:02 [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line Nikolay Nikolaev
2014-06-10 10:02 ` [Qemu-devel] [PATCH v10-fix 16/18] Add vhost-user protocol documentation Nikolay Nikolaev
2014-06-10 10:22   ` Michael S. Tsirkin
2014-06-10 10:03 ` [Qemu-devel] [PATCH v10-fix 18/18] Add qtest for vhost-user Nikolay Nikolaev
2014-06-10 10:49   ` Michael S. Tsirkin
2014-06-10 10:34 ` [Qemu-devel] [PATCH v10-fix 15/18] Add the vhost-user netdev backend to the command line Michael S. Tsirkin
2014-06-10 10:50   ` Michael S. Tsirkin
2014-06-10 11:04     ` [Qemu-devel] [snabb-devel] " Nikolay Nikolaev
2014-06-10 12:11 ` [Qemu-devel] " Eric Blake
2014-06-10 13:03   ` Michael S. Tsirkin
2014-06-10 13:10     ` Eric Blake

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).