* [Qemu-devel] [PATCH v5 1/7] Convert -mem-path to QemuOpts and add prealloc, share and unlink properties
2014-01-09 14:59 [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends Antonios Motakis
@ 2014-01-09 14:59 ` Antonios Motakis
2014-01-09 16:01 ` Michael S. Tsirkin
2014-01-13 8:30 ` Edgar E. Iglesias
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 2/7] Decouple vhost from kernel interface Antonios Motakis
` (6 subsequent siblings)
7 siblings, 2 replies; 24+ messages in thread
From: Antonios Motakis @ 2014-01-09 14:59 UTC (permalink / raw)
To: qemu-devel, snabb-devel
Cc: Peter Maydell, Stefan Hajnoczi, Jan Kiszka, Michael Tokarev,
Alexander Graf, n.nikolaev, Markus Armbruster, Anthony Liguori,
Paolo Bonzini, lukego, Antonios Motakis, tech,
Andreas Färber, Richard Henderson
Extend -mem-path with additional properties:
- prealloc=on|off - default off, same as -mem-prealloc
- share=on|off - default off, memory is mmapped with MAP_SHARED flag
- unlink=on|off - default on, inlink the file after openinng it
Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
exec.c | 57 +++++++++++++++++++++++++++++++++++++++++---------
include/exec/cpu-all.h | 3 ---
qemu-options.hx | 10 +++++++--
vl.c | 41 +++++++++++++++++++++++++++++++-----
4 files changed, 91 insertions(+), 20 deletions(-)
diff --git a/exec.c b/exec.c
index 7e49e8e..30f4019 100644
--- a/exec.c
+++ b/exec.c
@@ -957,6 +957,7 @@ void qemu_mutex_unlock_ramlist(void)
#include <sys/vfs.h>
#define HUGETLBFS_MAGIC 0x958458f6
+#define MIN_HUGE_PAGE_SIZE (2*1024*1024)
static long gethugepagesize(const char *path)
{
@@ -972,8 +973,9 @@ static long gethugepagesize(const char *path)
return 0;
}
- if (fs.f_type != HUGETLBFS_MAGIC)
- fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+ if (fs.f_type != HUGETLBFS_MAGIC) {
+ return 0;
+ }
return fs.f_bsize;
}
@@ -994,11 +996,14 @@ static void *file_ram_alloc(RAMBlock *block,
char *c;
void *area;
int fd;
+ int flags;
unsigned long hpagesize;
+ QemuOpts *opts;
+ unsigned int mem_prealloc = 0, mem_share = 0, mem_unlink = 1;
hpagesize = gethugepagesize(path);
if (!hpagesize) {
- return NULL;
+ hpagesize = MIN_HUGE_PAGE_SIZE;
}
if (memory < hpagesize) {
@@ -1010,6 +1015,14 @@ static void *file_ram_alloc(RAMBlock *block,
return NULL;
}
+ /* Fill config options */
+ opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
+ if (opts) {
+ mem_prealloc = qemu_opt_get_bool(opts, "prealloc", 0);
+ mem_share = qemu_opt_get_bool(opts, "share", 0);
+ mem_unlink = qemu_opt_get_bool(opts, "unlink", 1);
+ }
+
/* Make name safe to use with mkstemp by replacing '/' with '_'. */
sanitized_name = g_strdup(block->mr->name);
for (c = sanitized_name; *c != '\0'; c++) {
@@ -1017,20 +1030,28 @@ static void *file_ram_alloc(RAMBlock *block,
*c = '_';
}
- filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
- sanitized_name);
+ filename = g_strdup_printf("%s/qemu_back_mem.%s%s", path, sanitized_name,
+ (mem_unlink) ? ".XXXXXX" : "");
g_free(sanitized_name);
- fd = mkstemp(filename);
+ if (mem_unlink) {
+ fd = mkstemp(filename);
+ } else {
+ fd = open(filename, O_CREAT | O_RDWR | O_EXCL,
+ S_IRWXU | S_IRWXG | S_IRWXO);
+ }
if (fd < 0) {
- perror("unable to create backing store for hugepages");
+ perror("unable to create guest RAM backing store");
g_free(filename);
return NULL;
}
- unlink(filename);
+
+ if (mem_unlink) {
+ unlink(filename);
+ }
g_free(filename);
- memory = (memory+hpagesize-1) & ~(hpagesize-1);
+ memory = (memory + hpagesize - 1) & ~(hpagesize - 1);
/*
* ftruncate is not supported by hugetlbfs in older
@@ -1041,7 +1062,8 @@ static void *file_ram_alloc(RAMBlock *block,
if (ftruncate(fd, memory))
perror("ftruncate");
- area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ flags = mem_share ? MAP_SHARED : MAP_PRIVATE;
+ area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
if (area == MAP_FAILED) {
perror("file_ram_alloc: can't mmap RAM pages");
close(fd);
@@ -1211,11 +1233,18 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
MemoryRegion *mr)
{
RAMBlock *block, *new_block;
+ QemuOpts *opts;
+ const char *mem_path = 0;
size = TARGET_PAGE_ALIGN(size);
new_block = g_malloc0(sizeof(*new_block));
new_block->fd = -1;
+ opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
+ if (opts) {
+ mem_path = qemu_opt_get(opts, "path");
+ }
+
/* This assumes the iothread lock is taken here too. */
qemu_mutex_lock_ramlist();
new_block->mr = mr;
@@ -1348,6 +1377,14 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
ram_addr_t offset;
int flags;
void *area, *vaddr;
+ QemuOpts *opts;
+ unsigned int mem_prealloc = 0;
+
+ /* Fill config options */
+ opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
+ if (opts) {
+ mem_prealloc = qemu_opt_get_bool(opts, "prealloc", 0);
+ }
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
offset = addr - block->offset;
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index b6998f0..4f8e989 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -467,9 +467,6 @@ typedef struct RAMList {
} RAMList;
extern RAMList ram_list;
-extern const char *mem_path;
-extern int mem_prealloc;
-
/* Flags stored in the low bits of the TLB virtual address. These are
defined so that fast path ram access is all zeros. */
/* Zero if TLB entry is valid. */
diff --git a/qemu-options.hx b/qemu-options.hx
index bcfe9ea..0d35c9c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -221,9 +221,15 @@ gigabytes respectively.
ETEXI
DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
- "-mem-path FILE provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
+ "-mem-path [path=]path[,prealloc=on|off][,share=on|off][,unlink=on|off]\n"
+ " provide backing storage for guest RAM\n"
+ " path= a directory path for the backing store\n"
+ " prealloc= preallocate guest memory [default disabled]\n"
+ " share= enable mmap share flag [default disabled]\n"
+ " unlink= enable unlinking the guest RAM files [default enabled]\n",
+ QEMU_ARCH_ALL)
STEXI
-@item -mem-path @var{path}
+@item -mem-path [path=]@var{path}[,prealloc=on|off][,share=on|off][,unlink=on|off]
@findex -mem-path
Allocate guest RAM from a temporarily created file in @var{path}.
ETEXI
diff --git a/vl.c b/vl.c
index 7511e70..4a52e0d 100644
--- a/vl.c
+++ b/vl.c
@@ -187,8 +187,6 @@ DisplayType display_type = DT_DEFAULT;
static int display_remote;
const char* keyboard_layout = NULL;
ram_addr_t ram_size;
-const char *mem_path = NULL;
-int mem_prealloc = 0; /* force preallocation of physical target memory */
int nb_nics;
NICInfo nd_table[MAX_NICS];
int autostart;
@@ -531,6 +529,31 @@ static QemuOptsList qemu_msg_opts = {
},
};
+static QemuOptsList qemu_mem_path_opts = {
+ .name = "mem-path",
+ .implied_opt_name = "path",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_mem_path_opts.head),
+ .desc = {
+ {
+ .name = "path",
+ .type = QEMU_OPT_STRING,
+ },
+ {
+ .name = "prealloc",
+ .type = QEMU_OPT_BOOL,
+ },
+ {
+ .name = "share",
+ .type = QEMU_OPT_BOOL,
+ },
+ {
+ .name = "unlink",
+ .type = QEMU_OPT_BOOL,
+ },
+ { /* end of list */ }
+ },
+};
+
/**
* Get machine options
*
@@ -2892,6 +2915,7 @@ int main(int argc, char **argv, char **envp)
qemu_add_opts(&qemu_tpmdev_opts);
qemu_add_opts(&qemu_realtime_opts);
qemu_add_opts(&qemu_msg_opts);
+ qemu_add_opts(&qemu_mem_path_opts);
runstate_init();
@@ -3209,11 +3233,18 @@ int main(int argc, char **argv, char **envp)
break;
#endif
case QEMU_OPTION_mempath:
- mem_path = optarg;
+ if (!qemu_opts_parse(qemu_find_opts("mem-path"), optarg, 1)) {
+ exit(1);
+ }
break;
- case QEMU_OPTION_mem_prealloc:
- mem_prealloc = 1;
+ case QEMU_OPTION_mem_prealloc: {
+ QemuOpts *mem_opts = qemu_opts_find(qemu_find_opts("mem-path"),
+ NULL);
+ if (mem_opts) {
+ qemu_opt_set(mem_opts, "prealloc", "on");
+ }
break;
+ }
case QEMU_OPTION_d:
log_mask = optarg;
break;
--
1.8.3.2
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 1/7] Convert -mem-path to QemuOpts and add prealloc, share and unlink properties
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 1/7] Convert -mem-path to QemuOpts and add prealloc, share and unlink properties Antonios Motakis
@ 2014-01-09 16:01 ` Michael S. Tsirkin
2014-01-10 11:05 ` Antonios Motakis
2014-01-13 8:30 ` Edgar E. Iglesias
1 sibling, 1 reply; 24+ messages in thread
From: Michael S. Tsirkin @ 2014-01-09 16:01 UTC (permalink / raw)
To: Antonios Motakis
Cc: Peter Maydell, snabb-devel, Anthony Liguori, Jan Kiszka,
Michael Tokarev, qemu-devel, n.nikolaev, Alexander Graf,
Stefan Hajnoczi, lukego, Paolo Bonzini, tech, Andreas Färber,
Markus Armbruster, Richard Henderson
On Thu, Jan 09, 2014 at 03:59:55PM +0100, Antonios Motakis wrote:
> Extend -mem-path with additional properties:
>
> - prealloc=on|off - default off, same as -mem-prealloc
> - share=on|off - default off, memory is mmapped with MAP_SHARED flag
> - unlink=on|off - default on, inlink the file after openinng it
typos
>
> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> ---
> exec.c | 57 +++++++++++++++++++++++++++++++++++++++++---------
> include/exec/cpu-all.h | 3 ---
> qemu-options.hx | 10 +++++++--
> vl.c | 41 +++++++++++++++++++++++++++++++-----
> 4 files changed, 91 insertions(+), 20 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 7e49e8e..30f4019 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -957,6 +957,7 @@ void qemu_mutex_unlock_ramlist(void)
> #include <sys/vfs.h>
>
> #define HUGETLBFS_MAGIC 0x958458f6
> +#define MIN_HUGE_PAGE_SIZE (2*1024*1024)
>
> static long gethugepagesize(const char *path)
> {
> @@ -972,8 +973,9 @@ static long gethugepagesize(const char *path)
> return 0;
> }
>
> - if (fs.f_type != HUGETLBFS_MAGIC)
> - fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> + if (fs.f_type != HUGETLBFS_MAGIC) {
> + return 0;
> + }
>
> return fs.f_bsize;
> }
> @@ -994,11 +996,14 @@ static void *file_ram_alloc(RAMBlock *block,
> char *c;
> void *area;
> int fd;
> + int flags;
> unsigned long hpagesize;
> + QemuOpts *opts;
> + unsigned int mem_prealloc = 0, mem_share = 0, mem_unlink = 1;
>
> hpagesize = gethugepagesize(path);
> if (!hpagesize) {
> - return NULL;
> + hpagesize = MIN_HUGE_PAGE_SIZE;
> }
>
> if (memory < hpagesize) {
> @@ -1010,6 +1015,14 @@ static void *file_ram_alloc(RAMBlock *block,
> return NULL;
> }
>
> + /* Fill config options */
> + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> + if (opts) {
> + mem_prealloc = qemu_opt_get_bool(opts, "prealloc", 0);
> + mem_share = qemu_opt_get_bool(opts, "share", 0);
> + mem_unlink = qemu_opt_get_bool(opts, "unlink", 1);
> + }
> +
> /* Make name safe to use with mkstemp by replacing '/' with '_'. */
> sanitized_name = g_strdup(block->mr->name);
> for (c = sanitized_name; *c != '\0'; c++) {
> @@ -1017,20 +1030,28 @@ static void *file_ram_alloc(RAMBlock *block,
> *c = '_';
> }
>
> - filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
> - sanitized_name);
> + filename = g_strdup_printf("%s/qemu_back_mem.%s%s", path, sanitized_name,
> + (mem_unlink) ? ".XXXXXX" : "");
> g_free(sanitized_name);
>
> - fd = mkstemp(filename);
> + if (mem_unlink) {
> + fd = mkstemp(filename);
> + } else {
> + fd = open(filename, O_CREAT | O_RDWR | O_EXCL,
> + S_IRWXU | S_IRWXG | S_IRWXO);
> + }
> if (fd < 0) {
> - perror("unable to create backing store for hugepages");
> + perror("unable to create guest RAM backing store");
> g_free(filename);
> return NULL;
> }
> - unlink(filename);
> +
> + if (mem_unlink) {
> + unlink(filename);
> + }
If we don't unlink, what will?
> g_free(filename);
>
> - memory = (memory+hpagesize-1) & ~(hpagesize-1);
> + memory = (memory + hpagesize - 1) & ~(hpagesize - 1);
>
> /*
> * ftruncate is not supported by hugetlbfs in older
> @@ -1041,7 +1062,8 @@ static void *file_ram_alloc(RAMBlock *block,
> if (ftruncate(fd, memory))
> perror("ftruncate");
>
> - area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
> + flags = mem_share ? MAP_SHARED : MAP_PRIVATE;
> + area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
> if (area == MAP_FAILED) {
> perror("file_ram_alloc: can't mmap RAM pages");
> close(fd);
> @@ -1211,11 +1233,18 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
> MemoryRegion *mr)
> {
> RAMBlock *block, *new_block;
> + QemuOpts *opts;
> + const char *mem_path = 0;
>
> size = TARGET_PAGE_ALIGN(size);
> new_block = g_malloc0(sizeof(*new_block));
> new_block->fd = -1;
>
> + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> + if (opts) {
> + mem_path = qemu_opt_get(opts, "path");
> + }
> +
> /* This assumes the iothread lock is taken here too. */
> qemu_mutex_lock_ramlist();
> new_block->mr = mr;
> @@ -1348,6 +1377,14 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
> ram_addr_t offset;
> int flags;
> void *area, *vaddr;
> + QemuOpts *opts;
> + unsigned int mem_prealloc = 0;
> +
> + /* Fill config options */
> + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> + if (opts) {
> + mem_prealloc = qemu_opt_get_bool(opts, "prealloc", 0);
> + }
>
> QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> offset = addr - block->offset;
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index b6998f0..4f8e989 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -467,9 +467,6 @@ typedef struct RAMList {
> } RAMList;
> extern RAMList ram_list;
>
> -extern const char *mem_path;
> -extern int mem_prealloc;
> -
> /* Flags stored in the low bits of the TLB virtual address. These are
> defined so that fast path ram access is all zeros. */
> /* Zero if TLB entry is valid. */
> diff --git a/qemu-options.hx b/qemu-options.hx
> index bcfe9ea..0d35c9c 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -221,9 +221,15 @@ gigabytes respectively.
> ETEXI
>
> DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
> - "-mem-path FILE provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
> + "-mem-path [path=]path[,prealloc=on|off][,share=on|off][,unlink=on|off]\n"
> + " provide backing storage for guest RAM\n"
> + " path= a directory path for the backing store\n"
> + " prealloc= preallocate guest memory [default disabled]\n"
> + " share= enable mmap share flag [default disabled]\n"
> + " unlink= enable unlinking the guest RAM files [default enabled]\n",
> + QEMU_ARCH_ALL)
> STEXI
> -@item -mem-path @var{path}
> +@item -mem-path [path=]@var{path}[,prealloc=on|off][,share=on|off][,unlink=on|off]
> @findex -mem-path
> Allocate guest RAM from a temporarily created file in @var{path}.
> ETEXI
> diff --git a/vl.c b/vl.c
> index 7511e70..4a52e0d 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -187,8 +187,6 @@ DisplayType display_type = DT_DEFAULT;
> static int display_remote;
> const char* keyboard_layout = NULL;
> ram_addr_t ram_size;
> -const char *mem_path = NULL;
> -int mem_prealloc = 0; /* force preallocation of physical target memory */
> int nb_nics;
> NICInfo nd_table[MAX_NICS];
> int autostart;
> @@ -531,6 +529,31 @@ static QemuOptsList qemu_msg_opts = {
> },
> };
>
> +static QemuOptsList qemu_mem_path_opts = {
> + .name = "mem-path",
> + .implied_opt_name = "path",
> + .head = QTAILQ_HEAD_INITIALIZER(qemu_mem_path_opts.head),
> + .desc = {
> + {
> + .name = "path",
> + .type = QEMU_OPT_STRING,
> + },
> + {
> + .name = "prealloc",
> + .type = QEMU_OPT_BOOL,
> + },
> + {
> + .name = "share",
> + .type = QEMU_OPT_BOOL,
> + },
> + {
> + .name = "unlink",
> + .type = QEMU_OPT_BOOL,
> + },
> + { /* end of list */ }
> + },
> +};
> +
> /**
> * Get machine options
> *
> @@ -2892,6 +2915,7 @@ int main(int argc, char **argv, char **envp)
> qemu_add_opts(&qemu_tpmdev_opts);
> qemu_add_opts(&qemu_realtime_opts);
> qemu_add_opts(&qemu_msg_opts);
> + qemu_add_opts(&qemu_mem_path_opts);
>
> runstate_init();
>
> @@ -3209,11 +3233,18 @@ int main(int argc, char **argv, char **envp)
> break;
> #endif
> case QEMU_OPTION_mempath:
> - mem_path = optarg;
> + if (!qemu_opts_parse(qemu_find_opts("mem-path"), optarg, 1)) {
> + exit(1);
> + }
> break;
> - case QEMU_OPTION_mem_prealloc:
> - mem_prealloc = 1;
> + case QEMU_OPTION_mem_prealloc: {
> + QemuOpts *mem_opts = qemu_opts_find(qemu_find_opts("mem-path"),
> + NULL);
> + if (mem_opts) {
> + qemu_opt_set(mem_opts, "prealloc", "on");
> + }
> break;
> + }
> case QEMU_OPTION_d:
> log_mask = optarg;
> break;
> --
> 1.8.3.2
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 1/7] Convert -mem-path to QemuOpts and add prealloc, share and unlink properties
2014-01-09 16:01 ` Michael S. Tsirkin
@ 2014-01-10 11:05 ` Antonios Motakis
2014-01-10 12:31 ` Michael S. Tsirkin
0 siblings, 1 reply; 24+ messages in thread
From: Antonios Motakis @ 2014-01-10 11:05 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Peter Maydell, snabb-devel, Anthony Liguori, Jan Kiszka,
Michael Tokarev, qemu-devel qemu-devel, Nikolay Nikolaev,
Alexander Graf, Stefan Hajnoczi, Luke Gorrie, Paolo Bonzini,
VirtualOpenSystems Technical Team, Andreas Färber,
Markus Armbruster, Richard Henderson
[-- Attachment #1: Type: text/plain, Size: 9960 bytes --]
On Thu, Jan 9, 2014 at 5:01 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Thu, Jan 09, 2014 at 03:59:55PM +0100, Antonios Motakis wrote:
> > Extend -mem-path with additional properties:
> >
> > - prealloc=on|off - default off, same as -mem-prealloc
> > - share=on|off - default off, memory is mmapped with MAP_SHARED flag
> > - unlink=on|off - default on, inlink the file after openinng it
>
> typos
>
Ack.
>
> >
> > Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > ---
> > exec.c | 57
> +++++++++++++++++++++++++++++++++++++++++---------
> > include/exec/cpu-all.h | 3 ---
> > qemu-options.hx | 10 +++++++--
> > vl.c | 41 +++++++++++++++++++++++++++++++-----
> > 4 files changed, 91 insertions(+), 20 deletions(-)
> >
> > diff --git a/exec.c b/exec.c
> > index 7e49e8e..30f4019 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -957,6 +957,7 @@ void qemu_mutex_unlock_ramlist(void)
> > #include <sys/vfs.h>
> >
> > #define HUGETLBFS_MAGIC 0x958458f6
> > +#define MIN_HUGE_PAGE_SIZE (2*1024*1024)
> >
> > static long gethugepagesize(const char *path)
> > {
> > @@ -972,8 +973,9 @@ static long gethugepagesize(const char *path)
> > return 0;
> > }
> >
> > - if (fs.f_type != HUGETLBFS_MAGIC)
> > - fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> > + if (fs.f_type != HUGETLBFS_MAGIC) {
> > + return 0;
> > + }
> >
> > return fs.f_bsize;
> > }
> > @@ -994,11 +996,14 @@ static void *file_ram_alloc(RAMBlock *block,
> > char *c;
> > void *area;
> > int fd;
> > + int flags;
> > unsigned long hpagesize;
> > + QemuOpts *opts;
> > + unsigned int mem_prealloc = 0, mem_share = 0, mem_unlink = 1;
> >
> > hpagesize = gethugepagesize(path);
> > if (!hpagesize) {
> > - return NULL;
> > + hpagesize = MIN_HUGE_PAGE_SIZE;
> > }
> >
> > if (memory < hpagesize) {
> > @@ -1010,6 +1015,14 @@ static void *file_ram_alloc(RAMBlock *block,
> > return NULL;
> > }
> >
> > + /* Fill config options */
> > + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> > + if (opts) {
> > + mem_prealloc = qemu_opt_get_bool(opts, "prealloc", 0);
> > + mem_share = qemu_opt_get_bool(opts, "share", 0);
> > + mem_unlink = qemu_opt_get_bool(opts, "unlink", 1);
> > + }
> > +
> > /* Make name safe to use with mkstemp by replacing '/' with '_'. */
> > sanitized_name = g_strdup(block->mr->name);
> > for (c = sanitized_name; *c != '\0'; c++) {
> > @@ -1017,20 +1030,28 @@ static void *file_ram_alloc(RAMBlock *block,
> > *c = '_';
> > }
> >
> > - filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
> > - sanitized_name);
> > + filename = g_strdup_printf("%s/qemu_back_mem.%s%s", path,
> sanitized_name,
> > + (mem_unlink) ? ".XXXXXX" : "");
> > g_free(sanitized_name);
> >
> > - fd = mkstemp(filename);
> > + if (mem_unlink) {
> > + fd = mkstemp(filename);
> > + } else {
> > + fd = open(filename, O_CREAT | O_RDWR | O_EXCL,
> > + S_IRWXU | S_IRWXG | S_IRWXO);
> > + }
> > if (fd < 0) {
> > - perror("unable to create backing store for hugepages");
> > + perror("unable to create guest RAM backing store");
> > g_free(filename);
> > return NULL;
> > }
> > - unlink(filename);
> > +
> > + if (mem_unlink) {
> > + unlink(filename);
> > + }
>
> If we don't unlink, what will?
>
The option to not unlink the file was added based on feedback on these
mailing lists:
http://lists.nongnu.org/archive/html/qemu-devel/2013-12/msg02769.html
>From our side we don't strictly need it, and we can remove it if it is
preferred that way.
>
> > g_free(filename);
> >
> > - memory = (memory+hpagesize-1) & ~(hpagesize-1);
> > + memory = (memory + hpagesize - 1) & ~(hpagesize - 1);
> >
> > /*
> > * ftruncate is not supported by hugetlbfs in older
> > @@ -1041,7 +1062,8 @@ static void *file_ram_alloc(RAMBlock *block,
> > if (ftruncate(fd, memory))
> > perror("ftruncate");
> >
> > - area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
> > + flags = mem_share ? MAP_SHARED : MAP_PRIVATE;
> > + area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
> > if (area == MAP_FAILED) {
> > perror("file_ram_alloc: can't mmap RAM pages");
> > close(fd);
> > @@ -1211,11 +1233,18 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t
> size, void *host,
> > MemoryRegion *mr)
> > {
> > RAMBlock *block, *new_block;
> > + QemuOpts *opts;
> > + const char *mem_path = 0;
> >
> > size = TARGET_PAGE_ALIGN(size);
> > new_block = g_malloc0(sizeof(*new_block));
> > new_block->fd = -1;
> >
> > + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> > + if (opts) {
> > + mem_path = qemu_opt_get(opts, "path");
> > + }
> > +
> > /* This assumes the iothread lock is taken here too. */
> > qemu_mutex_lock_ramlist();
> > new_block->mr = mr;
> > @@ -1348,6 +1377,14 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t
> length)
> > ram_addr_t offset;
> > int flags;
> > void *area, *vaddr;
> > + QemuOpts *opts;
> > + unsigned int mem_prealloc = 0;
> > +
> > + /* Fill config options */
> > + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> > + if (opts) {
> > + mem_prealloc = qemu_opt_get_bool(opts, "prealloc", 0);
> > + }
> >
> > QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> > offset = addr - block->offset;
> > diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> > index b6998f0..4f8e989 100644
> > --- a/include/exec/cpu-all.h
> > +++ b/include/exec/cpu-all.h
> > @@ -467,9 +467,6 @@ typedef struct RAMList {
> > } RAMList;
> > extern RAMList ram_list;
> >
> > -extern const char *mem_path;
> > -extern int mem_prealloc;
> > -
> > /* Flags stored in the low bits of the TLB virtual address. These are
> > defined so that fast path ram access is all zeros. */
> > /* Zero if TLB entry is valid. */
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index bcfe9ea..0d35c9c 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -221,9 +221,15 @@ gigabytes respectively.
> > ETEXI
> >
> > DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
> > - "-mem-path FILE provide backing storage for guest RAM\n",
> QEMU_ARCH_ALL)
> > + "-mem-path
> [path=]path[,prealloc=on|off][,share=on|off][,unlink=on|off]\n"
> > + " provide backing storage for guest RAM\n"
> > + " path= a directory path for the backing store\n"
> > + " prealloc= preallocate guest memory [default
> disabled]\n"
> > + " share= enable mmap share flag [default disabled]\n"
> > + " unlink= enable unlinking the guest RAM files
> [default enabled]\n",
> > + QEMU_ARCH_ALL)
> > STEXI
> > -@item -mem-path @var{path}
> > +@item -mem-path
> [path=]@var{path}[,prealloc=on|off][,share=on|off][,unlink=on|off]
> > @findex -mem-path
> > Allocate guest RAM from a temporarily created file in @var{path}.
> > ETEXI
> > diff --git a/vl.c b/vl.c
> > index 7511e70..4a52e0d 100644
> > --- a/vl.c
> > +++ b/vl.c
> > @@ -187,8 +187,6 @@ DisplayType display_type = DT_DEFAULT;
> > static int display_remote;
> > const char* keyboard_layout = NULL;
> > ram_addr_t ram_size;
> > -const char *mem_path = NULL;
> > -int mem_prealloc = 0; /* force preallocation of physical target memory
> */
> > int nb_nics;
> > NICInfo nd_table[MAX_NICS];
> > int autostart;
> > @@ -531,6 +529,31 @@ static QemuOptsList qemu_msg_opts = {
> > },
> > };
> >
> > +static QemuOptsList qemu_mem_path_opts = {
> > + .name = "mem-path",
> > + .implied_opt_name = "path",
> > + .head = QTAILQ_HEAD_INITIALIZER(qemu_mem_path_opts.head),
> > + .desc = {
> > + {
> > + .name = "path",
> > + .type = QEMU_OPT_STRING,
> > + },
> > + {
> > + .name = "prealloc",
> > + .type = QEMU_OPT_BOOL,
> > + },
> > + {
> > + .name = "share",
> > + .type = QEMU_OPT_BOOL,
> > + },
> > + {
> > + .name = "unlink",
> > + .type = QEMU_OPT_BOOL,
> > + },
> > + { /* end of list */ }
> > + },
> > +};
> > +
> > /**
> > * Get machine options
> > *
> > @@ -2892,6 +2915,7 @@ int main(int argc, char **argv, char **envp)
> > qemu_add_opts(&qemu_tpmdev_opts);
> > qemu_add_opts(&qemu_realtime_opts);
> > qemu_add_opts(&qemu_msg_opts);
> > + qemu_add_opts(&qemu_mem_path_opts);
> >
> > runstate_init();
> >
> > @@ -3209,11 +3233,18 @@ int main(int argc, char **argv, char **envp)
> > break;
> > #endif
> > case QEMU_OPTION_mempath:
> > - mem_path = optarg;
> > + if (!qemu_opts_parse(qemu_find_opts("mem-path"),
> optarg, 1)) {
> > + exit(1);
> > + }
> > break;
> > - case QEMU_OPTION_mem_prealloc:
> > - mem_prealloc = 1;
> > + case QEMU_OPTION_mem_prealloc: {
> > + QemuOpts *mem_opts =
> qemu_opts_find(qemu_find_opts("mem-path"),
> > + NULL);
> > + if (mem_opts) {
> > + qemu_opt_set(mem_opts, "prealloc", "on");
> > + }
> > break;
> > + }
> > case QEMU_OPTION_d:
> > log_mask = optarg;
> > break;
> > --
> > 1.8.3.2
> >
>
[-- Attachment #2: Type: text/html, Size: 12974 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 1/7] Convert -mem-path to QemuOpts and add prealloc, share and unlink properties
2014-01-10 11:05 ` Antonios Motakis
@ 2014-01-10 12:31 ` Michael S. Tsirkin
0 siblings, 0 replies; 24+ messages in thread
From: Michael S. Tsirkin @ 2014-01-10 12:31 UTC (permalink / raw)
To: Antonios Motakis
Cc: Peter Maydell, snabb-devel, Anthony Liguori, Jan Kiszka,
Michael Tokarev, qemu-devel qemu-devel, Nikolay Nikolaev,
Alexander Graf, Stefan Hajnoczi, Luke Gorrie, Paolo Bonzini,
VirtualOpenSystems Technical Team, Andreas Färber,
Markus Armbruster, Richard Henderson
On Fri, Jan 10, 2014 at 12:05:57PM +0100, Antonios Motakis wrote:
>
>
>
> On Thu, Jan 9, 2014 at 5:01 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Thu, Jan 09, 2014 at 03:59:55PM +0100, Antonios Motakis wrote:
> > Extend -mem-path with additional properties:
> >
> > - prealloc=on|off - default off, same as -mem-prealloc
> > - share=on|off - default off, memory is mmapped with MAP_SHARED flag
> > - unlink=on|off - default on, inlink the file after openinng it
>
> typos
>
>
> Ack.
>
>
>
> >
> > Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > ---
> > exec.c | 57
> +++++++++++++++++++++++++++++++++++++++++---------
> > include/exec/cpu-all.h | 3 ---
> > qemu-options.hx | 10 +++++++--
> > vl.c | 41 +++++++++++++++++++++++++++++++-----
> > 4 files changed, 91 insertions(+), 20 deletions(-)
> >
> > diff --git a/exec.c b/exec.c
> > index 7e49e8e..30f4019 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -957,6 +957,7 @@ void qemu_mutex_unlock_ramlist(void)
> > #include <sys/vfs.h>
> >
> > #define HUGETLBFS_MAGIC 0x958458f6
> > +#define MIN_HUGE_PAGE_SIZE (2*1024*1024)
> >
> > static long gethugepagesize(const char *path)
> > {
> > @@ -972,8 +973,9 @@ static long gethugepagesize(const char *path)
> > return 0;
> > }
> >
> > - if (fs.f_type != HUGETLBFS_MAGIC)
> > - fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> > + if (fs.f_type != HUGETLBFS_MAGIC) {
> > + return 0;
> > + }
> >
> > return fs.f_bsize;
> > }
> > @@ -994,11 +996,14 @@ static void *file_ram_alloc(RAMBlock *block,
> > char *c;
> > void *area;
> > int fd;
> > + int flags;
> > unsigned long hpagesize;
> > + QemuOpts *opts;
> > + unsigned int mem_prealloc = 0, mem_share = 0, mem_unlink = 1;
> >
> > hpagesize = gethugepagesize(path);
> > if (!hpagesize) {
> > - return NULL;
> > + hpagesize = MIN_HUGE_PAGE_SIZE;
> > }
> >
> > if (memory < hpagesize) {
> > @@ -1010,6 +1015,14 @@ static void *file_ram_alloc(RAMBlock *block,
> > return NULL;
> > }
> >
> > + /* Fill config options */
> > + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> > + if (opts) {
> > + mem_prealloc = qemu_opt_get_bool(opts, "prealloc", 0);
> > + mem_share = qemu_opt_get_bool(opts, "share", 0);
> > + mem_unlink = qemu_opt_get_bool(opts, "unlink", 1);
> > + }
> > +
> > /* Make name safe to use with mkstemp by replacing '/' with '_'. */
> > sanitized_name = g_strdup(block->mr->name);
> > for (c = sanitized_name; *c != '\0'; c++) {
> > @@ -1017,20 +1030,28 @@ static void *file_ram_alloc(RAMBlock *block,
> > *c = '_';
> > }
> >
> > - filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
> > - sanitized_name);
> > + filename = g_strdup_printf("%s/qemu_back_mem.%s%s", path,
> sanitized_name,
> > + (mem_unlink) ? ".XXXXXX" : "");
> > g_free(sanitized_name);
> >
> > - fd = mkstemp(filename);
> > + if (mem_unlink) {
> > + fd = mkstemp(filename);
> > + } else {
> > + fd = open(filename, O_CREAT | O_RDWR | O_EXCL,
> > + S_IRWXU | S_IRWXG | S_IRWXO);
> > + }
> > if (fd < 0) {
> > - perror("unable to create backing store for hugepages");
> > + perror("unable to create guest RAM backing store");
> > g_free(filename);
> > return NULL;
> > }
> > - unlink(filename);
> > +
> > + if (mem_unlink) {
> > + unlink(filename);
> > + }
>
> If we don't unlink, what will?
>
>
> The option to not unlink the file was added based on feedback on these mailing
> lists: http://lists.nongnu.org/archive/html/qemu-devel/2013-12/msg02769.html
>
> From our side we don't strictly need it, and we can remove it if it is
> preferred that way.
Maybe split this out as a separate patch.
>
>
> > g_free(filename);
> >
> > - memory = (memory+hpagesize-1) & ~(hpagesize-1);
> > + memory = (memory + hpagesize - 1) & ~(hpagesize - 1);
> >
> > /*
> > * ftruncate is not supported by hugetlbfs in older
> > @@ -1041,7 +1062,8 @@ static void *file_ram_alloc(RAMBlock *block,
> > if (ftruncate(fd, memory))
> > perror("ftruncate");
> >
> > - area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
> > + flags = mem_share ? MAP_SHARED : MAP_PRIVATE;
> > + area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
> > if (area == MAP_FAILED) {
> > perror("file_ram_alloc: can't mmap RAM pages");
> > close(fd);
> > @@ -1211,11 +1233,18 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t
> size, void *host,
> > MemoryRegion *mr)
> > {
> > RAMBlock *block, *new_block;
> > + QemuOpts *opts;
> > + const char *mem_path = 0;
> >
> > size = TARGET_PAGE_ALIGN(size);
> > new_block = g_malloc0(sizeof(*new_block));
> > new_block->fd = -1;
> >
> > + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> > + if (opts) {
> > + mem_path = qemu_opt_get(opts, "path");
> > + }
> > +
> > /* This assumes the iothread lock is taken here too. */
> > qemu_mutex_lock_ramlist();
> > new_block->mr = mr;
> > @@ -1348,6 +1377,14 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t
> length)
> > ram_addr_t offset;
> > int flags;
> > void *area, *vaddr;
> > + QemuOpts *opts;
> > + unsigned int mem_prealloc = 0;
> > +
> > + /* Fill config options */
> > + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> > + if (opts) {
> > + mem_prealloc = qemu_opt_get_bool(opts, "prealloc", 0);
> > + }
> >
> > QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> > offset = addr - block->offset;
> > diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> > index b6998f0..4f8e989 100644
> > --- a/include/exec/cpu-all.h
> > +++ b/include/exec/cpu-all.h
> > @@ -467,9 +467,6 @@ typedef struct RAMList {
> > } RAMList;
> > extern RAMList ram_list;
> >
> > -extern const char *mem_path;
> > -extern int mem_prealloc;
> > -
> > /* Flags stored in the low bits of the TLB virtual address. These are
> > defined so that fast path ram access is all zeros. */
> > /* Zero if TLB entry is valid. */
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index bcfe9ea..0d35c9c 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -221,9 +221,15 @@ gigabytes respectively.
> > ETEXI
> >
> > DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
> > - "-mem-path FILE provide backing storage for guest RAM\n",
> QEMU_ARCH_ALL)
> > + "-mem-path [path=]path[,prealloc=on|off][,share=on|off][,unlink=on|
> off]\n"
> > + " provide backing storage for guest RAM\n"
> > + " path= a directory path for the backing store\n"
> > + " prealloc= preallocate guest memory [default
> disabled]\n"
> > + " share= enable mmap share flag [default disabled]\n"
> > + " unlink= enable unlinking the guest RAM files
> [default enabled]\n",
> > + QEMU_ARCH_ALL)
> > STEXI
> > -@item -mem-path @var{path}
> > +@item -mem-path [path=]@var{path}[,prealloc=on|off][,share=on|off]
> [,unlink=on|off]
> > @findex -mem-path
> > Allocate guest RAM from a temporarily created file in @var{path}.
> > ETEXI
> > diff --git a/vl.c b/vl.c
> > index 7511e70..4a52e0d 100644
> > --- a/vl.c
> > +++ b/vl.c
> > @@ -187,8 +187,6 @@ DisplayType display_type = DT_DEFAULT;
> > static int display_remote;
> > const char* keyboard_layout = NULL;
> > ram_addr_t ram_size;
> > -const char *mem_path = NULL;
> > -int mem_prealloc = 0; /* force preallocation of physical target memory *
> /
> > int nb_nics;
> > NICInfo nd_table[MAX_NICS];
> > int autostart;
> > @@ -531,6 +529,31 @@ static QemuOptsList qemu_msg_opts = {
> > },
> > };
> >
> > +static QemuOptsList qemu_mem_path_opts = {
> > + .name = "mem-path",
> > + .implied_opt_name = "path",
> > + .head = QTAILQ_HEAD_INITIALIZER(qemu_mem_path_opts.head),
> > + .desc = {
> > + {
> > + .name = "path",
> > + .type = QEMU_OPT_STRING,
> > + },
> > + {
> > + .name = "prealloc",
> > + .type = QEMU_OPT_BOOL,
> > + },
> > + {
> > + .name = "share",
> > + .type = QEMU_OPT_BOOL,
> > + },
> > + {
> > + .name = "unlink",
> > + .type = QEMU_OPT_BOOL,
> > + },
> > + { /* end of list */ }
> > + },
> > +};
> > +
> > /**
> > * Get machine options
> > *
> > @@ -2892,6 +2915,7 @@ int main(int argc, char **argv, char **envp)
> > qemu_add_opts(&qemu_tpmdev_opts);
> > qemu_add_opts(&qemu_realtime_opts);
> > qemu_add_opts(&qemu_msg_opts);
> > + qemu_add_opts(&qemu_mem_path_opts);
> >
> > runstate_init();
> >
> > @@ -3209,11 +3233,18 @@ int main(int argc, char **argv, char **envp)
> > break;
> > #endif
> > case QEMU_OPTION_mempath:
> > - mem_path = optarg;
> > + if (!qemu_opts_parse(qemu_find_opts("mem-path"), optarg,
> 1)) {
> > + exit(1);
> > + }
> > break;
> > - case QEMU_OPTION_mem_prealloc:
> > - mem_prealloc = 1;
> > + case QEMU_OPTION_mem_prealloc: {
> > + QemuOpts *mem_opts = qemu_opts_find(qemu_find_opts
> ("mem-path"),
> > + NULL);
> > + if (mem_opts) {
> > + qemu_opt_set(mem_opts, "prealloc", "on");
> > + }
> > break;
> > + }
> > case QEMU_OPTION_d:
> > log_mask = optarg;
> > break;
> > --
> > 1.8.3.2
> >
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 1/7] Convert -mem-path to QemuOpts and add prealloc, share and unlink properties
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 1/7] Convert -mem-path to QemuOpts and add prealloc, share and unlink properties Antonios Motakis
2014-01-09 16:01 ` Michael S. Tsirkin
@ 2014-01-13 8:30 ` Edgar E. Iglesias
1 sibling, 0 replies; 24+ messages in thread
From: Edgar E. Iglesias @ 2014-01-13 8:30 UTC (permalink / raw)
To: Antonios Motakis
Cc: Peter Maydell, snabb-devel, Anthony Liguori, Jan Kiszka,
Michael Tokarev, qemu-devel, n.nikolaev, Alexander Graf,
Stefan Hajnoczi, lukego, Paolo Bonzini, tech, Andreas Färber,
Markus Armbruster, Richard Henderson
On Thu, Jan 09, 2014 at 03:59:55PM +0100, Antonios Motakis wrote:
> Extend -mem-path with additional properties:
>
> - prealloc=on|off - default off, same as -mem-prealloc
> - share=on|off - default off, memory is mmapped with MAP_SHARED flag
> - unlink=on|off - default on, inlink the file after openinng it
>
> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
I like this approach and think it will work fine for my use-case, thanks.
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
> ---
> exec.c | 57 +++++++++++++++++++++++++++++++++++++++++---------
> include/exec/cpu-all.h | 3 ---
> qemu-options.hx | 10 +++++++--
> vl.c | 41 +++++++++++++++++++++++++++++++-----
> 4 files changed, 91 insertions(+), 20 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 7e49e8e..30f4019 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -957,6 +957,7 @@ void qemu_mutex_unlock_ramlist(void)
> #include <sys/vfs.h>
>
> #define HUGETLBFS_MAGIC 0x958458f6
> +#define MIN_HUGE_PAGE_SIZE (2*1024*1024)
>
> static long gethugepagesize(const char *path)
> {
> @@ -972,8 +973,9 @@ static long gethugepagesize(const char *path)
> return 0;
> }
>
> - if (fs.f_type != HUGETLBFS_MAGIC)
> - fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> + if (fs.f_type != HUGETLBFS_MAGIC) {
> + return 0;
> + }
>
> return fs.f_bsize;
> }
> @@ -994,11 +996,14 @@ static void *file_ram_alloc(RAMBlock *block,
> char *c;
> void *area;
> int fd;
> + int flags;
> unsigned long hpagesize;
> + QemuOpts *opts;
> + unsigned int mem_prealloc = 0, mem_share = 0, mem_unlink = 1;
>
> hpagesize = gethugepagesize(path);
> if (!hpagesize) {
> - return NULL;
> + hpagesize = MIN_HUGE_PAGE_SIZE;
> }
>
> if (memory < hpagesize) {
> @@ -1010,6 +1015,14 @@ static void *file_ram_alloc(RAMBlock *block,
> return NULL;
> }
>
> + /* Fill config options */
> + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> + if (opts) {
> + mem_prealloc = qemu_opt_get_bool(opts, "prealloc", 0);
> + mem_share = qemu_opt_get_bool(opts, "share", 0);
> + mem_unlink = qemu_opt_get_bool(opts, "unlink", 1);
> + }
> +
> /* Make name safe to use with mkstemp by replacing '/' with '_'. */
> sanitized_name = g_strdup(block->mr->name);
> for (c = sanitized_name; *c != '\0'; c++) {
> @@ -1017,20 +1030,28 @@ static void *file_ram_alloc(RAMBlock *block,
> *c = '_';
> }
>
> - filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
> - sanitized_name);
> + filename = g_strdup_printf("%s/qemu_back_mem.%s%s", path, sanitized_name,
> + (mem_unlink) ? ".XXXXXX" : "");
> g_free(sanitized_name);
>
> - fd = mkstemp(filename);
> + if (mem_unlink) {
> + fd = mkstemp(filename);
> + } else {
> + fd = open(filename, O_CREAT | O_RDWR | O_EXCL,
> + S_IRWXU | S_IRWXG | S_IRWXO);
> + }
> if (fd < 0) {
> - perror("unable to create backing store for hugepages");
> + perror("unable to create guest RAM backing store");
> g_free(filename);
> return NULL;
> }
> - unlink(filename);
> +
> + if (mem_unlink) {
> + unlink(filename);
> + }
> g_free(filename);
>
> - memory = (memory+hpagesize-1) & ~(hpagesize-1);
> + memory = (memory + hpagesize - 1) & ~(hpagesize - 1);
>
> /*
> * ftruncate is not supported by hugetlbfs in older
> @@ -1041,7 +1062,8 @@ static void *file_ram_alloc(RAMBlock *block,
> if (ftruncate(fd, memory))
> perror("ftruncate");
>
> - area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
> + flags = mem_share ? MAP_SHARED : MAP_PRIVATE;
> + area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
> if (area == MAP_FAILED) {
> perror("file_ram_alloc: can't mmap RAM pages");
> close(fd);
> @@ -1211,11 +1233,18 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
> MemoryRegion *mr)
> {
> RAMBlock *block, *new_block;
> + QemuOpts *opts;
> + const char *mem_path = 0;
>
> size = TARGET_PAGE_ALIGN(size);
> new_block = g_malloc0(sizeof(*new_block));
> new_block->fd = -1;
>
> + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> + if (opts) {
> + mem_path = qemu_opt_get(opts, "path");
> + }
> +
> /* This assumes the iothread lock is taken here too. */
> qemu_mutex_lock_ramlist();
> new_block->mr = mr;
> @@ -1348,6 +1377,14 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
> ram_addr_t offset;
> int flags;
> void *area, *vaddr;
> + QemuOpts *opts;
> + unsigned int mem_prealloc = 0;
> +
> + /* Fill config options */
> + opts = qemu_opts_find(qemu_find_opts("mem-path"), NULL);
> + if (opts) {
> + mem_prealloc = qemu_opt_get_bool(opts, "prealloc", 0);
> + }
>
> QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> offset = addr - block->offset;
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index b6998f0..4f8e989 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -467,9 +467,6 @@ typedef struct RAMList {
> } RAMList;
> extern RAMList ram_list;
>
> -extern const char *mem_path;
> -extern int mem_prealloc;
> -
> /* Flags stored in the low bits of the TLB virtual address. These are
> defined so that fast path ram access is all zeros. */
> /* Zero if TLB entry is valid. */
> diff --git a/qemu-options.hx b/qemu-options.hx
> index bcfe9ea..0d35c9c 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -221,9 +221,15 @@ gigabytes respectively.
> ETEXI
>
> DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
> - "-mem-path FILE provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
> + "-mem-path [path=]path[,prealloc=on|off][,share=on|off][,unlink=on|off]\n"
> + " provide backing storage for guest RAM\n"
> + " path= a directory path for the backing store\n"
> + " prealloc= preallocate guest memory [default disabled]\n"
> + " share= enable mmap share flag [default disabled]\n"
> + " unlink= enable unlinking the guest RAM files [default enabled]\n",
> + QEMU_ARCH_ALL)
> STEXI
> -@item -mem-path @var{path}
> +@item -mem-path [path=]@var{path}[,prealloc=on|off][,share=on|off][,unlink=on|off]
> @findex -mem-path
> Allocate guest RAM from a temporarily created file in @var{path}.
> ETEXI
> diff --git a/vl.c b/vl.c
> index 7511e70..4a52e0d 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -187,8 +187,6 @@ DisplayType display_type = DT_DEFAULT;
> static int display_remote;
> const char* keyboard_layout = NULL;
> ram_addr_t ram_size;
> -const char *mem_path = NULL;
> -int mem_prealloc = 0; /* force preallocation of physical target memory */
> int nb_nics;
> NICInfo nd_table[MAX_NICS];
> int autostart;
> @@ -531,6 +529,31 @@ static QemuOptsList qemu_msg_opts = {
> },
> };
>
> +static QemuOptsList qemu_mem_path_opts = {
> + .name = "mem-path",
> + .implied_opt_name = "path",
> + .head = QTAILQ_HEAD_INITIALIZER(qemu_mem_path_opts.head),
> + .desc = {
> + {
> + .name = "path",
> + .type = QEMU_OPT_STRING,
> + },
> + {
> + .name = "prealloc",
> + .type = QEMU_OPT_BOOL,
> + },
> + {
> + .name = "share",
> + .type = QEMU_OPT_BOOL,
> + },
> + {
> + .name = "unlink",
> + .type = QEMU_OPT_BOOL,
> + },
> + { /* end of list */ }
> + },
> +};
> +
> /**
> * Get machine options
> *
> @@ -2892,6 +2915,7 @@ int main(int argc, char **argv, char **envp)
> qemu_add_opts(&qemu_tpmdev_opts);
> qemu_add_opts(&qemu_realtime_opts);
> qemu_add_opts(&qemu_msg_opts);
> + qemu_add_opts(&qemu_mem_path_opts);
>
> runstate_init();
>
> @@ -3209,11 +3233,18 @@ int main(int argc, char **argv, char **envp)
> break;
> #endif
> case QEMU_OPTION_mempath:
> - mem_path = optarg;
> + if (!qemu_opts_parse(qemu_find_opts("mem-path"), optarg, 1)) {
> + exit(1);
> + }
> break;
> - case QEMU_OPTION_mem_prealloc:
> - mem_prealloc = 1;
> + case QEMU_OPTION_mem_prealloc: {
> + QemuOpts *mem_opts = qemu_opts_find(qemu_find_opts("mem-path"),
> + NULL);
> + if (mem_opts) {
> + qemu_opt_set(mem_opts, "prealloc", "on");
> + }
> break;
> + }
> case QEMU_OPTION_d:
> log_mask = optarg;
> break;
> --
> 1.8.3.2
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH v5 2/7] Decouple vhost from kernel interface
2014-01-09 14:59 [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends Antonios Motakis
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 1/7] Convert -mem-path to QemuOpts and add prealloc, share and unlink properties Antonios Motakis
@ 2014-01-09 14:59 ` Antonios Motakis
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 3/7] Add vhost-user skeleton Antonios Motakis
` (5 subsequent siblings)
7 siblings, 0 replies; 24+ messages in thread
From: Antonios Motakis @ 2014-01-09 14:59 UTC (permalink / raw)
To: qemu-devel, snabb-devel
Cc: Peter Maydell, Michael S. Tsirkin, n.nikolaev, Anthony Liguori,
Paolo Bonzini, lukego, Antonios Motakis, tech, KONRAD Frederic
We introduce the concept of vhost-backend, which can be either vhost-kernel
or vhost-user. The existing vhost interface to the kernel is abstracted
behind the vhost-kernel backend.
We replace all direct ioctls to the kernel with a vhost_call to the backend.
vhost dev->control is referenced only in the vhost-backend (ioctl, open, close).
Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
hw/net/vhost_net.c | 13 +++++---
hw/scsi/vhost-scsi.c | 13 +++++---
hw/virtio/Makefile.objs | 2 +-
hw/virtio/vhost-backend.c | 65 +++++++++++++++++++++++++++++++++++++++
hw/virtio/vhost.c | 47 +++++++++++++++-------------
include/hw/virtio/vhost-backend.h | 37 ++++++++++++++++++++++
include/hw/virtio/vhost.h | 4 ++-
7 files changed, 148 insertions(+), 33 deletions(-)
create mode 100644 hw/virtio/vhost-backend.c
create mode 100644 include/hw/virtio/vhost-backend.h
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 006576d..4aaf0b4 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -27,7 +27,6 @@
#include <sys/socket.h>
#include <linux/kvm.h>
#include <fcntl.h>
-#include <sys/ioctl.h>
#include <linux/virtio_ring.h>
#include <netpacket/packet.h>
#include <net/ethernet.h>
@@ -113,7 +112,8 @@ struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
net->dev.nvqs = 2;
net->dev.vqs = net->vqs;
- r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force);
+ r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net",
+ VHOST_BACKEND_TYPE_KERNEL, force);
if (r < 0) {
goto fail;
}
@@ -170,7 +170,8 @@ static int vhost_net_start_one(struct vhost_net *net,
qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
file.fd = net->backend;
for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
- r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
+ const VhostOps *vhost_ops = net->dev.vhost_ops;
+ r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, &file);
if (r < 0) {
r = -errno;
goto fail;
@@ -180,7 +181,8 @@ static int vhost_net_start_one(struct vhost_net *net,
fail:
file.fd = -1;
while (file.index-- > 0) {
- int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
+ const VhostOps *vhost_ops = net->dev.vhost_ops;
+ int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, &file);
assert(r >= 0);
}
net->nc->info->poll(net->nc, true);
@@ -201,7 +203,8 @@ static void vhost_net_stop_one(struct vhost_net *net,
}
for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
- int r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
+ const VhostOps *vhost_ops = net->dev.vhost_ops;
+ int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, &file);
assert(r >= 0);
}
net->nc->info->poll(net->nc, true);
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 3983a5b..3faff65 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -27,12 +27,13 @@
static int vhost_scsi_set_endpoint(VHostSCSI *s)
{
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+ const VhostOps *vhost_ops = s->dev.vhost_ops;
struct vhost_scsi_target backend;
int ret;
memset(&backend, 0, sizeof(backend));
pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn);
- ret = ioctl(s->dev.control, VHOST_SCSI_SET_ENDPOINT, &backend);
+ ret = vhost_ops->vhost_call(&s->dev, VHOST_SCSI_SET_ENDPOINT, &backend);
if (ret < 0) {
return -errno;
}
@@ -43,10 +44,11 @@ static void vhost_scsi_clear_endpoint(VHostSCSI *s)
{
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
struct vhost_scsi_target backend;
+ const VhostOps *vhost_ops = s->dev.vhost_ops;
memset(&backend, 0, sizeof(backend));
pstrcpy(backend.vhost_wwpn, sizeof(backend.vhost_wwpn), vs->conf.wwpn);
- ioctl(s->dev.control, VHOST_SCSI_CLEAR_ENDPOINT, &backend);
+ vhost_ops->vhost_call(&s->dev, VHOST_SCSI_CLEAR_ENDPOINT, &backend);
}
static int vhost_scsi_start(VHostSCSI *s)
@@ -55,13 +57,15 @@ static int vhost_scsi_start(VHostSCSI *s)
VirtIODevice *vdev = VIRTIO_DEVICE(s);
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+ const VhostOps *vhost_ops = s->dev.vhost_ops;
if (!k->set_guest_notifiers) {
error_report("binding does not support guest notifiers");
return -ENOSYS;
}
- ret = ioctl(s->dev.control, VHOST_SCSI_GET_ABI_VERSION, &abi_version);
+ ret = vhost_ops->vhost_call(&s->dev,
+ VHOST_SCSI_GET_ABI_VERSION, &abi_version);
if (ret < 0) {
return -errno;
}
@@ -227,7 +231,8 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
s->dev.vq_index = 0;
- ret = vhost_dev_init(&s->dev, vhostfd, "/dev/vhost-scsi", true);
+ ret = vhost_dev_init(&s->dev, vhostfd, "/dev/vhost-scsi",
+ VHOST_BACKEND_TYPE_KERNEL, true);
if (ret < 0) {
error_setg(errp, "vhost-scsi: vhost initialization failed: %s",
strerror(-ret));
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index 1ba53d9..51e5bdb 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -5,4 +5,4 @@ common-obj-y += virtio-mmio.o
common-obj-$(CONFIG_VIRTIO_BLK_DATA_PLANE) += dataplane/
obj-y += virtio.o virtio-balloon.o
-obj-$(CONFIG_LINUX) += vhost.o
+obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
new file mode 100644
index 0000000..c0f11e4
--- /dev/null
+++ b/hw/virtio/vhost-backend.c
@@ -0,0 +1,65 @@
+/*
+ * vhost-backend
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
+#include "qemu/error-report.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request,
+ void *arg)
+{
+ int fd = dev->control;
+
+ assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL);
+
+ return ioctl(fd, request, arg);
+}
+
+static int vhost_kernel_init(struct vhost_dev *dev, const char *devpath)
+{
+ assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL);
+
+ dev->control = open(devpath, O_RDWR);
+ return dev->control;
+}
+
+static int vhost_kernel_cleanup(struct vhost_dev *dev)
+{
+ assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_KERNEL);
+
+ return close(dev->control);
+}
+
+static const VhostOps kernel_ops = {
+ .backend_type = VHOST_BACKEND_TYPE_KERNEL,
+ .vhost_call = vhost_kernel_call,
+ .vhost_backend_init = vhost_kernel_init,
+ .vhost_backend_cleanup = vhost_kernel_cleanup
+};
+
+int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
+{
+ int r = 0;
+
+ switch (backend_type) {
+ case VHOST_BACKEND_TYPE_KERNEL:
+ dev->vhost_ops = &kernel_ops;
+ break;
+ default:
+ error_report("Unknown vhost backend type\n");
+ r = -1;
+ }
+
+ return r;
+}
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 9e336ad..a1137e1 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -13,8 +13,8 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
-#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-backend.h"
#include "hw/hw.h"
#include "qemu/atomic.h"
#include "qemu/range.h"
@@ -291,7 +291,7 @@ static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
log = g_malloc0(size * sizeof *log);
log_base = (uint64_t)(unsigned long)log;
- r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base);
assert(r >= 0);
/* Sync only the range covered by the old log */
if (dev->log_size) {
@@ -460,7 +460,7 @@ static void vhost_commit(MemoryListener *listener)
}
if (!dev->log_enabled) {
- r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
assert(r >= 0);
dev->memory_changed = false;
return;
@@ -473,7 +473,7 @@ static void vhost_commit(MemoryListener *listener)
if (dev->log_size < log_size) {
vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
}
- r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem);
assert(r >= 0);
/* To log less, can only decrease log size after table update. */
if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
@@ -541,7 +541,7 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
.log_guest_addr = vq->used_phys,
.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
};
- int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+ int r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ADDR, &addr);
if (r < 0) {
return -errno;
}
@@ -555,7 +555,7 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
if (enable_log) {
features |= 0x1 << VHOST_F_LOG_ALL;
}
- r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_FEATURES, &features);
return r < 0 ? -errno : 0;
}
@@ -670,13 +670,13 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
vq->num = state.num = virtio_queue_get_num(vdev, idx);
- r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_NUM, &state);
if (r) {
return -errno;
}
state.num = virtio_queue_get_last_avail_idx(vdev, idx);
- r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_BASE, &state);
if (r) {
return -errno;
}
@@ -718,7 +718,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
}
file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
- r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_KICK, &file);
if (r) {
r = -errno;
goto fail_kick;
@@ -756,7 +756,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
};
int r;
assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
- r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_GET_VRING_BASE, &state);
if (r < 0) {
fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
fflush(stderr);
@@ -798,7 +798,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
}
file.fd = event_notifier_get_fd(&vq->masked_notifier);
- r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+ r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_CALL, &file);
if (r) {
r = -errno;
goto fail_call;
@@ -815,24 +815,28 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
}
int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
- bool force)
+ VhostBackendType backend_type, bool force)
{
uint64_t features;
int i, r;
+
+ if (vhost_set_backend_type(hdev, backend_type) < 0) {
+ return -1;
+ }
+
if (devfd >= 0) {
hdev->control = devfd;
} else {
- hdev->control = open(devpath, O_RDWR);
- if (hdev->control < 0) {
+ if (hdev->vhost_ops->vhost_backend_init(hdev, devpath) < 0) {
return -errno;
}
}
- r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
+ r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL);
if (r < 0) {
goto fail;
}
- r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
+ r = hdev->vhost_ops->vhost_call(hdev, VHOST_GET_FEATURES, &features);
if (r < 0) {
goto fail;
}
@@ -877,7 +881,7 @@ fail_vq:
}
fail:
r = -errno;
- close(hdev->control);
+ hdev->vhost_ops->vhost_backend_cleanup(hdev);
return r;
}
@@ -890,7 +894,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
memory_listener_unregister(&hdev->memory_listener);
g_free(hdev->mem);
g_free(hdev->mem_sections);
- close(hdev->control);
+ hdev->vhost_ops->vhost_backend_cleanup(hdev);
}
bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
@@ -992,7 +996,7 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
} else {
file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
}
- r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file);
+ r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_VRING_CALL, &file);
assert(r >= 0);
}
@@ -1007,7 +1011,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
if (r < 0) {
goto fail_features;
}
- r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem);
+ r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
if (r < 0) {
r = -errno;
goto fail_mem;
@@ -1026,8 +1030,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
hdev->log_size = vhost_get_log_size(hdev);
hdev->log = hdev->log_size ?
g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
- r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
- (uint64_t)(unsigned long)hdev->log);
+ r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, hdev->log);
if (r < 0) {
r = -errno;
goto fail_log;
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
new file mode 100644
index 0000000..93fc55b
--- /dev/null
+++ b/include/hw/virtio/vhost-backend.h
@@ -0,0 +1,37 @@
+/*
+ * vhost-backend
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_BACKEND_H_
+#define VHOST_BACKEND_H_
+
+typedef enum VhostBackendType {
+ VHOST_BACKEND_TYPE_NONE = 0,
+ VHOST_BACKEND_TYPE_KERNEL = 1,
+ VHOST_BACKEND_TYPE_MAX = 2,
+} VhostBackendType;
+
+struct vhost_dev;
+
+typedef int (*vhost_call)(struct vhost_dev *dev, unsigned long int request,
+ void *arg);
+typedef int (*vhost_backend_init)(struct vhost_dev *dev, const char *devpath);
+typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev);
+
+typedef struct VhostOps {
+ VhostBackendType backend_type;
+ vhost_call vhost_call;
+ vhost_backend_init vhost_backend_init;
+ vhost_backend_cleanup vhost_backend_cleanup;
+} VhostOps;
+
+int vhost_set_backend_type(struct vhost_dev *dev,
+ VhostBackendType backend_type);
+
+#endif /* VHOST_BACKEND_H_ */
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index de24746..bd650a5 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -2,6 +2,7 @@
#define VHOST_H
#include "hw/hw.h"
+#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "exec/memory.h"
@@ -48,10 +49,11 @@ struct vhost_dev {
bool memory_changed;
hwaddr mem_changed_start_addr;
hwaddr mem_changed_end_addr;
+ const VhostOps *vhost_ops;
};
int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
- bool force);
+ VhostBackendType backend_type, bool force);
void vhost_dev_cleanup(struct vhost_dev *hdev);
bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev);
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
--
1.8.3.2
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH v5 3/7] Add vhost-user skeleton
2014-01-09 14:59 [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends Antonios Motakis
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 1/7] Convert -mem-path to QemuOpts and add prealloc, share and unlink properties Antonios Motakis
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 2/7] Decouple vhost from kernel interface Antonios Motakis
@ 2014-01-09 14:59 ` Antonios Motakis
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 4/7] Add domain socket communication for vhost-user backend Antonios Motakis
` (4 subsequent siblings)
7 siblings, 0 replies; 24+ messages in thread
From: Antonios Motakis @ 2014-01-09 14:59 UTC (permalink / raw)
To: qemu-devel, snabb-devel
Cc: Stefan Hajnoczi, Michael S. Tsirkin, Jason Wang, n.nikolaev,
Anthony Liguori, Paolo Bonzini, lukego, Antonios Motakis, tech
Add empty vhost_call, init and cleanup for the vhost-user backend.
Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
hw/net/vhost_net.c | 57 ++++++++++++++++++++++-----------------
hw/virtio/vhost-backend.c | 35 ++++++++++++++++++++++++
include/hw/virtio/vhost-backend.h | 3 ++-
include/net/vhost_net.h | 13 ++++++++-
net/tap.c | 16 ++++++-----
5 files changed, 91 insertions(+), 33 deletions(-)
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 4aaf0b4..3614e6c 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -91,43 +91,51 @@ static int vhost_net_get_fd(NetClientState *backend)
}
}
-struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
- bool force)
+struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
- int r;
+ int r = -1;
struct vhost_net *net = g_malloc(sizeof *net);
- if (!backend) {
- fprintf(stderr, "vhost-net requires backend to be setup\n");
+
+ if (!options->net_backend) {
+ fprintf(stderr, "vhost-net requires net backend to be setup\n");
goto fail;
}
- r = vhost_net_get_fd(backend);
- if (r < 0) {
- goto fail;
+
+ if (options->backend_type == VHOST_BACKEND_TYPE_KERNEL) {
+ r = vhost_net_get_fd(options->net_backend);
+ if (r < 0) {
+ goto fail;
+ }
+
+ net->dev.backend_features =
+ tap_has_vnet_hdr(options->net_backend) ? 0 :
+ (1 << VHOST_NET_F_VIRTIO_NET_HDR);
}
- net->nc = backend;
- net->dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
- (1 << VHOST_NET_F_VIRTIO_NET_HDR);
+
+ net->nc = options->net_backend;
net->backend = r;
net->dev.nvqs = 2;
net->dev.vqs = net->vqs;
- r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net",
- VHOST_BACKEND_TYPE_KERNEL, force);
+ r = vhost_dev_init(&net->dev, options->devfd, options->devpath,
+ options->backend_type, options->force);
if (r < 0) {
goto fail;
}
- if (!tap_has_vnet_hdr_len(backend,
- sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
- net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
- }
- if (~net->dev.features & net->dev.backend_features) {
- fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n",
- (uint64_t)(~net->dev.features & net->dev.backend_features));
- vhost_dev_cleanup(&net->dev);
- goto fail;
+ if (options->backend_type == VHOST_BACKEND_TYPE_KERNEL) {
+ if (!tap_has_vnet_hdr_len(options->net_backend,
+ sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
+ net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
+ }
+ if (~net->dev.features & net->dev.backend_features) {
+ fprintf(stderr, "vhost lacks feature mask %" PRIu64
+ " for backend\n",
+ (uint64_t)(~net->dev.features & net->dev.backend_features));
+ vhost_dev_cleanup(&net->dev);
+ goto fail;
+ }
}
-
/* Set sane init value. Override when guest acks. */
vhost_net_ack_features(net, 0);
return net;
@@ -286,8 +294,7 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}
#else
-struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
- bool force)
+struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
error_report("vhost-net support is not compiled in");
return NULL;
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index c0f11e4..1d83e1d 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -16,6 +16,38 @@
#include <unistd.h>
#include <sys/ioctl.h>
+static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
+ void *arg)
+{
+ assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+ error_report("vhost_user_call not implemented\n");
+
+ return -1;
+}
+
+static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
+{
+ assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+ error_report("vhost_user_init not implemented\n");
+
+ return -1;
+}
+
+static int vhost_user_cleanup(struct vhost_dev *dev)
+{
+ assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
+ error_report("vhost_user_cleanup not implemented\n");
+
+ return -1;
+}
+
+static const VhostOps user_ops = {
+ .backend_type = VHOST_BACKEND_TYPE_USER,
+ .vhost_call = vhost_user_call,
+ .vhost_backend_init = vhost_user_init,
+ .vhost_backend_cleanup = vhost_user_cleanup
+};
+
static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request,
void *arg)
{
@@ -56,6 +88,9 @@ int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
case VHOST_BACKEND_TYPE_KERNEL:
dev->vhost_ops = &kernel_ops;
break;
+ case VHOST_BACKEND_TYPE_USER:
+ dev->vhost_ops = &user_ops;
+ break;
default:
error_report("Unknown vhost backend type\n");
r = -1;
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index 93fc55b..ef87ffa 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -14,7 +14,8 @@
typedef enum VhostBackendType {
VHOST_BACKEND_TYPE_NONE = 0,
VHOST_BACKEND_TYPE_KERNEL = 1,
- VHOST_BACKEND_TYPE_MAX = 2,
+ VHOST_BACKEND_TYPE_USER = 2,
+ VHOST_BACKEND_TYPE_MAX = 3,
} VhostBackendType;
struct vhost_dev;
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 2d936bb..1169562 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -2,11 +2,22 @@
#define VHOST_NET_H
#include "net/net.h"
+#include "hw/virtio/vhost-backend.h"
+
+#define VHOST_NET_DEFAULT_PATH "/dev/vhost-net"
struct vhost_net;
typedef struct vhost_net VHostNetState;
-VHostNetState *vhost_net_init(NetClientState *backend, int devfd, bool force);
+typedef struct VhostNetOptions {
+ VhostBackendType backend_type;
+ NetClientState *net_backend;
+ const char *devpath;
+ int devfd;
+ bool force;
+} VhostNetOptions;
+
+struct vhost_net *vhost_net_init(VhostNetOptions *options);
bool vhost_net_query(VHostNetState *net, VirtIODevice *dev);
int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, int total_queues);
diff --git a/net/tap.c b/net/tap.c
index 39c1cda..776dbc4 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -621,19 +621,23 @@ static int net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
if (tap->has_vhost ? tap->vhost :
vhostfdname || (tap->has_vhostforce && tap->vhostforce)) {
- int vhostfd;
+ VhostNetOptions options;
+
+ options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
+ options.net_backend = &s->nc;
+ options.devpath = VHOST_NET_DEFAULT_PATH;
+ options.force = tap->has_vhostforce && tap->vhostforce;
if (tap->has_vhostfd || tap->has_vhostfds) {
- vhostfd = monitor_handle_fd_param(cur_mon, vhostfdname);
- if (vhostfd == -1) {
+ options.devfd = monitor_handle_fd_param(cur_mon, vhostfdname);
+ if (options.devfd == -1) {
return -1;
}
} else {
- vhostfd = -1;
+ options.devfd = -1;
}
- s->vhost_net = vhost_net_init(&s->nc, vhostfd,
- tap->has_vhostforce && tap->vhostforce);
+ s->vhost_net = vhost_net_init(&options);
if (!s->vhost_net) {
error_report("vhost-net requested but could not be initialized");
return -1;
--
1.8.3.2
^ permalink raw reply related [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH v5 4/7] Add domain socket communication for vhost-user backend
2014-01-09 14:59 [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends Antonios Motakis
` (2 preceding siblings ...)
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 3/7] Add vhost-user skeleton Antonios Motakis
@ 2014-01-09 14:59 ` Antonios Motakis
2014-01-09 15:31 ` Michael S. Tsirkin
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 5/7] Add vhost-user calls implementation Antonios Motakis
` (3 subsequent siblings)
7 siblings, 1 reply; 24+ messages in thread
From: Antonios Motakis @ 2014-01-09 14:59 UTC (permalink / raw)
To: qemu-devel, snabb-devel
Cc: lukego, Antonios Motakis, tech, n.nikolaev, Michael S. Tsirkin
Add structures for passing vhost-user messages over a unix domain socket.
This is the equivalent of the existing vhost-kernel ioctls.
Connect to the named unix domain socket. The system call sendmsg
is used for communication. To be able to pass file descriptors
between processes - we use SCM_RIGHTS type in the message control header.
Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
hw/virtio/vhost-backend.c | 268 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 263 insertions(+), 5 deletions(-)
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 1d83e1d..b33d35f 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -11,34 +11,292 @@
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "qemu/error-report.h"
+#include "qemu/sockets.h"
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <linux/vhost.h>
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+#define VHOST_USER_SOCKTO (300) /* msec */
+
+typedef enum VhostUserRequest {
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_NET_SET_BACKEND = 15,
+ VHOST_USER_ECHO = 16,
+ VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+ __u64 guest_phys_addr;
+ __u64 memory_size;
+ __u64 userspace_addr;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+ __u32 nregions;
+ __u32 padding;
+ VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+ VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK (0x3)
+#define VHOST_USER_REPLY_MASK (0x1<<2)
+ __u32 flags;
+ __u32 size; /* the following payload size */
+ union {
+ __u64 u64;
+ int fd;
+ struct vhost_vring_state state;
+ struct vhost_vring_addr addr;
+ struct vhost_vring_file file;
+ VhostUserMemory memory;
+ };
+} QEMU_PACKED VhostUserMsg;
+
+#define MEMB_SIZE(t, m) (sizeof(((t *)0)->m))
+#define VHOST_USER_HDR_SIZE (MEMB_SIZE(VhostUserMsg, request) \
+ + MEMB_SIZE(VhostUserMsg, flags) \
+ + MEMB_SIZE(VhostUserMsg, size))
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION (0x1)
+
+static int vhost_user_cleanup(struct vhost_dev *dev);
+
+static int vhost_user_recv(int fd, VhostUserMsg *msg)
+{
+ ssize_t r;
+ __u8 *p = (__u8 *)msg;
+
+ /* read the header */
+ do {
+ r = read(fd, p, VHOST_USER_HDR_SIZE);
+ } while (r < 0 && errno == EINTR);
+
+ if (r > 0) {
+ p += VHOST_USER_HDR_SIZE;
+ /* read the payload */
+ do {
+ r = read(fd, p, msg->size);
+ } while (r < 0 && errno == EINTR);
+ }
+
+ if (r < 0) {
+ error_report("Failed to read msg, reason: %s\n", strerror(errno));
+ }
+
+ return (r < 0) ? -1 : 0;
+}
+
+static int vhost_user_send_fds(int fd, const VhostUserMsg *msg, int *fds,
+ size_t fd_num)
+{
+ int r;
+
+ struct msghdr msgh;
+ struct iovec iov[1];
+
+ size_t fd_size = fd_num * sizeof(int);
+ char control[CMSG_SPACE(fd_size)];
+ struct cmsghdr *cmsg;
+
+ memset(&msgh, 0, sizeof(msgh));
+ memset(control, 0, sizeof(control));
+
+ /* set the payload */
+ iov[0].iov_base = (void *)msg;
+ iov[0].iov_len = VHOST_USER_HDR_SIZE + msg->size;
+
+ msgh.msg_iov = iov;
+ msgh.msg_iovlen = 1;
+
+ if (fd_num) {
+ msgh.msg_control = control;
+ msgh.msg_controllen = sizeof(control);
+
+ cmsg = CMSG_FIRSTHDR(&msgh);
+
+ cmsg->cmsg_len = CMSG_LEN(fd_size);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(cmsg), fds, fd_size);
+ } else {
+ msgh.msg_control = 0;
+ msgh.msg_controllen = 0;
+ }
+
+ do {
+ r = sendmsg(fd, &msgh, 0);
+ } while (r < 0 && errno == EINTR);
+
+ if (r < 0) {
+ error_report("Failed to send msg(%d), reason: %s\n",
+ msg->request, strerror(errno));
+ } else {
+ r = 0;
+ }
+
+ return r;
+}
+
+static int vhost_user_echo(struct vhost_dev *dev)
+{
+ VhostUserMsg msg;
+ int fd = dev->control;
+
+ if (fd < 0) {
+ return -1;
+ }
+
+ /* check connection */
+ msg.request = VHOST_USER_ECHO;
+ msg.flags = VHOST_USER_VERSION;
+ msg.size = 0;
+
+ if (vhost_user_send_fds(fd, &msg, 0, 0) < 0) {
+ error_report("ECHO failed\n");
+ return -1;
+ }
+
+ if (vhost_user_recv(fd, &msg) < 0) {
+ error_report("ECHO failed\n");
+ return -1;
+ }
+
+ if ((msg.flags & VHOST_USER_REPLY_MASK) == 0 ||
+ (msg.flags & VHOST_USER_VERSION_MASK) != VHOST_USER_VERSION) {
+ error_report("ECHO failed\n");
+ return -1;
+ }
+
+ return 0;
+}
static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
void *arg)
{
+ int fd = dev->control;
+ VhostUserMsg msg;
+ int result = 0;
+ int fds[VHOST_MEMORY_MAX_NREGIONS];
+ size_t fd_num = 0;
+
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
- error_report("vhost_user_call not implemented\n");
- return -1;
+ if (fd < 0) {
+ return -1;
+ }
+
+ msg.request = VHOST_USER_NONE;
+ msg.flags = VHOST_USER_VERSION;
+ msg.size = 0;
+
+ switch (request) {
+ default:
+ error_report("vhost-user trying to send unhandled ioctl\n");
+ return -1;
+ break;
+ }
+
+ result = vhost_user_send_fds(fd, &msg, fds, fd_num);
+
+ return result;
}
static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
{
+ int fd = -1;
+ struct sockaddr_un un;
+ struct timeval tv;
+ size_t len;
+
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
- error_report("vhost_user_init not implemented\n");
+ /* Create the socket */
+ fd = qemu_socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd == -1) {
+ error_report("socket: %s", strerror(errno));
+ return -1;
+ }
+
+ /* Set socket options */
+ tv.tv_sec = VHOST_USER_SOCKTO / 1000;
+ tv.tv_usec = (VHOST_USER_SOCKTO % 1000) * 1000;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(struct timeval))
+ == -1) {
+ error_report("setsockopt SO_SNDTIMEO: %s", strerror(errno));
+ goto fail;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval))
+ == -1) {
+ error_report("setsockopt SO_RCVTIMEO: %s", strerror(errno));
+ goto fail;
+ }
+
+ /* Connect */
+ un.sun_family = AF_UNIX;
+ strcpy(un.sun_path, devpath);
+
+ len = sizeof(un.sun_family) + strlen(devpath);
+
+ if (connect(fd, (struct sockaddr *) &un, len) == -1) {
+ error_report("connect: %s", strerror(errno));
+ goto fail;
+ }
+
+ /* Cleanup if there is previous connection left */
+ if (dev->control >= 0) {
+ vhost_user_cleanup(dev);
+ }
+ dev->control = fd;
+
+ if (vhost_user_echo(dev) < 0) {
+ dev->control = -1;
+ goto fail;
+ }
+
+ return fd;
+
+fail:
+ close(fd);
return -1;
+
}
static int vhost_user_cleanup(struct vhost_dev *dev)
{
+ int r = 0;
+
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
- error_report("vhost_user_cleanup not implemented\n");
- return -1;
+ if (dev->control >= 0) {
+ r = close(dev->control);
+ }
+ dev->control = -1;
+
+ return r;
}
static const VhostOps user_ops = {
--
1.8.3.2
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 4/7] Add domain socket communication for vhost-user backend
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 4/7] Add domain socket communication for vhost-user backend Antonios Motakis
@ 2014-01-09 15:31 ` Michael S. Tsirkin
2014-01-10 11:09 ` Antonios Motakis
0 siblings, 1 reply; 24+ messages in thread
From: Michael S. Tsirkin @ 2014-01-09 15:31 UTC (permalink / raw)
To: Antonios Motakis; +Cc: lukego, snabb-devel, n.nikolaev, qemu-devel, tech
On Thu, Jan 09, 2014 at 03:59:58PM +0100, Antonios Motakis wrote:
> Add structures for passing vhost-user messages over a unix domain socket.
> This is the equivalent of the existing vhost-kernel ioctls.
>
> Connect to the named unix domain socket. The system call sendmsg
> is used for communication. To be able to pass file descriptors
> between processes - we use SCM_RIGHTS type in the message control header.
>
> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> ---
> hw/virtio/vhost-backend.c | 268 +++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 263 insertions(+), 5 deletions(-)
>
> diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> index 1d83e1d..b33d35f 100644
> --- a/hw/virtio/vhost-backend.c
> +++ b/hw/virtio/vhost-backend.c
> @@ -11,34 +11,292 @@
> #include "hw/virtio/vhost.h"
> #include "hw/virtio/vhost-backend.h"
> #include "qemu/error-report.h"
> +#include "qemu/sockets.h"
>
> #include <fcntl.h>
> #include <unistd.h>
> #include <sys/ioctl.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <linux/vhost.h>
> +
> +#define VHOST_MEMORY_MAX_NREGIONS 8
> +#define VHOST_USER_SOCKTO (300) /* msec */
> +
> +typedef enum VhostUserRequest {
> + VHOST_USER_NONE = 0,
> + VHOST_USER_GET_FEATURES = 1,
> + VHOST_USER_SET_FEATURES = 2,
> + VHOST_USER_SET_OWNER = 3,
> + VHOST_USER_RESET_OWNER = 4,
> + VHOST_USER_SET_MEM_TABLE = 5,
> + VHOST_USER_SET_LOG_BASE = 6,
> + VHOST_USER_SET_LOG_FD = 7,
> + VHOST_USER_SET_VRING_NUM = 8,
> + VHOST_USER_SET_VRING_ADDR = 9,
> + VHOST_USER_SET_VRING_BASE = 10,
> + VHOST_USER_GET_VRING_BASE = 11,
> + VHOST_USER_SET_VRING_KICK = 12,
> + VHOST_USER_SET_VRING_CALL = 13,
> + VHOST_USER_SET_VRING_ERR = 14,
> + VHOST_USER_NET_SET_BACKEND = 15,
> + VHOST_USER_ECHO = 16,
> + VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> + __u64 guest_phys_addr;
> + __u64 memory_size;
> + __u64 userspace_addr;
Don't use __uXX like this.
Use posix uintXX_t types.
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> + __u32 nregions;
> + __u32 padding;
> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserMsg {
> + VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK (0x3)
> +#define VHOST_USER_REPLY_MASK (0x1<<2)
> + __u32 flags;
> + __u32 size; /* the following payload size */
> + union {
> + __u64 u64;
> + int fd;
> + struct vhost_vring_state state;
> + struct vhost_vring_addr addr;
> + struct vhost_vring_file file;
Hmm I don't get it. fd is passes in anciliarry data, right?
what are the fd #s doing in data portion?
> + VhostUserMemory memory;
> + };
> +} QEMU_PACKED VhostUserMsg;
> +
> +#define MEMB_SIZE(t, m) (sizeof(((t *)0)->m))
That's unnecessarily tricky.
Just do
static VhostUserMsg m;
And then you can use sizeof(m.request) without issues and macros.
> +#define VHOST_USER_HDR_SIZE (MEMB_SIZE(VhostUserMsg, request) \
> + + MEMB_SIZE(VhostUserMsg, flags) \
> + + MEMB_SIZE(VhostUserMsg, size))
or just split header to a separate structure and use sizeof on that.
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION (0x1)
> +
> +static int vhost_user_cleanup(struct vhost_dev *dev);
dont forward declare static functions.
order them logically instead.
> +
> +static int vhost_user_recv(int fd, VhostUserMsg *msg)
> +{
> + ssize_t r;
> + __u8 *p = (__u8 *)msg;
> +
> + /* read the header */
> + do {
> + r = read(fd, p, VHOST_USER_HDR_SIZE);
> + } while (r < 0 && errno == EINTR);
What if you get a partial buffer?
You use SOCK_STREAM so I think it can happen right?
> +
> + if (r > 0) {
> + p += VHOST_USER_HDR_SIZE;
> + /* read the payload */
> + do {
> + r = read(fd, p, msg->size);
> + } while (r < 0 && errno == EINTR);
That's a security bug.
What if msg->size is bigger than sizeof *msg?
you will corrupt memory.
You must validate size.
> + }
> +
> + if (r < 0) {
> + error_report("Failed to read msg, reason: %s\n", strerror(errno));
> + }
> +
> + return (r < 0) ? -1 : 0;
> +}
> +
> +static int vhost_user_send_fds(int fd, const VhostUserMsg *msg, int *fds,
> + size_t fd_num)
> +{
> + int r;
> +
> + struct msghdr msgh;
> + struct iovec iov[1];
same as struct iovec iov only longer and more confusing?
> +
> + size_t fd_size = fd_num * sizeof(int);
> + char control[CMSG_SPACE(fd_size)];
> + struct cmsghdr *cmsg;
> +
> + memset(&msgh, 0, sizeof(msgh));
> + memset(control, 0, sizeof(control));
> +
> + /* set the payload */
> + iov[0].iov_base = (void *)msg;
it's void * already, no need for cast
> + iov[0].iov_len = VHOST_USER_HDR_SIZE + msg->size;
> +
> + msgh.msg_iov = iov;
> + msgh.msg_iovlen = 1;
> +
> + if (fd_num) {
> + msgh.msg_control = control;
> + msgh.msg_controllen = sizeof(control);
> +
> + cmsg = CMSG_FIRSTHDR(&msgh);
> +
> + cmsg->cmsg_len = CMSG_LEN(fd_size);
> + cmsg->cmsg_level = SOL_SOCKET;
> + cmsg->cmsg_type = SCM_RIGHTS;
> + memcpy(CMSG_DATA(cmsg), fds, fd_size);
> + } else {
> + msgh.msg_control = 0;
> + msgh.msg_controllen = 0;
> + }
> +
> + do {
> + r = sendmsg(fd, &msgh, 0);
> + } while (r < 0 && errno == EINTR);
again I think this can write message partially for SOCK_STREAM.
> +
> + if (r < 0) {
> + error_report("Failed to send msg(%d), reason: %s\n",
> + msg->request, strerror(errno));
> + } else {
> + r = 0;
> + }
> +
> + return r;
> +}
> +
> +static int vhost_user_echo(struct vhost_dev *dev)
> +{
> + VhostUserMsg msg;
> + int fd = dev->control;
> +
> + if (fd < 0) {
> + return -1;
> + }
> +
> + /* check connection */
> + msg.request = VHOST_USER_ECHO;
> + msg.flags = VHOST_USER_VERSION;
> + msg.size = 0;
> +
> + if (vhost_user_send_fds(fd, &msg, 0, 0) < 0) {
> + error_report("ECHO failed\n");
> + return -1;
> + }
> +
> + if (vhost_user_recv(fd, &msg) < 0) {
> + error_report("ECHO failed\n");
> + return -1;
> + }
> +
> + if ((msg.flags & VHOST_USER_REPLY_MASK) == 0 ||
> + (msg.flags & VHOST_USER_VERSION_MASK) != VHOST_USER_VERSION) {
> + error_report("ECHO failed\n");
> + return -1;
> + }
> +
> + return 0;
> +}
>
> static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
> void *arg)
> {
> + int fd = dev->control;
> + VhostUserMsg msg;
> + int result = 0;
> + int fds[VHOST_MEMORY_MAX_NREGIONS];
> + size_t fd_num = 0;
> +
> assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> - error_report("vhost_user_call not implemented\n");
>
> - return -1;
> + if (fd < 0) {
> + return -1;
> + }
> +
> + msg.request = VHOST_USER_NONE;
> + msg.flags = VHOST_USER_VERSION;
> + msg.size = 0;
> +
> + switch (request) {
> + default:
> + error_report("vhost-user trying to send unhandled ioctl\n");
> + return -1;
> + break;
> + }
> +
> + result = vhost_user_send_fds(fd, &msg, fds, fd_num);
> +
> + return result;
> }
>
> static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
> {
> + int fd = -1;
> + struct sockaddr_un un;
> + struct timeval tv;
> + size_t len;
> +
> assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> - error_report("vhost_user_init not implemented\n");
>
> + /* Create the socket */
> + fd = qemu_socket(AF_UNIX, SOCK_STREAM, 0);
> + if (fd == -1) {
> + error_report("socket: %s", strerror(errno));
> + return -1;
> + }
> +
> + /* Set socket options */
> + tv.tv_sec = VHOST_USER_SOCKTO / 1000;
> + tv.tv_usec = (VHOST_USER_SOCKTO % 1000) * 1000;
> +
> + if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(struct timeval))
> + == -1) {
> + error_report("setsockopt SO_SNDTIMEO: %s", strerror(errno));
> + goto fail;
> + }
> +
> + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval))
> + == -1) {
> + error_report("setsockopt SO_RCVTIMEO: %s", strerror(errno));
> + goto fail;
> + }
> +
> + /* Connect */
> + un.sun_family = AF_UNIX;
> + strcpy(un.sun_path, devpath);
> +
> + len = sizeof(un.sun_family) + strlen(devpath);
> +
> + if (connect(fd, (struct sockaddr *) &un, len) == -1) {
> + error_report("connect: %s", strerror(errno));
> + goto fail;
> + }
> +
> + /* Cleanup if there is previous connection left */
> + if (dev->control >= 0) {
> + vhost_user_cleanup(dev);
> + }
> + dev->control = fd;
> +
> + if (vhost_user_echo(dev) < 0) {
> + dev->control = -1;
> + goto fail;
> + }
> +
> + return fd;
> +
> +fail:
> + close(fd);
> return -1;
> +
> }
>
> static int vhost_user_cleanup(struct vhost_dev *dev)
> {
> + int r = 0;
> +
> assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> - error_report("vhost_user_cleanup not implemented\n");
>
> - return -1;
> + if (dev->control >= 0) {
> + r = close(dev->control);
> + }
> + dev->control = -1;
> +
> + return r;
> }
>
> static const VhostOps user_ops = {
> --
> 1.8.3.2
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 4/7] Add domain socket communication for vhost-user backend
2014-01-09 15:31 ` Michael S. Tsirkin
@ 2014-01-10 11:09 ` Antonios Motakis
0 siblings, 0 replies; 24+ messages in thread
From: Antonios Motakis @ 2014-01-10 11:09 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Luke Gorrie, snabb-devel, Nikolay Nikolaev, qemu-devel qemu-devel,
VirtualOpenSystems Technical Team
On Thu, Jan 9, 2014 at 4:31 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Thu, Jan 09, 2014 at 03:59:58PM +0100, Antonios Motakis wrote:
>> Add structures for passing vhost-user messages over a unix domain socket.
>> This is the equivalent of the existing vhost-kernel ioctls.
>>
>> Connect to the named unix domain socket. The system call sendmsg
>> is used for communication. To be able to pass file descriptors
>> between processes - we use SCM_RIGHTS type in the message control header.
>>
>> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
>> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
>> ---
>> hw/virtio/vhost-backend.c | 268 +++++++++++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 263 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
>> index 1d83e1d..b33d35f 100644
>> --- a/hw/virtio/vhost-backend.c
>> +++ b/hw/virtio/vhost-backend.c
>> @@ -11,34 +11,292 @@
>> #include "hw/virtio/vhost.h"
>> #include "hw/virtio/vhost-backend.h"
>> #include "qemu/error-report.h"
>> +#include "qemu/sockets.h"
>>
>> #include <fcntl.h>
>> #include <unistd.h>
>> #include <sys/ioctl.h>
>> +#include <sys/socket.h>
>> +#include <sys/un.h>
>> +#include <linux/vhost.h>
>> +
>> +#define VHOST_MEMORY_MAX_NREGIONS 8
>> +#define VHOST_USER_SOCKTO (300) /* msec */
>> +
>> +typedef enum VhostUserRequest {
>> + VHOST_USER_NONE = 0,
>> + VHOST_USER_GET_FEATURES = 1,
>> + VHOST_USER_SET_FEATURES = 2,
>> + VHOST_USER_SET_OWNER = 3,
>> + VHOST_USER_RESET_OWNER = 4,
>> + VHOST_USER_SET_MEM_TABLE = 5,
>> + VHOST_USER_SET_LOG_BASE = 6,
>> + VHOST_USER_SET_LOG_FD = 7,
>> + VHOST_USER_SET_VRING_NUM = 8,
>> + VHOST_USER_SET_VRING_ADDR = 9,
>> + VHOST_USER_SET_VRING_BASE = 10,
>> + VHOST_USER_GET_VRING_BASE = 11,
>> + VHOST_USER_SET_VRING_KICK = 12,
>> + VHOST_USER_SET_VRING_CALL = 13,
>> + VHOST_USER_SET_VRING_ERR = 14,
>> + VHOST_USER_NET_SET_BACKEND = 15,
>> + VHOST_USER_ECHO = 16,
>> + VHOST_USER_MAX
>> +} VhostUserRequest;
>> +
>> +typedef struct VhostUserMemoryRegion {
>> + __u64 guest_phys_addr;
>> + __u64 memory_size;
>> + __u64 userspace_addr;
>
> Don't use __uXX like this.
> Use posix uintXX_t types.
>
>> +} VhostUserMemoryRegion;
>> +
>> +typedef struct VhostUserMemory {
>> + __u32 nregions;
>> + __u32 padding;
>> + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>> +} VhostUserMemory;
>> +
>> +typedef struct VhostUserMsg {
>> + VhostUserRequest request;
>> +
>> +#define VHOST_USER_VERSION_MASK (0x3)
>> +#define VHOST_USER_REPLY_MASK (0x1<<2)
>> + __u32 flags;
>> + __u32 size; /* the following payload size */
>> + union {
>> + __u64 u64;
>> + int fd;
>> + struct vhost_vring_state state;
>> + struct vhost_vring_addr addr;
>> + struct vhost_vring_file file;
>
> Hmm I don't get it. fd is passes in anciliarry data, right?
> what are the fd #s doing in data portion?
>
>> + VhostUserMemory memory;
>> + };
>> +} QEMU_PACKED VhostUserMsg;
>> +
>> +#define MEMB_SIZE(t, m) (sizeof(((t *)0)->m))
>
> That's unnecessarily tricky.
> Just do
>
> static VhostUserMsg m;
>
> And then you can use sizeof(m.request) without issues and macros.
>
>> +#define VHOST_USER_HDR_SIZE (MEMB_SIZE(VhostUserMsg, request) \
>> + + MEMB_SIZE(VhostUserMsg, flags) \
>> + + MEMB_SIZE(VhostUserMsg, size))
>
> or just split header to a separate structure and use sizeof on that.
>
>> +
>> +/* The version of the protocol we support */
>> +#define VHOST_USER_VERSION (0x1)
>> +
>> +static int vhost_user_cleanup(struct vhost_dev *dev);
>
> dont forward declare static functions.
> order them logically instead.
>
>> +
>> +static int vhost_user_recv(int fd, VhostUserMsg *msg)
>> +{
>> + ssize_t r;
>> + __u8 *p = (__u8 *)msg;
>> +
>> + /* read the header */
>> + do {
>> + r = read(fd, p, VHOST_USER_HDR_SIZE);
>> + } while (r < 0 && errno == EINTR);
>
> What if you get a partial buffer?
> You use SOCK_STREAM so I think it can happen right?
>
>
>> +
>> + if (r > 0) {
>> + p += VHOST_USER_HDR_SIZE;
>> + /* read the payload */
>> + do {
>> + r = read(fd, p, msg->size);
>> + } while (r < 0 && errno == EINTR);
>
> That's a security bug.
> What if msg->size is bigger than sizeof *msg?
> you will corrupt memory.
> You must validate size.
>
>> + }
>> +
>> + if (r < 0) {
>> + error_report("Failed to read msg, reason: %s\n", strerror(errno));
>> + }
>> +
>> + return (r < 0) ? -1 : 0;
>> +}
>> +
>> +static int vhost_user_send_fds(int fd, const VhostUserMsg *msg, int *fds,
>> + size_t fd_num)
>> +{
>> + int r;
>> +
>> + struct msghdr msgh;
>> + struct iovec iov[1];
>
> same as struct iovec iov only longer and more confusing?
>> +
>> + size_t fd_size = fd_num * sizeof(int);
>> + char control[CMSG_SPACE(fd_size)];
>> + struct cmsghdr *cmsg;
>> +
>> + memset(&msgh, 0, sizeof(msgh));
>> + memset(control, 0, sizeof(control));
>> +
>> + /* set the payload */
>> + iov[0].iov_base = (void *)msg;
>
> it's void * already, no need for cast
>
>> + iov[0].iov_len = VHOST_USER_HDR_SIZE + msg->size;
>> +
>> + msgh.msg_iov = iov;
>> + msgh.msg_iovlen = 1;
>> +
>> + if (fd_num) {
>> + msgh.msg_control = control;
>> + msgh.msg_controllen = sizeof(control);
>> +
>> + cmsg = CMSG_FIRSTHDR(&msgh);
>> +
>> + cmsg->cmsg_len = CMSG_LEN(fd_size);
>> + cmsg->cmsg_level = SOL_SOCKET;
>> + cmsg->cmsg_type = SCM_RIGHTS;
>> + memcpy(CMSG_DATA(cmsg), fds, fd_size);
>> + } else {
>> + msgh.msg_control = 0;
>> + msgh.msg_controllen = 0;
>> + }
>> +
>> + do {
>> + r = sendmsg(fd, &msgh, 0);
>> + } while (r < 0 && errno == EINTR);
>
> again I think this can write message partially for SOCK_STREAM.
>
>> +
>> + if (r < 0) {
>> + error_report("Failed to send msg(%d), reason: %s\n",
>> + msg->request, strerror(errno));
>> + } else {
>> + r = 0;
>> + }
>> +
>> + return r;
>> +}
>> +
>> +static int vhost_user_echo(struct vhost_dev *dev)
>> +{
>> + VhostUserMsg msg;
>> + int fd = dev->control;
>> +
>> + if (fd < 0) {
>> + return -1;
>> + }
>> +
>> + /* check connection */
>> + msg.request = VHOST_USER_ECHO;
>> + msg.flags = VHOST_USER_VERSION;
>> + msg.size = 0;
>> +
>> + if (vhost_user_send_fds(fd, &msg, 0, 0) < 0) {
>> + error_report("ECHO failed\n");
>> + return -1;
>> + }
>> +
>> + if (vhost_user_recv(fd, &msg) < 0) {
>> + error_report("ECHO failed\n");
>> + return -1;
>> + }
>> +
>> + if ((msg.flags & VHOST_USER_REPLY_MASK) == 0 ||
>> + (msg.flags & VHOST_USER_VERSION_MASK) != VHOST_USER_VERSION) {
>> + error_report("ECHO failed\n");
>> + return -1;
>> + }
>> +
>> + return 0;
>> +}
>>
>> static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
>> void *arg)
>> {
>> + int fd = dev->control;
>> + VhostUserMsg msg;
>> + int result = 0;
>> + int fds[VHOST_MEMORY_MAX_NREGIONS];
>> + size_t fd_num = 0;
>> +
>> assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
>> - error_report("vhost_user_call not implemented\n");
>>
>> - return -1;
>> + if (fd < 0) {
>> + return -1;
>> + }
>> +
>> + msg.request = VHOST_USER_NONE;
>> + msg.flags = VHOST_USER_VERSION;
>> + msg.size = 0;
>> +
>> + switch (request) {
>> + default:
>> + error_report("vhost-user trying to send unhandled ioctl\n");
>> + return -1;
>> + break;
>> + }
>> +
>> + result = vhost_user_send_fds(fd, &msg, fds, fd_num);
>> +
>> + return result;
>> }
>>
>> static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
>> {
>> + int fd = -1;
>> + struct sockaddr_un un;
>> + struct timeval tv;
>> + size_t len;
>> +
>> assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
>> - error_report("vhost_user_init not implemented\n");
>>
>> + /* Create the socket */
>> + fd = qemu_socket(AF_UNIX, SOCK_STREAM, 0);
>> + if (fd == -1) {
>> + error_report("socket: %s", strerror(errno));
>> + return -1;
>> + }
>> +
>> + /* Set socket options */
>> + tv.tv_sec = VHOST_USER_SOCKTO / 1000;
>> + tv.tv_usec = (VHOST_USER_SOCKTO % 1000) * 1000;
>> +
>> + if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(struct timeval))
>> + == -1) {
>> + error_report("setsockopt SO_SNDTIMEO: %s", strerror(errno));
>> + goto fail;
>> + }
>> +
>> + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval))
>> + == -1) {
>> + error_report("setsockopt SO_RCVTIMEO: %s", strerror(errno));
>> + goto fail;
>> + }
>> +
>> + /* Connect */
>> + un.sun_family = AF_UNIX;
>> + strcpy(un.sun_path, devpath);
>> +
>> + len = sizeof(un.sun_family) + strlen(devpath);
>> +
>> + if (connect(fd, (struct sockaddr *) &un, len) == -1) {
>> + error_report("connect: %s", strerror(errno));
>> + goto fail;
>> + }
>> +
>> + /* Cleanup if there is previous connection left */
>> + if (dev->control >= 0) {
>> + vhost_user_cleanup(dev);
>> + }
>> + dev->control = fd;
>> +
>> + if (vhost_user_echo(dev) < 0) {
>> + dev->control = -1;
>> + goto fail;
>> + }
>> +
>> + return fd;
>> +
>> +fail:
>> + close(fd);
>> return -1;
>> +
>> }
>>
>> static int vhost_user_cleanup(struct vhost_dev *dev)
>> {
>> + int r = 0;
>> +
>> assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
>> - error_report("vhost_user_cleanup not implemented\n");
>>
>> - return -1;
>> + if (dev->control >= 0) {
>> + r = close(dev->control);
>> + }
>> + dev->control = -1;
>> +
>> + return r;
>> }
>>
>> static const VhostOps user_ops = {
>> --
>> 1.8.3.2
>>
Ack, thanks for your feeedback.
Best regards,
Antonios
^ permalink raw reply [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH v5 5/7] Add vhost-user calls implementation
2014-01-09 14:59 [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends Antonios Motakis
` (3 preceding siblings ...)
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 4/7] Add domain socket communication for vhost-user backend Antonios Motakis
@ 2014-01-09 14:59 ` Antonios Motakis
2014-01-09 15:47 ` Michael S. Tsirkin
2014-01-09 15:00 ` [Qemu-devel] [PATCH v5 6/7] Add new vhost-user netdev backend Antonios Motakis
` (2 subsequent siblings)
7 siblings, 1 reply; 24+ messages in thread
From: Antonios Motakis @ 2014-01-09 14:59 UTC (permalink / raw)
To: qemu-devel, snabb-devel
Cc: lukego, Antonios Motakis, tech, n.nikolaev, Michael S. Tsirkin
Each ioctl request of vhost-kernel has a vhost-user message equivalent,
which is sent it over the control socket.
The general approach is to copy the data from the supplied argument
pointer to a designated field in the message. If a file descriptor is
to be passed it should be placed also in the fds array for inclusion in
the sendmsd control header.
VHOST_SET_MEM_TABLE ignores the supplied vhost_memory structure and scans
the global ram_list for ram blocks wiht a valid fd field set. This would
be set when the -mem-path option with shared=on property is used.
Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
hw/virtio/vhost-backend.c | 134 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 132 insertions(+), 2 deletions(-)
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index b33d35f..50ea307 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -81,8 +81,41 @@ typedef struct VhostUserMsg {
/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)
+static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
+ -1, /* VHOST_USER_NONE */
+ VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */
+ VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */
+ VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */
+ VHOST_RESET_OWNER, /* VHOST_USER_RESET_OWNER */
+ VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */
+ VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */
+ VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */
+ VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */
+ VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */
+ VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */
+ VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */
+ VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */
+ VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */
+ VHOST_SET_VRING_ERR, /* VHOST_USER_SET_VRING_ERR */
+ VHOST_NET_SET_BACKEND, /* VHOST_USER_NET_SET_BACKEND */
+ -1 /* VHOST_USER_ECHO */
+};
+
static int vhost_user_cleanup(struct vhost_dev *dev);
+static VhostUserRequest vhost_user_request_translate(unsigned long int request)
+{
+ VhostUserRequest idx;
+
+ for (idx = 0; idx < VHOST_USER_MAX; idx++) {
+ if (ioctl_to_vhost_user_request[idx] == request) {
+ break;
+ }
+ }
+
+ return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx;
+}
+
static int vhost_user_recv(int fd, VhostUserMsg *msg)
{
ssize_t r;
@@ -197,7 +230,8 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
{
int fd = dev->control;
VhostUserMsg msg;
- int result = 0;
+ RAMBlock *block = 0;
+ int result = 0, need_reply = 0;
int fds[VHOST_MEMORY_MAX_NREGIONS];
size_t fd_num = 0;
@@ -207,11 +241,78 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
return -1;
}
- msg.request = VHOST_USER_NONE;
+ msg.request = vhost_user_request_translate(request);
msg.flags = VHOST_USER_VERSION;
msg.size = 0;
switch (request) {
+ case VHOST_GET_FEATURES:
+ case VHOST_GET_VRING_BASE:
+ need_reply = 1;
+ break;
+
+ case VHOST_SET_FEATURES:
+ case VHOST_SET_LOG_BASE:
+ msg.u64 = *((__u64 *) arg);
+ msg.size = MEMB_SIZE(VhostUserMsg, u64);
+ break;
+
+ case VHOST_SET_OWNER:
+ case VHOST_RESET_OWNER:
+ break;
+
+ case VHOST_SET_MEM_TABLE:
+ QTAILQ_FOREACH(block, &ram_list.blocks, next)
+ {
+ if (block->fd > 0) {
+ msg.memory.regions[fd_num].userspace_addr = (__u64) block->host;
+ msg.memory.regions[fd_num].memory_size = block->length;
+ msg.memory.regions[fd_num].guest_phys_addr = block->offset;
+ fds[fd_num++] = block->fd;
+ }
+ }
+
+ msg.memory.nregions = fd_num;
+
+ if (!fd_num) {
+ error_report("Failed initializing vhost-user memory map\n"
+ "consider using -mem-path option\n");
+ return -1;
+ }
+
+ msg.size = MEMB_SIZE(VhostUserMemory, nregions);
+ msg.size += MEMB_SIZE(VhostUserMemory, padding);
+ msg.size += fd_num*sizeof(VhostUserMemoryRegion);
+
+ break;
+
+ case VHOST_SET_LOG_FD:
+ msg.fd = *((int *) arg);
+ msg.size = MEMB_SIZE(VhostUserMsg, fd);
+ break;
+
+ case VHOST_SET_VRING_NUM:
+ case VHOST_SET_VRING_BASE:
+ memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
+ msg.size = MEMB_SIZE(VhostUserMsg, state);
+ break;
+
+ case VHOST_SET_VRING_ADDR:
+ memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
+ msg.size = MEMB_SIZE(VhostUserMsg, addr);
+ break;
+
+ case VHOST_SET_VRING_KICK:
+ case VHOST_SET_VRING_CALL:
+ case VHOST_SET_VRING_ERR:
+ case VHOST_NET_SET_BACKEND:
+ memcpy(&msg.file, arg, sizeof(struct vhost_vring_file));
+ msg.size = MEMB_SIZE(VhostUserMsg, file);
+ if (msg.file.fd > 0) {
+ fds[0] = msg.file.fd;
+ fd_num = 1;
+ }
+ break;
default:
error_report("vhost-user trying to send unhandled ioctl\n");
return -1;
@@ -220,6 +321,35 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
result = vhost_user_send_fds(fd, &msg, fds, fd_num);
+ if (!result && need_reply) {
+ result = vhost_user_recv(fd, &msg);
+
+ if ((msg.flags & VHOST_USER_REPLY_MASK) == 0 ||
+ (msg.flags & VHOST_USER_VERSION_MASK) != VHOST_USER_VERSION) {
+ error_report("Received bad msg.\n");
+ return -1;
+ }
+
+ if (!result) {
+ switch (request) {
+ case VHOST_GET_FEATURES:
+ if (msg.size != MEMB_SIZE(VhostUserMsg, u64)) {
+ error_report("Received bad msg.\n");
+ return -1;
+ }
+ *((__u64 *) arg) = msg.u64;
+ break;
+ case VHOST_GET_VRING_BASE:
+ if (msg.size != MEMB_SIZE(VhostUserMsg, state)) {
+ error_report("Received bad msg.\n");
+ return -1;
+ }
+ memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
+ break;
+ }
+ }
+ }
+
return result;
}
--
1.8.3.2
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 5/7] Add vhost-user calls implementation
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 5/7] Add vhost-user calls implementation Antonios Motakis
@ 2014-01-09 15:47 ` Michael S. Tsirkin
2014-01-10 11:07 ` Antonios Motakis
0 siblings, 1 reply; 24+ messages in thread
From: Michael S. Tsirkin @ 2014-01-09 15:47 UTC (permalink / raw)
To: Antonios Motakis; +Cc: lukego, snabb-devel, n.nikolaev, qemu-devel, tech
On Thu, Jan 09, 2014 at 03:59:59PM +0100, Antonios Motakis wrote:
> Each ioctl request of vhost-kernel has a vhost-user message equivalent,
> which is sent it over the control socket.
>
> The general approach is to copy the data from the supplied argument
> pointer to a designated field in the message. If a file descriptor is
> to be passed it should be placed also in the fds array for inclusion in
> the sendmsd control header.
But why put it in the data part then? It seems useless to leak
local fd numbers.
>
> VHOST_SET_MEM_TABLE ignores the supplied vhost_memory structure and scans
> the global ram_list for ram blocks wiht
typo
> a valid fd field set. This would
> be set when the -mem-path option with shared=on property is used.
>
> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> ---
> hw/virtio/vhost-backend.c | 134 +++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 132 insertions(+), 2 deletions(-)
>
> diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> index b33d35f..50ea307 100644
> --- a/hw/virtio/vhost-backend.c
> +++ b/hw/virtio/vhost-backend.c
> @@ -81,8 +81,41 @@ typedef struct VhostUserMsg {
> /* The version of the protocol we support */
> #define VHOST_USER_VERSION (0x1)
>
> +static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
> + -1, /* VHOST_USER_NONE */
> + VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */
> + VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */
> + VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */
> + VHOST_RESET_OWNER, /* VHOST_USER_RESET_OWNER */
> + VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */
> + VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */
> + VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */
> + VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */
> + VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */
> + VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */
> + VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */
> + VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */
> + VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */
> + VHOST_SET_VRING_ERR, /* VHOST_USER_SET_VRING_ERR */
> + VHOST_NET_SET_BACKEND, /* VHOST_USER_NET_SET_BACKEND */
> + -1 /* VHOST_USER_ECHO */
> +};
> +
> static int vhost_user_cleanup(struct vhost_dev *dev);
>
> +static VhostUserRequest vhost_user_request_translate(unsigned long int request)
> +{
> + VhostUserRequest idx;
> +
> + for (idx = 0; idx < VHOST_USER_MAX; idx++) {
> + if (ioctl_to_vhost_user_request[idx] == request) {
> + break;
> + }
> + }
> +
> + return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx;
> +}
> +
> static int vhost_user_recv(int fd, VhostUserMsg *msg)
> {
> ssize_t r;
> @@ -197,7 +230,8 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
> {
> int fd = dev->control;
> VhostUserMsg msg;
> - int result = 0;
> + RAMBlock *block = 0;
> + int result = 0, need_reply = 0;
> int fds[VHOST_MEMORY_MAX_NREGIONS];
> size_t fd_num = 0;
>
> @@ -207,11 +241,78 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
> return -1;
> }
>
> - msg.request = VHOST_USER_NONE;
> + msg.request = vhost_user_request_translate(request);
> msg.flags = VHOST_USER_VERSION;
> msg.size = 0;
>
> switch (request) {
> + case VHOST_GET_FEATURES:
> + case VHOST_GET_VRING_BASE:
> + need_reply = 1;
> + break;
> +
> + case VHOST_SET_FEATURES:
> + case VHOST_SET_LOG_BASE:
> + msg.u64 = *((__u64 *) arg);
> + msg.size = MEMB_SIZE(VhostUserMsg, u64);
> + break;
> +
> + case VHOST_SET_OWNER:
> + case VHOST_RESET_OWNER:
> + break;
> +
> + case VHOST_SET_MEM_TABLE:
> + QTAILQ_FOREACH(block, &ram_list.blocks, next)
> + {
> + if (block->fd > 0) {
> + msg.memory.regions[fd_num].userspace_addr = (__u64) block->host;
> + msg.memory.regions[fd_num].memory_size = block->length;
> + msg.memory.regions[fd_num].guest_phys_addr = block->offset;
> + fds[fd_num++] = block->fd;
> + }
> + }
> +
> + msg.memory.nregions = fd_num;
> +
> + if (!fd_num) {
> + error_report("Failed initializing vhost-user memory map\n"
> + "consider using -mem-path option\n");
> + return -1;
> + }
> +
> + msg.size = MEMB_SIZE(VhostUserMemory, nregions);
> + msg.size += MEMB_SIZE(VhostUserMemory, padding);
> + msg.size += fd_num*sizeof(VhostUserMemoryRegion);
need space around *
> +
> + break;
> +
> + case VHOST_SET_LOG_FD:
> + msg.fd = *((int *) arg);
> + msg.size = MEMB_SIZE(VhostUserMsg, fd);
> + break;
> +
> + case VHOST_SET_VRING_NUM:
> + case VHOST_SET_VRING_BASE:
> + memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
> + msg.size = MEMB_SIZE(VhostUserMsg, state);
> + break;
> +
> + case VHOST_SET_VRING_ADDR:
> + memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
> + msg.size = MEMB_SIZE(VhostUserMsg, addr);
> + break;
> +
> + case VHOST_SET_VRING_KICK:
> + case VHOST_SET_VRING_CALL:
> + case VHOST_SET_VRING_ERR:
> + case VHOST_NET_SET_BACKEND:
> + memcpy(&msg.file, arg, sizeof(struct vhost_vring_file));
> + msg.size = MEMB_SIZE(VhostUserMsg, file);
> + if (msg.file.fd > 0) {
> + fds[0] = msg.file.fd;
> + fd_num = 1;
> + }
> + break;
> default:
> error_report("vhost-user trying to send unhandled ioctl\n");
> return -1;
> @@ -220,6 +321,35 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
>
> result = vhost_user_send_fds(fd, &msg, fds, fd_num);
>
> + if (!result && need_reply) {
> + result = vhost_user_recv(fd, &msg);
shouldn't ypou check result here before using msg?
> +
> + if ((msg.flags & VHOST_USER_REPLY_MASK) == 0 ||
> + (msg.flags & VHOST_USER_VERSION_MASK) != VHOST_USER_VERSION) {
> + error_report("Received bad msg.\n");
> + return -1;
> + }
> +
> + if (!result) {
> + switch (request) {
> + case VHOST_GET_FEATURES:
> + if (msg.size != MEMB_SIZE(VhostUserMsg, u64)) {
> + error_report("Received bad msg.\n");
> + return -1;
> + }
> + *((__u64 *) arg) = msg.u64;
> + break;
> + case VHOST_GET_VRING_BASE:
> + if (msg.size != MEMB_SIZE(VhostUserMsg, state)) {
> + error_report("Received bad msg.\n");
> + return -1;
> + }
> + memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
> + break;
> + }
> + }
> + }
> +
> return result;
> }
>
> --
> 1.8.3.2
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 5/7] Add vhost-user calls implementation
2014-01-09 15:47 ` Michael S. Tsirkin
@ 2014-01-10 11:07 ` Antonios Motakis
0 siblings, 0 replies; 24+ messages in thread
From: Antonios Motakis @ 2014-01-10 11:07 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Luke Gorrie, snabb-devel, Nikolay Nikolaev, qemu-devel qemu-devel,
VirtualOpenSystems Technical Team
[-- Attachment #1: Type: text/plain, Size: 7652 bytes --]
On Thu, Jan 9, 2014 at 4:47 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Thu, Jan 09, 2014 at 03:59:59PM +0100, Antonios Motakis wrote:
> > Each ioctl request of vhost-kernel has a vhost-user message equivalent,
> > which is sent it over the control socket.
> >
> > The general approach is to copy the data from the supplied argument
> > pointer to a designated field in the message. If a file descriptor is
> > to be passed it should be placed also in the fds array for inclusion in
> > the sendmsd control header.
>
> But why put it in the data part then? It seems useless to leak
> local fd numbers.
>
> >
> > VHOST_SET_MEM_TABLE ignores the supplied vhost_memory structure and scans
> > the global ram_list for ram blocks wiht
>
> typo
>
> > a valid fd field set. This would
> > be set when the -mem-path option with shared=on property is used.
> >
> > Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > ---
> > hw/virtio/vhost-backend.c | 134
> +++++++++++++++++++++++++++++++++++++++++++++-
> > 1 file changed, 132 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> > index b33d35f..50ea307 100644
> > --- a/hw/virtio/vhost-backend.c
> > +++ b/hw/virtio/vhost-backend.c
> > @@ -81,8 +81,41 @@ typedef struct VhostUserMsg {
> > /* The version of the protocol we support */
> > #define VHOST_USER_VERSION (0x1)
> >
> > +static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
> > + -1, /* VHOST_USER_NONE */
> > + VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */
> > + VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */
> > + VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */
> > + VHOST_RESET_OWNER, /* VHOST_USER_RESET_OWNER */
> > + VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */
> > + VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */
> > + VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */
> > + VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */
> > + VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */
> > + VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */
> > + VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */
> > + VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */
> > + VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */
> > + VHOST_SET_VRING_ERR, /* VHOST_USER_SET_VRING_ERR */
> > + VHOST_NET_SET_BACKEND, /* VHOST_USER_NET_SET_BACKEND */
> > + -1 /* VHOST_USER_ECHO */
> > +};
> > +
> > static int vhost_user_cleanup(struct vhost_dev *dev);
> >
> > +static VhostUserRequest vhost_user_request_translate(unsigned long int
> request)
> > +{
> > + VhostUserRequest idx;
> > +
> > + for (idx = 0; idx < VHOST_USER_MAX; idx++) {
> > + if (ioctl_to_vhost_user_request[idx] == request) {
> > + break;
> > + }
> > + }
> > +
> > + return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx;
> > +}
> > +
> > static int vhost_user_recv(int fd, VhostUserMsg *msg)
> > {
> > ssize_t r;
> > @@ -197,7 +230,8 @@ static int vhost_user_call(struct vhost_dev *dev,
> unsigned long int request,
> > {
> > int fd = dev->control;
> > VhostUserMsg msg;
> > - int result = 0;
> > + RAMBlock *block = 0;
> > + int result = 0, need_reply = 0;
> > int fds[VHOST_MEMORY_MAX_NREGIONS];
> > size_t fd_num = 0;
> >
> > @@ -207,11 +241,78 @@ static int vhost_user_call(struct vhost_dev *dev,
> unsigned long int request,
> > return -1;
> > }
> >
> > - msg.request = VHOST_USER_NONE;
> > + msg.request = vhost_user_request_translate(request);
> > msg.flags = VHOST_USER_VERSION;
> > msg.size = 0;
> >
> > switch (request) {
> > + case VHOST_GET_FEATURES:
> > + case VHOST_GET_VRING_BASE:
> > + need_reply = 1;
> > + break;
> > +
> > + case VHOST_SET_FEATURES:
> > + case VHOST_SET_LOG_BASE:
> > + msg.u64 = *((__u64 *) arg);
> > + msg.size = MEMB_SIZE(VhostUserMsg, u64);
> > + break;
> > +
> > + case VHOST_SET_OWNER:
> > + case VHOST_RESET_OWNER:
> > + break;
> > +
> > + case VHOST_SET_MEM_TABLE:
> > + QTAILQ_FOREACH(block, &ram_list.blocks, next)
> > + {
> > + if (block->fd > 0) {
> > + msg.memory.regions[fd_num].userspace_addr = (__u64)
> block->host;
> > + msg.memory.regions[fd_num].memory_size = block->length;
> > + msg.memory.regions[fd_num].guest_phys_addr =
> block->offset;
> > + fds[fd_num++] = block->fd;
> > + }
> > + }
> > +
> > + msg.memory.nregions = fd_num;
> > +
> > + if (!fd_num) {
> > + error_report("Failed initializing vhost-user memory map\n"
> > + "consider using -mem-path option\n");
> > + return -1;
> > + }
> > +
> > + msg.size = MEMB_SIZE(VhostUserMemory, nregions);
> > + msg.size += MEMB_SIZE(VhostUserMemory, padding);
> > + msg.size += fd_num*sizeof(VhostUserMemoryRegion);
>
> need space around *
> > +
> > + break;
> > +
> > + case VHOST_SET_LOG_FD:
> > + msg.fd = *((int *) arg);
> > + msg.size = MEMB_SIZE(VhostUserMsg, fd);
> > + break;
> > +
> > + case VHOST_SET_VRING_NUM:
> > + case VHOST_SET_VRING_BASE:
> > + memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
> > + msg.size = MEMB_SIZE(VhostUserMsg, state);
> > + break;
> > +
> > + case VHOST_SET_VRING_ADDR:
> > + memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
> > + msg.size = MEMB_SIZE(VhostUserMsg, addr);
> > + break;
> > +
> > + case VHOST_SET_VRING_KICK:
> > + case VHOST_SET_VRING_CALL:
> > + case VHOST_SET_VRING_ERR:
> > + case VHOST_NET_SET_BACKEND:
> > + memcpy(&msg.file, arg, sizeof(struct vhost_vring_file));
> > + msg.size = MEMB_SIZE(VhostUserMsg, file);
> > + if (msg.file.fd > 0) {
> > + fds[0] = msg.file.fd;
> > + fd_num = 1;
> > + }
> > + break;
> > default:
> > error_report("vhost-user trying to send unhandled ioctl\n");
> > return -1;
> > @@ -220,6 +321,35 @@ static int vhost_user_call(struct vhost_dev *dev,
> unsigned long int request,
> >
> > result = vhost_user_send_fds(fd, &msg, fds, fd_num);
> >
> > + if (!result && need_reply) {
> > + result = vhost_user_recv(fd, &msg);
>
> shouldn't ypou check result here before using msg?
>
>
> > +
> > + if ((msg.flags & VHOST_USER_REPLY_MASK) == 0 ||
> > + (msg.flags & VHOST_USER_VERSION_MASK) !=
> VHOST_USER_VERSION) {
> > + error_report("Received bad msg.\n");
> > + return -1;
> > + }
> > +
> > + if (!result) {
> > + switch (request) {
> > + case VHOST_GET_FEATURES:
> > + if (msg.size != MEMB_SIZE(VhostUserMsg, u64)) {
> > + error_report("Received bad msg.\n");
> > + return -1;
> > + }
> > + *((__u64 *) arg) = msg.u64;
> > + break;
> > + case VHOST_GET_VRING_BASE:
> > + if (msg.size != MEMB_SIZE(VhostUserMsg, state)) {
> > + error_report("Received bad msg.\n");
> > + return -1;
> > + }
> > + memcpy(arg, &msg.state, sizeof(struct
> vhost_vring_state));
> > + break;
> > + }
> > + }
> > + }
> > +
> > return result;
> > }
> >
> > --
> > 1.8.3.2
> >
>
Ack.
[-- Attachment #2: Type: text/html, Size: 9596 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH v5 6/7] Add new vhost-user netdev backend
2014-01-09 14:59 [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends Antonios Motakis
` (4 preceding siblings ...)
2014-01-09 14:59 ` [Qemu-devel] [PATCH v5 5/7] Add vhost-user calls implementation Antonios Motakis
@ 2014-01-09 15:00 ` Antonios Motakis
2014-01-09 16:14 ` Michael S. Tsirkin
2014-01-09 15:00 ` [Qemu-devel] [PATCH v5 7/7] Add vhost-user reconnection Antonios Motakis
2014-01-09 16:11 ` [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends Michael S. Tsirkin
7 siblings, 1 reply; 24+ messages in thread
From: Antonios Motakis @ 2014-01-09 15:00 UTC (permalink / raw)
To: qemu-devel, snabb-devel
Cc: Stefan Hajnoczi, Michael S. Tsirkin, Jason Wang, Michael Tokarev,
Markus Armbruster, n.nikolaev, Luiz Capitulino, Anthony Liguori,
Paolo Bonzini, lukego, Antonios Motakis, tech
Add a new QEMU netdev backend that is intended to invoke vhost_net
with the vhost-user backend. Also decouple virtio-net from the tap
backend.
Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
hmp-commands.hx | 4 +-
hw/net/vhost_net.c | 66 ++++++++++++++++++++++------
hw/net/virtio-net.c | 42 ++++++++----------
hw/virtio/vhost.c | 1 -
include/net/vhost-user.h | 17 ++++++++
include/net/vhost_net.h | 1 +
net/Makefile.objs | 2 +-
net/clients.h | 3 ++
net/hub.c | 1 +
net/net.c | 2 +
net/vhost-user.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++
qapi-schema.json | 18 +++++++-
qemu-options.hx | 3 ++
13 files changed, 227 insertions(+), 44 deletions(-)
create mode 100644 include/net/vhost-user.h
create mode 100644 net/vhost-user.c
diff --git a/hmp-commands.hx b/hmp-commands.hx
index ebe8e78..d5a3774 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1190,7 +1190,7 @@ ETEXI
{
.name = "host_net_add",
.args_type = "device:s,opts:s?",
- .params = "tap|user|socket|vde|netmap|dump [options]",
+ .params = "tap|user|socket|vde|netmap|vhost-user|dump [options]",
.help = "add host VLAN client",
.mhandler.cmd = net_host_device_add,
},
@@ -1218,7 +1218,7 @@ ETEXI
{
.name = "netdev_add",
.args_type = "netdev:O",
- .params = "[user|tap|socket|hubport|netmap],id=str[,prop=value][,...]",
+ .params = "[user|tap|socket|hubport|netmap|vhost-user],id=str[,prop=value][,...]",
.help = "add host network device",
.mhandler.cmd = hmp_netdev_add,
},
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 3614e6c..e42f4d6 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -15,6 +15,7 @@
#include "net/net.h"
#include "net/tap.h"
+#include "net/vhost-user.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
@@ -174,15 +175,20 @@ static int vhost_net_start_one(struct vhost_net *net,
goto fail_start;
}
- net->nc->info->poll(net->nc, false);
- qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
- file.fd = net->backend;
- for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
- const VhostOps *vhost_ops = net->dev.vhost_ops;
- r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, &file);
- if (r < 0) {
- r = -errno;
- goto fail;
+ if (net->nc->info->poll) {
+ net->nc->info->poll(net->nc, false);
+ }
+
+ if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
+ qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
+ file.fd = net->backend;
+ for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
+ const VhostOps *vhost_ops = net->dev.vhost_ops;
+ r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, &file);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
}
}
return 0;
@@ -193,7 +199,9 @@ fail:
int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, &file);
assert(r >= 0);
}
- net->nc->info->poll(net->nc, true);
+ if (net->nc->info->poll) {
+ net->nc->info->poll(net->nc, true);
+ }
vhost_dev_stop(&net->dev, dev);
fail_start:
vhost_dev_disable_notifiers(&net->dev, dev);
@@ -215,7 +223,9 @@ static void vhost_net_stop_one(struct vhost_net *net,
int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, &file);
assert(r >= 0);
}
- net->nc->info->poll(net->nc, true);
+ if (net->nc->info->poll) {
+ net->nc->info->poll(net->nc, true);
+ }
vhost_dev_stop(&net->dev, dev);
vhost_dev_disable_notifiers(&net->dev, dev);
}
@@ -235,7 +245,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
}
for (i = 0; i < total_queues; i++) {
- r = vhost_net_start_one(tap_get_vhost_net(ncs[i].peer), dev, i * 2);
+ r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev, i * 2);
if (r < 0) {
goto err;
@@ -252,7 +262,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
err:
while (--i >= 0) {
- vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
+ vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
}
return r;
}
@@ -273,7 +283,7 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
assert(r >= 0);
for (i = 0; i < total_queues; i++) {
- vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
+ vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
}
}
@@ -293,6 +303,29 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
{
vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}
+
+VHostNetState *get_vhost_net(NetClientState *nc)
+{
+ VHostNetState *vhost_net = 0;
+
+ if (!nc) {
+ return 0;
+ }
+
+ switch (nc->info->type) {
+ case NET_CLIENT_OPTIONS_KIND_TAP:
+ vhost_net = tap_get_vhost_net(nc);
+ break;
+ case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
+ vhost_net = vhost_user_get_vhost_net(nc);
+ break;
+ default:
+ break;
+ }
+
+ return vhost_net;
+}
+
#else
struct vhost_net *vhost_net_init(VhostNetOptions *options)
{
@@ -338,4 +371,9 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask)
{
}
+
+VHostNetState *get_vhost_net(NetClientState *nc)
+{
+ return 0;
+}
#endif
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 3626608..d49ee82 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -105,14 +105,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
NetClientState *nc = qemu_get_queue(n->nic);
int queues = n->multiqueue ? n->max_queues : 1;
- if (!nc->peer) {
- return;
- }
- if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
- return;
- }
-
- if (!tap_get_vhost_net(nc->peer)) {
+ if (!get_vhost_net(nc->peer)) {
return;
}
@@ -122,7 +115,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
}
if (!n->vhost_started) {
int r;
- if (!vhost_net_query(tap_get_vhost_net(nc->peer), vdev)) {
+ if (!vhost_net_query(get_vhost_net(nc->peer), vdev)) {
return;
}
n->vhost_started = 1;
@@ -325,11 +318,16 @@ static void peer_test_vnet_hdr(VirtIONet *n)
return;
}
- if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
- return;
+ switch (nc->peer->info->type) {
+ case NET_CLIENT_OPTIONS_KIND_TAP:
+ n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
+ break;
+ case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
+ n->has_vnet_hdr = 0;
+ break;
+ default:
+ break;
}
-
- n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
}
static int peer_has_vnet_hdr(VirtIONet *n)
@@ -437,13 +435,10 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
}
- if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
+ if (!get_vhost_net(nc->peer)) {
return features;
}
- if (!tap_get_vhost_net(nc->peer)) {
- return features;
- }
- return vhost_net_get_features(tap_get_vhost_net(nc->peer), features);
+ return vhost_net_get_features(get_vhost_net(nc->peer), features);
}
static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
@@ -507,13 +502,10 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
for (i = 0; i < n->max_queues; i++) {
NetClientState *nc = qemu_get_subqueue(n->nic, i);
- if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
- continue;
- }
- if (!tap_get_vhost_net(nc->peer)) {
+ if (!get_vhost_net(nc->peer)) {
continue;
}
- vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
+ vhost_net_ack_features(get_vhost_net(nc->peer), features);
}
}
@@ -1443,7 +1435,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
VirtIONet *n = VIRTIO_NET(vdev);
NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
assert(n->vhost_started);
- return vhost_net_virtqueue_pending(tap_get_vhost_net(nc->peer), idx);
+ return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
@@ -1452,7 +1444,7 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
VirtIONet *n = VIRTIO_NET(vdev);
NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
assert(n->vhost_started);
- vhost_net_virtqueue_mask(tap_get_vhost_net(nc->peer),
+ vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
vdev, idx, mask);
}
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index a1137e1..fe622fb 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -14,7 +14,6 @@
*/
#include "hw/virtio/vhost.h"
-#include "hw/virtio/vhost-backend.h"
#include "hw/hw.h"
#include "qemu/atomic.h"
#include "qemu/range.h"
diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
new file mode 100644
index 0000000..85109f6
--- /dev/null
+++ b/include/net/vhost-user.h
@@ -0,0 +1,17 @@
+/*
+ * vhost-user.h
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_USER_H_
+#define VHOST_USER_H_
+
+struct vhost_net;
+struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
+
+#endif /* VHOST_USER_H_ */
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 1169562..abd3d0b 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -31,4 +31,5 @@ void vhost_net_ack_features(VHostNetState *net, unsigned features);
bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask);
+VHostNetState *get_vhost_net(NetClientState *nc);
#endif
diff --git a/net/Makefile.objs b/net/Makefile.objs
index c25fe69..301f6b6 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,7 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
common-obj-y += socket.o
common-obj-y += dump.o
common-obj-y += eth.o
-common-obj-$(CONFIG_POSIX) += tap.o
+common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o
common-obj-$(CONFIG_LINUX) += tap-linux.o
common-obj-$(CONFIG_WIN32) += tap-win32.o
common-obj-$(CONFIG_BSD) += tap-bsd.o
diff --git a/net/clients.h b/net/clients.h
index 7322ff5..7f3d4ae 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -57,4 +57,7 @@ int net_init_netmap(const NetClientOptions *opts, const char *name,
NetClientState *peer);
#endif
+int net_init_vhost_user(const NetClientOptions *opts, const char *name,
+ NetClientState *peer);
+
#endif /* QEMU_NET_CLIENTS_H */
diff --git a/net/hub.c b/net/hub.c
index 33a99c9..7e0f2d6 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -322,6 +322,7 @@ void net_hub_check_clients(void)
case NET_CLIENT_OPTIONS_KIND_TAP:
case NET_CLIENT_OPTIONS_KIND_SOCKET:
case NET_CLIENT_OPTIONS_KIND_VDE:
+ case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
has_host_dev = 1;
break;
default:
diff --git a/net/net.c b/net/net.c
index 9db88cc..0f057c5 100644
--- a/net/net.c
+++ b/net/net.c
@@ -734,6 +734,7 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
[NET_CLIENT_OPTIONS_KIND_BRIDGE] = net_init_bridge,
#endif
[NET_CLIENT_OPTIONS_KIND_HUBPORT] = net_init_hubport,
+ [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
};
@@ -767,6 +768,7 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
case NET_CLIENT_OPTIONS_KIND_BRIDGE:
#endif
case NET_CLIENT_OPTIONS_KIND_HUBPORT:
+ case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
break;
default:
diff --git a/net/vhost-user.c b/net/vhost-user.c
new file mode 100644
index 0000000..6fd5afc
--- /dev/null
+++ b/net/vhost-user.c
@@ -0,0 +1,111 @@
+/*
+ * vhost-user.c
+ *
+ * Copyright (c) 2013 Virtual Open Systems Sarl.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "clients.h"
+#include "net/vhost_net.h"
+#include "net/vhost-user.h"
+#include "qemu/error-report.h"
+
+typedef struct VhostUserState {
+ NetClientState nc;
+ VHostNetState *vhost_net;
+ char *devpath;
+} VhostUserState;
+
+VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
+{
+ VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
+ assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+ return s->vhost_net;
+}
+
+static int vhost_user_running(VhostUserState *s)
+{
+ return (s->vhost_net) ? 1 : 0;
+}
+
+static int vhost_user_start(VhostUserState *s)
+{
+ VhostNetOptions options;
+
+ if (vhost_user_running(s)) {
+ return 1;
+ }
+
+ options.backend_type = VHOST_BACKEND_TYPE_USER;
+ options.net_backend = &s->nc;
+ options.devpath = s->devpath;
+ options.devfd = -1;
+ options.force = 1;
+
+ s->vhost_net = vhost_net_init(&options);
+
+ return vhost_user_running(s) ? 0 : -1;
+}
+
+static void vhost_user_stop(VhostUserState *s)
+{
+ if (vhost_user_running(s)) {
+ vhost_net_cleanup(s->vhost_net);
+ }
+
+ s->vhost_net = 0;
+}
+
+static void vhost_user_cleanup(NetClientState *nc)
+{
+ VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
+
+ vhost_user_stop(s);
+ qemu_purge_queued_packets(nc);
+}
+
+static NetClientInfo net_vhost_user_info = {
+ .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER,
+ .size = sizeof(VhostUserState),
+ .cleanup = vhost_user_cleanup,
+};
+
+static int net_vhost_user_init(NetClientState *peer, const char *device,
+ const char *name, const char *filename)
+{
+ NetClientState *nc;
+ VhostUserState *s;
+ int r;
+
+ nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
+
+ snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s", filename);
+
+ s = DO_UPCAST(VhostUserState, nc, nc);
+
+ /* We don't provide a receive callback */
+ s->nc.receive_disabled = 1;
+
+ s->devpath = g_strdup(filename);
+
+ r = vhost_user_start(s);
+
+ return r;
+}
+
+int net_init_vhost_user(const NetClientOptions *opts, const char *name,
+ NetClientState *peer)
+{
+ const char *file;
+ const NetdevVhostUserOptions *vhost_user;
+
+ assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
+ vhost_user = opts->vhost_user;
+
+ file = vhost_user->file;
+
+ return net_vhost_user_init(peer, "vhost_user", name, file);
+}
diff --git a/qapi-schema.json b/qapi-schema.json
index c3c939c..3101287 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3030,6 +3030,21 @@
'*devname': 'str' } }
##
+# @NetdevVhostUserOptions
+#
+# Vhost-user network backend
+#
+# @file: control socket path
+#
+# Since 2.0
+##
+{ 'type': 'NetdevVhostUserOptions',
+ 'data': {
+ 'file': 'str' } }
+
+##
+
+##
# @NetClientOptions
#
# A discriminated record of network device traits.
@@ -3047,7 +3062,8 @@
'dump': 'NetdevDumpOptions',
'bridge': 'NetdevBridgeOptions',
'hubport': 'NetdevHubPortOptions',
- 'netmap': 'NetdevNetmapOptions' } }
+ 'netmap': 'NetdevNetmapOptions',
+ 'vhost-user': 'NetdevVhostUserOptions' } }
##
# @NetLegacy
diff --git a/qemu-options.hx b/qemu-options.hx
index 0d35c9c..edf25d0 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1419,6 +1419,8 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
" VALE port (created on the fly) called 'name' ('nmname' is name of the \n"
" netmap device, defaults to '/dev/netmap')\n"
#endif
+ "-net vhost-user,file=name\n"
+ " connect to a unix domain socket implementing vhost-user backend\n"
"-net dump[,vlan=n][,file=f][,len=n]\n"
" dump traffic on vlan 'n' to file 'f' (max n bytes per packet)\n"
"-net none use it alone to have zero network devices. If no -net option\n"
@@ -1436,6 +1438,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
#ifdef CONFIG_NETMAP
"netmap|"
#endif
+ "vhost-user|"
"socket|"
"hubport],id=str[,option][,option][,...]\n", QEMU_ARCH_ALL)
STEXI
--
1.8.3.2
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 6/7] Add new vhost-user netdev backend
2014-01-09 15:00 ` [Qemu-devel] [PATCH v5 6/7] Add new vhost-user netdev backend Antonios Motakis
@ 2014-01-09 16:14 ` Michael S. Tsirkin
2014-01-10 11:00 ` Antonios Motakis
0 siblings, 1 reply; 24+ messages in thread
From: Michael S. Tsirkin @ 2014-01-09 16:14 UTC (permalink / raw)
To: Antonios Motakis
Cc: snabb-devel, Anthony Liguori, tech, Jason Wang, Michael Tokarev,
qemu-devel, n.nikolaev, Markus Armbruster, Stefan Hajnoczi,
lukego, Paolo Bonzini, Luiz Capitulino
On Thu, Jan 09, 2014 at 04:00:00PM +0100, Antonios Motakis wrote:
> Add a new QEMU netdev backend that is intended to invoke vhost_net
> with the vhost-user backend. Also decouple virtio-net from the tap
> backend.
>
> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> ---
> hmp-commands.hx | 4 +-
> hw/net/vhost_net.c | 66 ++++++++++++++++++++++------
> hw/net/virtio-net.c | 42 ++++++++----------
> hw/virtio/vhost.c | 1 -
> include/net/vhost-user.h | 17 ++++++++
> include/net/vhost_net.h | 1 +
> net/Makefile.objs | 2 +-
> net/clients.h | 3 ++
> net/hub.c | 1 +
> net/net.c | 2 +
> net/vhost-user.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++
> qapi-schema.json | 18 +++++++-
> qemu-options.hx | 3 ++
> 13 files changed, 227 insertions(+), 44 deletions(-)
> create mode 100644 include/net/vhost-user.h
> create mode 100644 net/vhost-user.c
>
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index ebe8e78..d5a3774 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1190,7 +1190,7 @@ ETEXI
> {
> .name = "host_net_add",
> .args_type = "device:s,opts:s?",
> - .params = "tap|user|socket|vde|netmap|dump [options]",
> + .params = "tap|user|socket|vde|netmap|vhost-user|dump [options]",
> .help = "add host VLAN client",
> .mhandler.cmd = net_host_device_add,
> },
> @@ -1218,7 +1218,7 @@ ETEXI
> {
> .name = "netdev_add",
> .args_type = "netdev:O",
> - .params = "[user|tap|socket|hubport|netmap],id=str[,prop=value][,...]",
> + .params = "[user|tap|socket|hubport|netmap|vhost-user],id=str[,prop=value][,...]",
> .help = "add host network device",
> .mhandler.cmd = hmp_netdev_add,
> },
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 3614e6c..e42f4d6 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -15,6 +15,7 @@
>
> #include "net/net.h"
> #include "net/tap.h"
> +#include "net/vhost-user.h"
>
> #include "hw/virtio/virtio-net.h"
> #include "net/vhost_net.h"
> @@ -174,15 +175,20 @@ static int vhost_net_start_one(struct vhost_net *net,
> goto fail_start;
> }
>
> - net->nc->info->poll(net->nc, false);
> - qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
> - file.fd = net->backend;
> - for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
> - const VhostOps *vhost_ops = net->dev.vhost_ops;
> - r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, &file);
> - if (r < 0) {
> - r = -errno;
> - goto fail;
> + if (net->nc->info->poll) {
> + net->nc->info->poll(net->nc, false);
> + }
> +
> + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
> + qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
> + file.fd = net->backend;
> + for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
> + const VhostOps *vhost_ops = net->dev.vhost_ops;
> + r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, &file);
> + if (r < 0) {
> + r = -errno;
> + goto fail;
> + }
> }
> }
> return 0;
> @@ -193,7 +199,9 @@ fail:
> int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, &file);
> assert(r >= 0);
> }
> - net->nc->info->poll(net->nc, true);
> + if (net->nc->info->poll) {
> + net->nc->info->poll(net->nc, true);
> + }
> vhost_dev_stop(&net->dev, dev);
> fail_start:
> vhost_dev_disable_notifiers(&net->dev, dev);
> @@ -215,7 +223,9 @@ static void vhost_net_stop_one(struct vhost_net *net,
> int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND, &file);
> assert(r >= 0);
> }
> - net->nc->info->poll(net->nc, true);
> + if (net->nc->info->poll) {
> + net->nc->info->poll(net->nc, true);
> + }
> vhost_dev_stop(&net->dev, dev);
> vhost_dev_disable_notifiers(&net->dev, dev);
> }
> @@ -235,7 +245,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
> }
>
> for (i = 0; i < total_queues; i++) {
> - r = vhost_net_start_one(tap_get_vhost_net(ncs[i].peer), dev, i * 2);
> + r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev, i * 2);
>
> if (r < 0) {
> goto err;
> @@ -252,7 +262,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
>
> err:
> while (--i >= 0) {
> - vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
> + vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
> }
> return r;
> }
> @@ -273,7 +283,7 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
> assert(r >= 0);
>
> for (i = 0; i < total_queues; i++) {
> - vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
> + vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
> }
> }
>
> @@ -293,6 +303,29 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
> {
> vhost_virtqueue_mask(&net->dev, dev, idx, mask);
> }
> +
> +VHostNetState *get_vhost_net(NetClientState *nc)
> +{
> + VHostNetState *vhost_net = 0;
> +
> + if (!nc) {
> + return 0;
> + }
> +
> + switch (nc->info->type) {
> + case NET_CLIENT_OPTIONS_KIND_TAP:
> + vhost_net = tap_get_vhost_net(nc);
> + break;
> + case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> + vhost_net = vhost_user_get_vhost_net(nc);
> + break;
> + default:
> + break;
> + }
> +
> + return vhost_net;
> +}
> +
> #else
> struct vhost_net *vhost_net_init(VhostNetOptions *options)
> {
> @@ -338,4 +371,9 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
> int idx, bool mask)
> {
> }
> +
> +VHostNetState *get_vhost_net(NetClientState *nc)
> +{
> + return 0;
> +}
> #endif
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 3626608..d49ee82 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -105,14 +105,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
> NetClientState *nc = qemu_get_queue(n->nic);
> int queues = n->multiqueue ? n->max_queues : 1;
>
> - if (!nc->peer) {
> - return;
> - }
> - if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
> - return;
> - }
> -
> - if (!tap_get_vhost_net(nc->peer)) {
> + if (!get_vhost_net(nc->peer)) {
> return;
> }
>
> @@ -122,7 +115,7 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
> }
> if (!n->vhost_started) {
> int r;
> - if (!vhost_net_query(tap_get_vhost_net(nc->peer), vdev)) {
> + if (!vhost_net_query(get_vhost_net(nc->peer), vdev)) {
> return;
> }
> n->vhost_started = 1;
> @@ -325,11 +318,16 @@ static void peer_test_vnet_hdr(VirtIONet *n)
> return;
> }
>
> - if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
> - return;
> + switch (nc->peer->info->type) {
> + case NET_CLIENT_OPTIONS_KIND_TAP:
> + n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
> + break;
> + case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> + n->has_vnet_hdr = 0;
> + break;
> + default:
> + break;
> }
> -
> - n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
> }
>
> static int peer_has_vnet_hdr(VirtIONet *n)
> @@ -437,13 +435,10 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
> features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
> }
>
> - if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
> + if (!get_vhost_net(nc->peer)) {
> return features;
> }
> - if (!tap_get_vhost_net(nc->peer)) {
> - return features;
> - }
> - return vhost_net_get_features(tap_get_vhost_net(nc->peer), features);
> + return vhost_net_get_features(get_vhost_net(nc->peer), features);
> }
>
> static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
> @@ -507,13 +502,10 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
> for (i = 0; i < n->max_queues; i++) {
> NetClientState *nc = qemu_get_subqueue(n->nic, i);
>
> - if (!nc->peer || nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
> - continue;
> - }
> - if (!tap_get_vhost_net(nc->peer)) {
> + if (!get_vhost_net(nc->peer)) {
> continue;
> }
> - vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
> + vhost_net_ack_features(get_vhost_net(nc->peer), features);
> }
> }
>
> @@ -1443,7 +1435,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
> VirtIONet *n = VIRTIO_NET(vdev);
> NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
> assert(n->vhost_started);
> - return vhost_net_virtqueue_pending(tap_get_vhost_net(nc->peer), idx);
> + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
> }
>
> static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
> @@ -1452,7 +1444,7 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
> VirtIONet *n = VIRTIO_NET(vdev);
> NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
> assert(n->vhost_started);
> - vhost_net_virtqueue_mask(tap_get_vhost_net(nc->peer),
> + vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
> vdev, idx, mask);
> }
>
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index a1137e1..fe622fb 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -14,7 +14,6 @@
> */
>
> #include "hw/virtio/vhost.h"
> -#include "hw/virtio/vhost-backend.h"
> #include "hw/hw.h"
> #include "qemu/atomic.h"
> #include "qemu/range.h"
> diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
> new file mode 100644
> index 0000000..85109f6
> --- /dev/null
> +++ b/include/net/vhost-user.h
> @@ -0,0 +1,17 @@
> +/*
> + * vhost-user.h
> + *
> + * Copyright (c) 2013 Virtual Open Systems Sarl.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef VHOST_USER_H_
> +#define VHOST_USER_H_
> +
> +struct vhost_net;
> +struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
> +
> +#endif /* VHOST_USER_H_ */
> diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> index 1169562..abd3d0b 100644
> --- a/include/net/vhost_net.h
> +++ b/include/net/vhost_net.h
> @@ -31,4 +31,5 @@ void vhost_net_ack_features(VHostNetState *net, unsigned features);
> bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
> void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
> int idx, bool mask);
> +VHostNetState *get_vhost_net(NetClientState *nc);
> #endif
> diff --git a/net/Makefile.objs b/net/Makefile.objs
> index c25fe69..301f6b6 100644
> --- a/net/Makefile.objs
> +++ b/net/Makefile.objs
> @@ -2,7 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
> common-obj-y += socket.o
> common-obj-y += dump.o
> common-obj-y += eth.o
> -common-obj-$(CONFIG_POSIX) += tap.o
> +common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o
> common-obj-$(CONFIG_LINUX) += tap-linux.o
> common-obj-$(CONFIG_WIN32) += tap-win32.o
> common-obj-$(CONFIG_BSD) += tap-bsd.o
> diff --git a/net/clients.h b/net/clients.h
> index 7322ff5..7f3d4ae 100644
> --- a/net/clients.h
> +++ b/net/clients.h
> @@ -57,4 +57,7 @@ int net_init_netmap(const NetClientOptions *opts, const char *name,
> NetClientState *peer);
> #endif
>
> +int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> + NetClientState *peer);
> +
> #endif /* QEMU_NET_CLIENTS_H */
> diff --git a/net/hub.c b/net/hub.c
> index 33a99c9..7e0f2d6 100644
> --- a/net/hub.c
> +++ b/net/hub.c
> @@ -322,6 +322,7 @@ void net_hub_check_clients(void)
> case NET_CLIENT_OPTIONS_KIND_TAP:
> case NET_CLIENT_OPTIONS_KIND_SOCKET:
> case NET_CLIENT_OPTIONS_KIND_VDE:
> + case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> has_host_dev = 1;
> break;
> default:
> diff --git a/net/net.c b/net/net.c
> index 9db88cc..0f057c5 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -734,6 +734,7 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
> [NET_CLIENT_OPTIONS_KIND_BRIDGE] = net_init_bridge,
> #endif
> [NET_CLIENT_OPTIONS_KIND_HUBPORT] = net_init_hubport,
> + [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
> };
>
>
> @@ -767,6 +768,7 @@ static int net_client_init1(const void *object, int is_netdev, Error **errp)
> case NET_CLIENT_OPTIONS_KIND_BRIDGE:
> #endif
> case NET_CLIENT_OPTIONS_KIND_HUBPORT:
> + case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> break;
>
> default:
> diff --git a/net/vhost-user.c b/net/vhost-user.c
> new file mode 100644
> index 0000000..6fd5afc
> --- /dev/null
> +++ b/net/vhost-user.c
> @@ -0,0 +1,111 @@
> +/*
> + * vhost-user.c
> + *
> + * Copyright (c) 2013 Virtual Open Systems Sarl.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "clients.h"
> +#include "net/vhost_net.h"
> +#include "net/vhost-user.h"
> +#include "qemu/error-report.h"
> +
> +typedef struct VhostUserState {
> + NetClientState nc;
> + VHostNetState *vhost_net;
> + char *devpath;
> +} VhostUserState;
> +
> +VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
> +{
> + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
> + return s->vhost_net;
> +}
> +
> +static int vhost_user_running(VhostUserState *s)
> +{
> + return (s->vhost_net) ? 1 : 0;
> +}
> +
> +static int vhost_user_start(VhostUserState *s)
> +{
> + VhostNetOptions options;
> +
> + if (vhost_user_running(s)) {
> + return 1;
> + }
> +
> + options.backend_type = VHOST_BACKEND_TYPE_USER;
> + options.net_backend = &s->nc;
> + options.devpath = s->devpath;
> + options.devfd = -1;
> + options.force = 1;
> +
> + s->vhost_net = vhost_net_init(&options);
> +
> + return vhost_user_running(s) ? 0 : -1;
> +}
> +
> +static void vhost_user_stop(VhostUserState *s)
> +{
> + if (vhost_user_running(s)) {
> + vhost_net_cleanup(s->vhost_net);
> + }
> +
> + s->vhost_net = 0;
> +}
> +
> +static void vhost_user_cleanup(NetClientState *nc)
> +{
> + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> +
> + vhost_user_stop(s);
> + qemu_purge_queued_packets(nc);
> +}
> +
> +static NetClientInfo net_vhost_user_info = {
> + .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER,
> + .size = sizeof(VhostUserState),
> + .cleanup = vhost_user_cleanup,
> +};
> +
> +static int net_vhost_user_init(NetClientState *peer, const char *device,
> + const char *name, const char *filename)
> +{
> + NetClientState *nc;
> + VhostUserState *s;
> + int r;
> +
> + nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
> +
> + snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s", filename);
> +
> + s = DO_UPCAST(VhostUserState, nc, nc);
> +
> + /* We don't provide a receive callback */
> + s->nc.receive_disabled = 1;
> +
> + s->devpath = g_strdup(filename);
> +
> + r = vhost_user_start(s);
> +
> + return r;
> +}
> +
> +int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> + NetClientState *peer)
> +{
> + const char *file;
> + const NetdevVhostUserOptions *vhost_user;
> +
> + assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
> + vhost_user = opts->vhost_user;
> +
> + file = vhost_user->file;
> +
> + return net_vhost_user_init(peer, "vhost_user", name, file);
> +}
> diff --git a/qapi-schema.json b/qapi-schema.json
> index c3c939c..3101287 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -3030,6 +3030,21 @@
> '*devname': 'str' } }
>
> ##
> +# @NetdevVhostUserOptions
> +#
> +# Vhost-user network backend
> +#
> +# @file: control socket path
> +#
> +# Since 2.0
> +##
> +{ 'type': 'NetdevVhostUserOptions',
> + 'data': {
> + 'file': 'str' } }
> +
> +##
> +
> +##
> # @NetClientOptions
> #
> # A discriminated record of network device traits.
> @@ -3047,7 +3062,8 @@
> 'dump': 'NetdevDumpOptions',
> 'bridge': 'NetdevBridgeOptions',
> 'hubport': 'NetdevHubPortOptions',
> - 'netmap': 'NetdevNetmapOptions' } }
> + 'netmap': 'NetdevNetmapOptions',
> + 'vhost-user': 'NetdevVhostUserOptions' } }
>
> ##
> # @NetLegacy
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 0d35c9c..edf25d0 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -1419,6 +1419,8 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
> " VALE port (created on the fly) called 'name' ('nmname' is name of the \n"
> " netmap device, defaults to '/dev/netmap')\n"
> #endif
> + "-net vhost-user,file=name\n"
> + " connect to a unix domain socket implementing vhost-user backend\n"
I'm guessing this does not work, you really must use this with -netdev?
> "-net dump[,vlan=n][,file=f][,len=n]\n"
> " dump traffic on vlan 'n' to file 'f' (max n bytes per packet)\n"
> "-net none use it alone to have zero network devices. If no -net option\n"
> @@ -1436,6 +1438,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
> #ifdef CONFIG_NETMAP
> "netmap|"
> #endif
> + "vhost-user|"
> "socket|"
> "hubport],id=str[,option][,option][,...]\n", QEMU_ARCH_ALL)
> STEXI
> --
> 1.8.3.2
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 6/7] Add new vhost-user netdev backend
2014-01-09 16:14 ` Michael S. Tsirkin
@ 2014-01-10 11:00 ` Antonios Motakis
0 siblings, 0 replies; 24+ messages in thread
From: Antonios Motakis @ 2014-01-10 11:00 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: snabb-devel, Anthony Liguori, VirtualOpenSystems Technical Team,
Jason Wang, Michael Tokarev, qemu-devel qemu-devel,
Nikolay Nikolaev, Markus Armbruster, Stefan Hajnoczi, Luke Gorrie,
Paolo Bonzini, Luiz Capitulino
[-- Attachment #1: Type: text/plain, Size: 19649 bytes --]
On Thu, Jan 9, 2014 at 5:14 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Thu, Jan 09, 2014 at 04:00:00PM +0100, Antonios Motakis wrote:
> > Add a new QEMU netdev backend that is intended to invoke vhost_net
> > with the vhost-user backend. Also decouple virtio-net from the tap
> > backend.
> >
> > Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > ---
> > hmp-commands.hx | 4 +-
> > hw/net/vhost_net.c | 66 ++++++++++++++++++++++------
> > hw/net/virtio-net.c | 42 ++++++++----------
> > hw/virtio/vhost.c | 1 -
> > include/net/vhost-user.h | 17 ++++++++
> > include/net/vhost_net.h | 1 +
> > net/Makefile.objs | 2 +-
> > net/clients.h | 3 ++
> > net/hub.c | 1 +
> > net/net.c | 2 +
> > net/vhost-user.c | 111
> +++++++++++++++++++++++++++++++++++++++++++++++
> > qapi-schema.json | 18 +++++++-
> > qemu-options.hx | 3 ++
> > 13 files changed, 227 insertions(+), 44 deletions(-)
> > create mode 100644 include/net/vhost-user.h
> > create mode 100644 net/vhost-user.c
> >
> > diff --git a/hmp-commands.hx b/hmp-commands.hx
> > index ebe8e78..d5a3774 100644
> > --- a/hmp-commands.hx
> > +++ b/hmp-commands.hx
> > @@ -1190,7 +1190,7 @@ ETEXI
> > {
> > .name = "host_net_add",
> > .args_type = "device:s,opts:s?",
> > - .params = "tap|user|socket|vde|netmap|dump [options]",
> > + .params = "tap|user|socket|vde|netmap|vhost-user|dump
> [options]",
> > .help = "add host VLAN client",
> > .mhandler.cmd = net_host_device_add,
> > },
> > @@ -1218,7 +1218,7 @@ ETEXI
> > {
> > .name = "netdev_add",
> > .args_type = "netdev:O",
> > - .params =
> "[user|tap|socket|hubport|netmap],id=str[,prop=value][,...]",
> > + .params =
> "[user|tap|socket|hubport|netmap|vhost-user],id=str[,prop=value][,...]",
> > .help = "add host network device",
> > .mhandler.cmd = hmp_netdev_add,
> > },
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 3614e6c..e42f4d6 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -15,6 +15,7 @@
> >
> > #include "net/net.h"
> > #include "net/tap.h"
> > +#include "net/vhost-user.h"
> >
> > #include "hw/virtio/virtio-net.h"
> > #include "net/vhost_net.h"
> > @@ -174,15 +175,20 @@ static int vhost_net_start_one(struct vhost_net
> *net,
> > goto fail_start;
> > }
> >
> > - net->nc->info->poll(net->nc, false);
> > - qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
> > - file.fd = net->backend;
> > - for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
> > - const VhostOps *vhost_ops = net->dev.vhost_ops;
> > - r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND,
> &file);
> > - if (r < 0) {
> > - r = -errno;
> > - goto fail;
> > + if (net->nc->info->poll) {
> > + net->nc->info->poll(net->nc, false);
> > + }
> > +
> > + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
> > + qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
> > + file.fd = net->backend;
> > + for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
> > + const VhostOps *vhost_ops = net->dev.vhost_ops;
> > + r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND,
> &file);
> > + if (r < 0) {
> > + r = -errno;
> > + goto fail;
> > + }
> > }
> > }
> > return 0;
> > @@ -193,7 +199,9 @@ fail:
> > int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND,
> &file);
> > assert(r >= 0);
> > }
> > - net->nc->info->poll(net->nc, true);
> > + if (net->nc->info->poll) {
> > + net->nc->info->poll(net->nc, true);
> > + }
> > vhost_dev_stop(&net->dev, dev);
> > fail_start:
> > vhost_dev_disable_notifiers(&net->dev, dev);
> > @@ -215,7 +223,9 @@ static void vhost_net_stop_one(struct vhost_net *net,
> > int r = vhost_ops->vhost_call(&net->dev, VHOST_NET_SET_BACKEND,
> &file);
> > assert(r >= 0);
> > }
> > - net->nc->info->poll(net->nc, true);
> > + if (net->nc->info->poll) {
> > + net->nc->info->poll(net->nc, true);
> > + }
> > vhost_dev_stop(&net->dev, dev);
> > vhost_dev_disable_notifiers(&net->dev, dev);
> > }
> > @@ -235,7 +245,7 @@ int vhost_net_start(VirtIODevice *dev,
> NetClientState *ncs,
> > }
> >
> > for (i = 0; i < total_queues; i++) {
> > - r = vhost_net_start_one(tap_get_vhost_net(ncs[i].peer), dev, i
> * 2);
> > + r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev, i * 2);
> >
> > if (r < 0) {
> > goto err;
> > @@ -252,7 +262,7 @@ int vhost_net_start(VirtIODevice *dev,
> NetClientState *ncs,
> >
> > err:
> > while (--i >= 0) {
> > - vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
> > + vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
> > }
> > return r;
> > }
> > @@ -273,7 +283,7 @@ void vhost_net_stop(VirtIODevice *dev,
> NetClientState *ncs,
> > assert(r >= 0);
> >
> > for (i = 0; i < total_queues; i++) {
> > - vhost_net_stop_one(tap_get_vhost_net(ncs[i].peer), dev);
> > + vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
> > }
> > }
> >
> > @@ -293,6 +303,29 @@ void vhost_net_virtqueue_mask(VHostNetState *net,
> VirtIODevice *dev,
> > {
> > vhost_virtqueue_mask(&net->dev, dev, idx, mask);
> > }
> > +
> > +VHostNetState *get_vhost_net(NetClientState *nc)
> > +{
> > + VHostNetState *vhost_net = 0;
> > +
> > + if (!nc) {
> > + return 0;
> > + }
> > +
> > + switch (nc->info->type) {
> > + case NET_CLIENT_OPTIONS_KIND_TAP:
> > + vhost_net = tap_get_vhost_net(nc);
> > + break;
> > + case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> > + vhost_net = vhost_user_get_vhost_net(nc);
> > + break;
> > + default:
> > + break;
> > + }
> > +
> > + return vhost_net;
> > +}
> > +
> > #else
> > struct vhost_net *vhost_net_init(VhostNetOptions *options)
> > {
> > @@ -338,4 +371,9 @@ void vhost_net_virtqueue_mask(VHostNetState *net,
> VirtIODevice *dev,
> > int idx, bool mask)
> > {
> > }
> > +
> > +VHostNetState *get_vhost_net(NetClientState *nc)
> > +{
> > + return 0;
> > +}
> > #endif
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index 3626608..d49ee82 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -105,14 +105,7 @@ static void virtio_net_vhost_status(VirtIONet *n,
> uint8_t status)
> > NetClientState *nc = qemu_get_queue(n->nic);
> > int queues = n->multiqueue ? n->max_queues : 1;
> >
> > - if (!nc->peer) {
> > - return;
> > - }
> > - if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
> > - return;
> > - }
> > -
> > - if (!tap_get_vhost_net(nc->peer)) {
> > + if (!get_vhost_net(nc->peer)) {
> > return;
> > }
> >
> > @@ -122,7 +115,7 @@ static void virtio_net_vhost_status(VirtIONet *n,
> uint8_t status)
> > }
> > if (!n->vhost_started) {
> > int r;
> > - if (!vhost_net_query(tap_get_vhost_net(nc->peer), vdev)) {
> > + if (!vhost_net_query(get_vhost_net(nc->peer), vdev)) {
> > return;
> > }
> > n->vhost_started = 1;
> > @@ -325,11 +318,16 @@ static void peer_test_vnet_hdr(VirtIONet *n)
> > return;
> > }
> >
> > - if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
> > - return;
> > + switch (nc->peer->info->type) {
> > + case NET_CLIENT_OPTIONS_KIND_TAP:
> > + n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
> > + break;
> > + case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> > + n->has_vnet_hdr = 0;
> > + break;
> > + default:
> > + break;
> > }
> > -
> > - n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
> > }
> >
> > static int peer_has_vnet_hdr(VirtIONet *n)
> > @@ -437,13 +435,10 @@ static uint32_t
> virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
> > features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
> > }
> >
> > - if (!nc->peer || nc->peer->info->type !=
> NET_CLIENT_OPTIONS_KIND_TAP) {
> > + if (!get_vhost_net(nc->peer)) {
> > return features;
> > }
> > - if (!tap_get_vhost_net(nc->peer)) {
> > - return features;
> > - }
> > - return vhost_net_get_features(tap_get_vhost_net(nc->peer),
> features);
> > + return vhost_net_get_features(get_vhost_net(nc->peer), features);
> > }
> >
> > static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
> > @@ -507,13 +502,10 @@ static void virtio_net_set_features(VirtIODevice
> *vdev, uint32_t features)
> > for (i = 0; i < n->max_queues; i++) {
> > NetClientState *nc = qemu_get_subqueue(n->nic, i);
> >
> > - if (!nc->peer || nc->peer->info->type !=
> NET_CLIENT_OPTIONS_KIND_TAP) {
> > - continue;
> > - }
> > - if (!tap_get_vhost_net(nc->peer)) {
> > + if (!get_vhost_net(nc->peer)) {
> > continue;
> > }
> > - vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
> > + vhost_net_ack_features(get_vhost_net(nc->peer), features);
> > }
> > }
> >
> > @@ -1443,7 +1435,7 @@ static bool
> virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
> > VirtIONet *n = VIRTIO_NET(vdev);
> > NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
> > assert(n->vhost_started);
> > - return vhost_net_virtqueue_pending(tap_get_vhost_net(nc->peer),
> idx);
> > + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
> > }
> >
> > static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
> > @@ -1452,7 +1444,7 @@ static void
> virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
> > VirtIONet *n = VIRTIO_NET(vdev);
> > NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
> > assert(n->vhost_started);
> > - vhost_net_virtqueue_mask(tap_get_vhost_net(nc->peer),
> > + vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
> > vdev, idx, mask);
> > }
> >
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index a1137e1..fe622fb 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -14,7 +14,6 @@
> > */
> >
> > #include "hw/virtio/vhost.h"
> > -#include "hw/virtio/vhost-backend.h"
> > #include "hw/hw.h"
> > #include "qemu/atomic.h"
> > #include "qemu/range.h"
> > diff --git a/include/net/vhost-user.h b/include/net/vhost-user.h
> > new file mode 100644
> > index 0000000..85109f6
> > --- /dev/null
> > +++ b/include/net/vhost-user.h
> > @@ -0,0 +1,17 @@
> > +/*
> > + * vhost-user.h
> > + *
> > + * Copyright (c) 2013 Virtual Open Systems Sarl.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#ifndef VHOST_USER_H_
> > +#define VHOST_USER_H_
> > +
> > +struct vhost_net;
> > +struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc);
> > +
> > +#endif /* VHOST_USER_H_ */
> > diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> > index 1169562..abd3d0b 100644
> > --- a/include/net/vhost_net.h
> > +++ b/include/net/vhost_net.h
> > @@ -31,4 +31,5 @@ void vhost_net_ack_features(VHostNetState *net,
> unsigned features);
> > bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
> > void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
> > int idx, bool mask);
> > +VHostNetState *get_vhost_net(NetClientState *nc);
> > #endif
> > diff --git a/net/Makefile.objs b/net/Makefile.objs
> > index c25fe69..301f6b6 100644
> > --- a/net/Makefile.objs
> > +++ b/net/Makefile.objs
> > @@ -2,7 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
> > common-obj-y += socket.o
> > common-obj-y += dump.o
> > common-obj-y += eth.o
> > -common-obj-$(CONFIG_POSIX) += tap.o
> > +common-obj-$(CONFIG_POSIX) += tap.o vhost-user.o
> > common-obj-$(CONFIG_LINUX) += tap-linux.o
> > common-obj-$(CONFIG_WIN32) += tap-win32.o
> > common-obj-$(CONFIG_BSD) += tap-bsd.o
> > diff --git a/net/clients.h b/net/clients.h
> > index 7322ff5..7f3d4ae 100644
> > --- a/net/clients.h
> > +++ b/net/clients.h
> > @@ -57,4 +57,7 @@ int net_init_netmap(const NetClientOptions *opts,
> const char *name,
> > NetClientState *peer);
> > #endif
> >
> > +int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> > + NetClientState *peer);
> > +
> > #endif /* QEMU_NET_CLIENTS_H */
> > diff --git a/net/hub.c b/net/hub.c
> > index 33a99c9..7e0f2d6 100644
> > --- a/net/hub.c
> > +++ b/net/hub.c
> > @@ -322,6 +322,7 @@ void net_hub_check_clients(void)
> > case NET_CLIENT_OPTIONS_KIND_TAP:
> > case NET_CLIENT_OPTIONS_KIND_SOCKET:
> > case NET_CLIENT_OPTIONS_KIND_VDE:
> > + case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> > has_host_dev = 1;
> > break;
> > default:
> > diff --git a/net/net.c b/net/net.c
> > index 9db88cc..0f057c5 100644
> > --- a/net/net.c
> > +++ b/net/net.c
> > @@ -734,6 +734,7 @@ static int (* const
> net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
> > [NET_CLIENT_OPTIONS_KIND_BRIDGE] = net_init_bridge,
> > #endif
> > [NET_CLIENT_OPTIONS_KIND_HUBPORT] = net_init_hubport,
> > + [NET_CLIENT_OPTIONS_KIND_VHOST_USER] = net_init_vhost_user,
> > };
> >
> >
> > @@ -767,6 +768,7 @@ static int net_client_init1(const void *object, int
> is_netdev, Error **errp)
> > case NET_CLIENT_OPTIONS_KIND_BRIDGE:
> > #endif
> > case NET_CLIENT_OPTIONS_KIND_HUBPORT:
> > + case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
> > break;
> >
> > default:
> > diff --git a/net/vhost-user.c b/net/vhost-user.c
> > new file mode 100644
> > index 0000000..6fd5afc
> > --- /dev/null
> > +++ b/net/vhost-user.c
> > @@ -0,0 +1,111 @@
> > +/*
> > + * vhost-user.c
> > + *
> > + * Copyright (c) 2013 Virtual Open Systems Sarl.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "clients.h"
> > +#include "net/vhost_net.h"
> > +#include "net/vhost-user.h"
> > +#include "qemu/error-report.h"
> > +
> > +typedef struct VhostUserState {
> > + NetClientState nc;
> > + VHostNetState *vhost_net;
> > + char *devpath;
> > +} VhostUserState;
> > +
> > +VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
> > +{
> > + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> > + assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
> > + return s->vhost_net;
> > +}
> > +
> > +static int vhost_user_running(VhostUserState *s)
> > +{
> > + return (s->vhost_net) ? 1 : 0;
> > +}
> > +
> > +static int vhost_user_start(VhostUserState *s)
> > +{
> > + VhostNetOptions options;
> > +
> > + if (vhost_user_running(s)) {
> > + return 1;
> > + }
> > +
> > + options.backend_type = VHOST_BACKEND_TYPE_USER;
> > + options.net_backend = &s->nc;
> > + options.devpath = s->devpath;
> > + options.devfd = -1;
> > + options.force = 1;
> > +
> > + s->vhost_net = vhost_net_init(&options);
> > +
> > + return vhost_user_running(s) ? 0 : -1;
> > +}
> > +
> > +static void vhost_user_stop(VhostUserState *s)
> > +{
> > + if (vhost_user_running(s)) {
> > + vhost_net_cleanup(s->vhost_net);
> > + }
> > +
> > + s->vhost_net = 0;
> > +}
> > +
> > +static void vhost_user_cleanup(NetClientState *nc)
> > +{
> > + VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> > +
> > + vhost_user_stop(s);
> > + qemu_purge_queued_packets(nc);
> > +}
> > +
> > +static NetClientInfo net_vhost_user_info = {
> > + .type = NET_CLIENT_OPTIONS_KIND_VHOST_USER,
> > + .size = sizeof(VhostUserState),
> > + .cleanup = vhost_user_cleanup,
> > +};
> > +
> > +static int net_vhost_user_init(NetClientState *peer, const char *device,
> > + const char *name, const char *filename)
> > +{
> > + NetClientState *nc;
> > + VhostUserState *s;
> > + int r;
> > +
> > + nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
> > +
> > + snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
> filename);
> > +
> > + s = DO_UPCAST(VhostUserState, nc, nc);
> > +
> > + /* We don't provide a receive callback */
> > + s->nc.receive_disabled = 1;
> > +
> > + s->devpath = g_strdup(filename);
> > +
> > + r = vhost_user_start(s);
> > +
> > + return r;
> > +}
> > +
> > +int net_init_vhost_user(const NetClientOptions *opts, const char *name,
> > + NetClientState *peer)
> > +{
> > + const char *file;
> > + const NetdevVhostUserOptions *vhost_user;
> > +
> > + assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
> > + vhost_user = opts->vhost_user;
> > +
> > + file = vhost_user->file;
> > +
> > + return net_vhost_user_init(peer, "vhost_user", name, file);
> > +}
> > diff --git a/qapi-schema.json b/qapi-schema.json
> > index c3c939c..3101287 100644
> > --- a/qapi-schema.json
> > +++ b/qapi-schema.json
> > @@ -3030,6 +3030,21 @@
> > '*devname': 'str' } }
> >
> > ##
> > +# @NetdevVhostUserOptions
> > +#
> > +# Vhost-user network backend
> > +#
> > +# @file: control socket path
> > +#
> > +# Since 2.0
> > +##
> > +{ 'type': 'NetdevVhostUserOptions',
> > + 'data': {
> > + 'file': 'str' } }
> > +
> > +##
> > +
> > +##
> > # @NetClientOptions
> > #
> > # A discriminated record of network device traits.
> > @@ -3047,7 +3062,8 @@
> > 'dump': 'NetdevDumpOptions',
> > 'bridge': 'NetdevBridgeOptions',
> > 'hubport': 'NetdevHubPortOptions',
> > - 'netmap': 'NetdevNetmapOptions' } }
> > + 'netmap': 'NetdevNetmapOptions',
> > + 'vhost-user': 'NetdevVhostUserOptions' } }
> >
> > ##
> > # @NetLegacy
> > diff --git a/qemu-options.hx b/qemu-options.hx
> > index 0d35c9c..edf25d0 100644
> > --- a/qemu-options.hx
> > +++ b/qemu-options.hx
> > @@ -1419,6 +1419,8 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
> > " VALE port (created on the fly) called 'name'
> ('nmname' is name of the \n"
> > " netmap device, defaults to '/dev/netmap')\n"
> > #endif
> > + "-net vhost-user,file=name\n"
> > + " connect to a unix domain socket implementing
> vhost-user backend\n"
>
> I'm guessing this does not work, you really must use this with -netdev?
>
You are right, we will correct the message.
>
> > "-net dump[,vlan=n][,file=f][,len=n]\n"
> > " dump traffic on vlan 'n' to file 'f' (max n bytes
> per packet)\n"
> > "-net none use it alone to have zero network devices. If no
> -net option\n"
> > @@ -1436,6 +1438,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
> > #ifdef CONFIG_NETMAP
> > "netmap|"
> > #endif
> > + "vhost-user|"
> > "socket|"
> > "hubport],id=str[,option][,option][,...]\n", QEMU_ARCH_ALL)
> > STEXI
> > --
> > 1.8.3.2
> >
>
[-- Attachment #2: Type: text/html, Size: 24675 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* [Qemu-devel] [PATCH v5 7/7] Add vhost-user reconnection
2014-01-09 14:59 [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends Antonios Motakis
` (5 preceding siblings ...)
2014-01-09 15:00 ` [Qemu-devel] [PATCH v5 6/7] Add new vhost-user netdev backend Antonios Motakis
@ 2014-01-09 15:00 ` Antonios Motakis
2014-01-09 16:16 ` Michael S. Tsirkin
2014-01-09 16:11 ` [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends Michael S. Tsirkin
7 siblings, 1 reply; 24+ messages in thread
From: Antonios Motakis @ 2014-01-09 15:00 UTC (permalink / raw)
To: qemu-devel, snabb-devel
Cc: Stefan Hajnoczi, Michael S. Tsirkin, Jason Wang, n.nikolaev,
Anthony Liguori, Paolo Bonzini, lukego, Antonios Motakis, tech
At runtime vhost-user netdev will detect if the vhost backend is up or down.
Upon disconnection it will set link_down accordingly and notify virtio-net.
Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
hw/net/vhost_net.c | 16 +++++++++++
hw/virtio/vhost-backend.c | 16 +++++++++++
include/hw/virtio/vhost-backend.h | 2 ++
include/net/vhost_net.h | 1 +
net/vhost-user.c | 56 +++++++++++++++++++++++++++++++++++++++
5 files changed, 91 insertions(+)
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index e42f4d6..56c218e 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -304,6 +304,17 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}
+int vhost_net_link_status(VHostNetState *net)
+{
+ int r = 0;
+
+ if (net->dev.vhost_ops->vhost_status) {
+ r = net->dev.vhost_ops->vhost_status(&net->dev);
+ }
+
+ return r;
+}
+
VHostNetState *get_vhost_net(NetClientState *nc)
{
VHostNetState *vhost_net = 0;
@@ -372,6 +383,11 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
{
}
+int vhost_net_link_status(VHostNetState *net)
+{
+ return 0;
+}
+
VHostNetState *get_vhost_net(NetClientState *nc)
{
return 0;
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 50ea307..fcd274f 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -350,9 +350,23 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
}
}
+ /* mark the backend non operational */
+ if (result < 0) {
+ error_report("%s: Connection break detected\n", __func__);
+ vhost_user_cleanup(dev);
+ return 0;
+ }
+
return result;
}
+static int vhost_user_status(struct vhost_dev *dev)
+{
+ vhost_user_echo(dev);
+
+ return (dev->control >= 0);
+}
+
static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
{
int fd = -1;
@@ -432,6 +446,7 @@ static int vhost_user_cleanup(struct vhost_dev *dev)
static const VhostOps user_ops = {
.backend_type = VHOST_BACKEND_TYPE_USER,
.vhost_call = vhost_user_call,
+ .vhost_status = vhost_user_status,
.vhost_backend_init = vhost_user_init,
.vhost_backend_cleanup = vhost_user_cleanup
};
@@ -464,6 +479,7 @@ static int vhost_kernel_cleanup(struct vhost_dev *dev)
static const VhostOps kernel_ops = {
.backend_type = VHOST_BACKEND_TYPE_KERNEL,
.vhost_call = vhost_kernel_call,
+ .vhost_status = 0,
.vhost_backend_init = vhost_kernel_init,
.vhost_backend_cleanup = vhost_kernel_cleanup
};
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index ef87ffa..f2b4a6c 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -22,12 +22,14 @@ struct vhost_dev;
typedef int (*vhost_call)(struct vhost_dev *dev, unsigned long int request,
void *arg);
+typedef int (*vhost_status)(struct vhost_dev *dev);
typedef int (*vhost_backend_init)(struct vhost_dev *dev, const char *devpath);
typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev);
typedef struct VhostOps {
VhostBackendType backend_type;
vhost_call vhost_call;
+ vhost_status vhost_status;
vhost_backend_init vhost_backend_init;
vhost_backend_cleanup vhost_backend_cleanup;
} VhostOps;
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index abd3d0b..6390907 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -31,5 +31,6 @@ void vhost_net_ack_features(VHostNetState *net, unsigned features);
bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask);
+int vhost_net_link_status(VHostNetState *net);
VHostNetState *get_vhost_net(NetClientState *nc);
#endif
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 6fd5afc..56f7dd4 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -12,6 +12,7 @@
#include "net/vhost_net.h"
#include "net/vhost-user.h"
#include "qemu/error-report.h"
+#include "qemu/timer.h"
typedef struct VhostUserState {
NetClientState nc;
@@ -19,6 +20,9 @@ typedef struct VhostUserState {
char *devpath;
} VhostUserState;
+static QEMUTimer *vhost_user_timer;
+#define VHOST_USER_TIMEOUT (1*1000)
+
VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
{
VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
@@ -31,6 +35,11 @@ static int vhost_user_running(VhostUserState *s)
return (s->vhost_net) ? 1 : 0;
}
+static int vhost_user_link_status(VhostUserState *s)
+{
+ return (!s->nc.link_down) && vhost_net_link_status(s->vhost_net);
+}
+
static int vhost_user_start(VhostUserState *s)
{
VhostNetOptions options;
@@ -59,6 +68,48 @@ static void vhost_user_stop(VhostUserState *s)
s->vhost_net = 0;
}
+static void vhost_user_timer_handler(void *opaque)
+{
+ VhostUserState *s = opaque;
+ int link_down = 0;
+
+ if (vhost_user_running(s)) {
+ if (!vhost_user_link_status(s)) {
+ link_down = 1;
+ }
+ } else {
+ vhost_user_start(s);
+ if (!vhost_user_running(s)) {
+ link_down = 1;
+ }
+ }
+
+ if (link_down != s->nc.link_down) {
+
+ s->nc.link_down = link_down;
+
+ if (s->nc.peer) {
+ s->nc.peer->link_down = link_down;
+ }
+
+ if (s->nc.info->link_status_changed) {
+ s->nc.info->link_status_changed(&s->nc);
+ }
+
+ if (s->nc.peer && s->nc.peer->info->link_status_changed) {
+ s->nc.peer->info->link_status_changed(s->nc.peer);
+ }
+
+ if (link_down) {
+ vhost_user_stop(s);
+ }
+ }
+
+ /* reschedule */
+ timer_mod(vhost_user_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + VHOST_USER_TIMEOUT);
+}
+
static void vhost_user_cleanup(NetClientState *nc)
{
VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
@@ -93,6 +144,11 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
r = vhost_user_start(s);
+ vhost_user_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
+ vhost_user_timer_handler, s);
+ timer_mod(vhost_user_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + VHOST_USER_TIMEOUT);
+
return r;
}
--
1.8.3.2
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 7/7] Add vhost-user reconnection
2014-01-09 15:00 ` [Qemu-devel] [PATCH v5 7/7] Add vhost-user reconnection Antonios Motakis
@ 2014-01-09 16:16 ` Michael S. Tsirkin
2014-01-10 10:59 ` Antonios Motakis
0 siblings, 1 reply; 24+ messages in thread
From: Michael S. Tsirkin @ 2014-01-09 16:16 UTC (permalink / raw)
To: Antonios Motakis
Cc: snabb-devel, Anthony Liguori, Jason Wang, qemu-devel, n.nikolaev,
Stefan Hajnoczi, lukego, Paolo Bonzini, tech
On Thu, Jan 09, 2014 at 04:00:01PM +0100, Antonios Motakis wrote:
> At runtime vhost-user netdev will detect if the vhost backend is up or down.
> Upon disconnection it will set link_down accordingly and notify virtio-net.
And then what happens?
>
> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> ---
> hw/net/vhost_net.c | 16 +++++++++++
> hw/virtio/vhost-backend.c | 16 +++++++++++
> include/hw/virtio/vhost-backend.h | 2 ++
> include/net/vhost_net.h | 1 +
> net/vhost-user.c | 56 +++++++++++++++++++++++++++++++++++++++
> 5 files changed, 91 insertions(+)
>
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index e42f4d6..56c218e 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -304,6 +304,17 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
> vhost_virtqueue_mask(&net->dev, dev, idx, mask);
> }
>
> +int vhost_net_link_status(VHostNetState *net)
> +{
> + int r = 0;
> +
> + if (net->dev.vhost_ops->vhost_status) {
> + r = net->dev.vhost_ops->vhost_status(&net->dev);
> + }
> +
> + return r;
> +}
> +
> VHostNetState *get_vhost_net(NetClientState *nc)
> {
> VHostNetState *vhost_net = 0;
> @@ -372,6 +383,11 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
> {
> }
>
> +int vhost_net_link_status(VHostNetState *net)
> +{
> + return 0;
> +}
> +
> VHostNetState *get_vhost_net(NetClientState *nc)
> {
> return 0;
> diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> index 50ea307..fcd274f 100644
> --- a/hw/virtio/vhost-backend.c
> +++ b/hw/virtio/vhost-backend.c
> @@ -350,9 +350,23 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
> }
> }
>
> + /* mark the backend non operational */
> + if (result < 0) {
> + error_report("%s: Connection break detected\n", __func__);
> + vhost_user_cleanup(dev);
> + return 0;
> + }
> +
> return result;
> }
>
> +static int vhost_user_status(struct vhost_dev *dev)
> +{
> + vhost_user_echo(dev);
> +
> + return (dev->control >= 0);
> +}
> +
> static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
> {
> int fd = -1;
> @@ -432,6 +446,7 @@ static int vhost_user_cleanup(struct vhost_dev *dev)
> static const VhostOps user_ops = {
> .backend_type = VHOST_BACKEND_TYPE_USER,
> .vhost_call = vhost_user_call,
> + .vhost_status = vhost_user_status,
> .vhost_backend_init = vhost_user_init,
> .vhost_backend_cleanup = vhost_user_cleanup
> };
> @@ -464,6 +479,7 @@ static int vhost_kernel_cleanup(struct vhost_dev *dev)
> static const VhostOps kernel_ops = {
> .backend_type = VHOST_BACKEND_TYPE_KERNEL,
> .vhost_call = vhost_kernel_call,
> + .vhost_status = 0,
> .vhost_backend_init = vhost_kernel_init,
> .vhost_backend_cleanup = vhost_kernel_cleanup
> };
> diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
> index ef87ffa..f2b4a6c 100644
> --- a/include/hw/virtio/vhost-backend.h
> +++ b/include/hw/virtio/vhost-backend.h
> @@ -22,12 +22,14 @@ struct vhost_dev;
>
> typedef int (*vhost_call)(struct vhost_dev *dev, unsigned long int request,
> void *arg);
> +typedef int (*vhost_status)(struct vhost_dev *dev);
> typedef int (*vhost_backend_init)(struct vhost_dev *dev, const char *devpath);
> typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev);
>
> typedef struct VhostOps {
> VhostBackendType backend_type;
> vhost_call vhost_call;
> + vhost_status vhost_status;
> vhost_backend_init vhost_backend_init;
> vhost_backend_cleanup vhost_backend_cleanup;
> } VhostOps;
> diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> index abd3d0b..6390907 100644
> --- a/include/net/vhost_net.h
> +++ b/include/net/vhost_net.h
> @@ -31,5 +31,6 @@ void vhost_net_ack_features(VHostNetState *net, unsigned features);
> bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
> void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
> int idx, bool mask);
> +int vhost_net_link_status(VHostNetState *net);
> VHostNetState *get_vhost_net(NetClientState *nc);
> #endif
> diff --git a/net/vhost-user.c b/net/vhost-user.c
> index 6fd5afc..56f7dd4 100644
> --- a/net/vhost-user.c
> +++ b/net/vhost-user.c
> @@ -12,6 +12,7 @@
> #include "net/vhost_net.h"
> #include "net/vhost-user.h"
> #include "qemu/error-report.h"
> +#include "qemu/timer.h"
>
> typedef struct VhostUserState {
> NetClientState nc;
> @@ -19,6 +20,9 @@ typedef struct VhostUserState {
> char *devpath;
> } VhostUserState;
>
> +static QEMUTimer *vhost_user_timer;
> +#define VHOST_USER_TIMEOUT (1*1000)
> +
> VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
> {
> VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> @@ -31,6 +35,11 @@ static int vhost_user_running(VhostUserState *s)
> return (s->vhost_net) ? 1 : 0;
> }
>
> +static int vhost_user_link_status(VhostUserState *s)
> +{
> + return (!s->nc.link_down) && vhost_net_link_status(s->vhost_net);
> +}
> +
> static int vhost_user_start(VhostUserState *s)
> {
> VhostNetOptions options;
> @@ -59,6 +68,48 @@ static void vhost_user_stop(VhostUserState *s)
> s->vhost_net = 0;
> }
>
> +static void vhost_user_timer_handler(void *opaque)
> +{
> + VhostUserState *s = opaque;
> + int link_down = 0;
> +
> + if (vhost_user_running(s)) {
> + if (!vhost_user_link_status(s)) {
> + link_down = 1;
> + }
> + } else {
> + vhost_user_start(s);
> + if (!vhost_user_running(s)) {
> + link_down = 1;
> + }
> + }
> +
> + if (link_down != s->nc.link_down) {
> +
> + s->nc.link_down = link_down;
> +
> + if (s->nc.peer) {
> + s->nc.peer->link_down = link_down;
> + }
> +
> + if (s->nc.info->link_status_changed) {
> + s->nc.info->link_status_changed(&s->nc);
> + }
> +
> + if (s->nc.peer && s->nc.peer->info->link_status_changed) {
> + s->nc.peer->info->link_status_changed(s->nc.peer);
> + }
> +
> + if (link_down) {
> + vhost_user_stop(s);
> + }
> + }
> +
> + /* reschedule */
> + timer_mod(vhost_user_timer,
> + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + VHOST_USER_TIMEOUT);
> +}
> +
> static void vhost_user_cleanup(NetClientState *nc)
> {
> VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> @@ -93,6 +144,11 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
>
> r = vhost_user_start(s);
>
> + vhost_user_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
> + vhost_user_timer_handler, s);
> + timer_mod(vhost_user_timer,
> + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + VHOST_USER_TIMEOUT);
> +
> return r;
> }
>
> --
> 1.8.3.2
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 7/7] Add vhost-user reconnection
2014-01-09 16:16 ` Michael S. Tsirkin
@ 2014-01-10 10:59 ` Antonios Motakis
2014-01-10 12:29 ` Michael S. Tsirkin
0 siblings, 1 reply; 24+ messages in thread
From: Antonios Motakis @ 2014-01-10 10:59 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: snabb-devel, Anthony Liguori, Jason Wang, qemu-devel qemu-devel,
Nikolay Nikolaev, Stefan Hajnoczi, Luke Gorrie, Paolo Bonzini,
VirtualOpenSystems Technical Team
[-- Attachment #1: Type: text/plain, Size: 7807 bytes --]
On Thu, Jan 9, 2014 at 5:16 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Thu, Jan 09, 2014 at 04:00:01PM +0100, Antonios Motakis wrote:
> > At runtime vhost-user netdev will detect if the vhost backend is up or
> down.
> > Upon disconnection it will set link_down accordingly and notify
> virtio-net.
>
> And then what happens?
>
The virtio-net interface goes down. On the next polling cycle the
connection will be re-attempted (see vhost_user_timer_handler).
> >
> > Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > ---
> > hw/net/vhost_net.c | 16 +++++++++++
> > hw/virtio/vhost-backend.c | 16 +++++++++++
> > include/hw/virtio/vhost-backend.h | 2 ++
> > include/net/vhost_net.h | 1 +
> > net/vhost-user.c | 56
> +++++++++++++++++++++++++++++++++++++++
> > 5 files changed, 91 insertions(+)
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index e42f4d6..56c218e 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -304,6 +304,17 @@ void vhost_net_virtqueue_mask(VHostNetState *net,
> VirtIODevice *dev,
> > vhost_virtqueue_mask(&net->dev, dev, idx, mask);
> > }
> >
> > +int vhost_net_link_status(VHostNetState *net)
> > +{
> > + int r = 0;
> > +
> > + if (net->dev.vhost_ops->vhost_status) {
> > + r = net->dev.vhost_ops->vhost_status(&net->dev);
> > + }
> > +
> > + return r;
> > +}
> > +
> > VHostNetState *get_vhost_net(NetClientState *nc)
> > {
> > VHostNetState *vhost_net = 0;
> > @@ -372,6 +383,11 @@ void vhost_net_virtqueue_mask(VHostNetState *net,
> VirtIODevice *dev,
> > {
> > }
> >
> > +int vhost_net_link_status(VHostNetState *net)
> > +{
> > + return 0;
> > +}
> > +
> > VHostNetState *get_vhost_net(NetClientState *nc)
> > {
> > return 0;
> > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> > index 50ea307..fcd274f 100644
> > --- a/hw/virtio/vhost-backend.c
> > +++ b/hw/virtio/vhost-backend.c
> > @@ -350,9 +350,23 @@ static int vhost_user_call(struct vhost_dev *dev,
> unsigned long int request,
> > }
> > }
> >
> > + /* mark the backend non operational */
> > + if (result < 0) {
> > + error_report("%s: Connection break detected\n", __func__);
> > + vhost_user_cleanup(dev);
> > + return 0;
> > + }
> > +
> > return result;
> > }
> >
> > +static int vhost_user_status(struct vhost_dev *dev)
> > +{
> > + vhost_user_echo(dev);
> > +
> > + return (dev->control >= 0);
> > +}
> > +
> > static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
> > {
> > int fd = -1;
> > @@ -432,6 +446,7 @@ static int vhost_user_cleanup(struct vhost_dev *dev)
> > static const VhostOps user_ops = {
> > .backend_type = VHOST_BACKEND_TYPE_USER,
> > .vhost_call = vhost_user_call,
> > + .vhost_status = vhost_user_status,
> > .vhost_backend_init = vhost_user_init,
> > .vhost_backend_cleanup = vhost_user_cleanup
> > };
> > @@ -464,6 +479,7 @@ static int vhost_kernel_cleanup(struct vhost_dev
> *dev)
> > static const VhostOps kernel_ops = {
> > .backend_type = VHOST_BACKEND_TYPE_KERNEL,
> > .vhost_call = vhost_kernel_call,
> > + .vhost_status = 0,
> > .vhost_backend_init = vhost_kernel_init,
> > .vhost_backend_cleanup = vhost_kernel_cleanup
> > };
> > diff --git a/include/hw/virtio/vhost-backend.h
> b/include/hw/virtio/vhost-backend.h
> > index ef87ffa..f2b4a6c 100644
> > --- a/include/hw/virtio/vhost-backend.h
> > +++ b/include/hw/virtio/vhost-backend.h
> > @@ -22,12 +22,14 @@ struct vhost_dev;
> >
> > typedef int (*vhost_call)(struct vhost_dev *dev, unsigned long int
> request,
> > void *arg);
> > +typedef int (*vhost_status)(struct vhost_dev *dev);
> > typedef int (*vhost_backend_init)(struct vhost_dev *dev, const char
> *devpath);
> > typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev);
> >
> > typedef struct VhostOps {
> > VhostBackendType backend_type;
> > vhost_call vhost_call;
> > + vhost_status vhost_status;
> > vhost_backend_init vhost_backend_init;
> > vhost_backend_cleanup vhost_backend_cleanup;
> > } VhostOps;
> > diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> > index abd3d0b..6390907 100644
> > --- a/include/net/vhost_net.h
> > +++ b/include/net/vhost_net.h
> > @@ -31,5 +31,6 @@ void vhost_net_ack_features(VHostNetState *net,
> unsigned features);
> > bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
> > void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
> > int idx, bool mask);
> > +int vhost_net_link_status(VHostNetState *net);
> > VHostNetState *get_vhost_net(NetClientState *nc);
> > #endif
> > diff --git a/net/vhost-user.c b/net/vhost-user.c
> > index 6fd5afc..56f7dd4 100644
> > --- a/net/vhost-user.c
> > +++ b/net/vhost-user.c
> > @@ -12,6 +12,7 @@
> > #include "net/vhost_net.h"
> > #include "net/vhost-user.h"
> > #include "qemu/error-report.h"
> > +#include "qemu/timer.h"
> >
> > typedef struct VhostUserState {
> > NetClientState nc;
> > @@ -19,6 +20,9 @@ typedef struct VhostUserState {
> > char *devpath;
> > } VhostUserState;
> >
> > +static QEMUTimer *vhost_user_timer;
> > +#define VHOST_USER_TIMEOUT (1*1000)
> > +
> > VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
> > {
> > VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> > @@ -31,6 +35,11 @@ static int vhost_user_running(VhostUserState *s)
> > return (s->vhost_net) ? 1 : 0;
> > }
> >
> > +static int vhost_user_link_status(VhostUserState *s)
> > +{
> > + return (!s->nc.link_down) && vhost_net_link_status(s->vhost_net);
> > +}
> > +
> > static int vhost_user_start(VhostUserState *s)
> > {
> > VhostNetOptions options;
> > @@ -59,6 +68,48 @@ static void vhost_user_stop(VhostUserState *s)
> > s->vhost_net = 0;
> > }
> >
> > +static void vhost_user_timer_handler(void *opaque)
> > +{
> > + VhostUserState *s = opaque;
> > + int link_down = 0;
> > +
> > + if (vhost_user_running(s)) {
> > + if (!vhost_user_link_status(s)) {
> > + link_down = 1;
> > + }
> > + } else {
> > + vhost_user_start(s);
> > + if (!vhost_user_running(s)) {
> > + link_down = 1;
> > + }
> > + }
> > +
> > + if (link_down != s->nc.link_down) {
> > +
> > + s->nc.link_down = link_down;
> > +
> > + if (s->nc.peer) {
> > + s->nc.peer->link_down = link_down;
> > + }
> > +
> > + if (s->nc.info->link_status_changed) {
> > + s->nc.info->link_status_changed(&s->nc);
> > + }
> > +
> > + if (s->nc.peer && s->nc.peer->info->link_status_changed) {
> > + s->nc.peer->info->link_status_changed(s->nc.peer);
> > + }
> > +
> > + if (link_down) {
> > + vhost_user_stop(s);
> > + }
> > + }
> > +
> > + /* reschedule */
> > + timer_mod(vhost_user_timer,
> > + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
> VHOST_USER_TIMEOUT);
> > +}
> > +
> > static void vhost_user_cleanup(NetClientState *nc)
> > {
> > VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> > @@ -93,6 +144,11 @@ static int net_vhost_user_init(NetClientState *peer,
> const char *device,
> >
> > r = vhost_user_start(s);
> >
> > + vhost_user_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
> > + vhost_user_timer_handler, s);
> > + timer_mod(vhost_user_timer,
> > + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
> VHOST_USER_TIMEOUT);
> > +
> > return r;
> > }
> >
> > --
> > 1.8.3.2
> >
>
[-- Attachment #2: Type: text/html, Size: 9940 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 7/7] Add vhost-user reconnection
2014-01-10 10:59 ` Antonios Motakis
@ 2014-01-10 12:29 ` Michael S. Tsirkin
0 siblings, 0 replies; 24+ messages in thread
From: Michael S. Tsirkin @ 2014-01-10 12:29 UTC (permalink / raw)
To: Antonios Motakis
Cc: snabb-devel, Anthony Liguori, Jason Wang, qemu-devel qemu-devel,
Nikolay Nikolaev, Stefan Hajnoczi, Luke Gorrie, Paolo Bonzini,
VirtualOpenSystems Technical Team
On Fri, Jan 10, 2014 at 11:59:24AM +0100, Antonios Motakis wrote:
>
>
>
> On Thu, Jan 9, 2014 at 5:16 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Thu, Jan 09, 2014 at 04:00:01PM +0100, Antonios Motakis wrote:
> > At runtime vhost-user netdev will detect if the vhost backend is up or
> down.
> > Upon disconnection it will set link_down accordingly and notify
> virtio-net.
>
> And then what happens?
>
>
> The virtio-net interface goes down. On the next polling cycle the connection
> will be re-attempted (see vhost_user_timer_handler).
>
I'm guessing user should have control over how often to retry then,
it's a policy thing.
>
> >
> > Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> > ---
> > hw/net/vhost_net.c | 16 +++++++++++
> > hw/virtio/vhost-backend.c | 16 +++++++++++
> > include/hw/virtio/vhost-backend.h | 2 ++
> > include/net/vhost_net.h | 1 +
> > net/vhost-user.c | 56
> +++++++++++++++++++++++++++++++++++++++
> > 5 files changed, 91 insertions(+)
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index e42f4d6..56c218e 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -304,6 +304,17 @@ void vhost_net_virtqueue_mask(VHostNetState *net,
> VirtIODevice *dev,
> > vhost_virtqueue_mask(&net->dev, dev, idx, mask);
> > }
> >
> > +int vhost_net_link_status(VHostNetState *net)
> > +{
> > + int r = 0;
> > +
> > + if (net->dev.vhost_ops->vhost_status) {
> > + r = net->dev.vhost_ops->vhost_status(&net->dev);
> > + }
> > +
> > + return r;
> > +}
> > +
> > VHostNetState *get_vhost_net(NetClientState *nc)
> > {
> > VHostNetState *vhost_net = 0;
> > @@ -372,6 +383,11 @@ void vhost_net_virtqueue_mask(VHostNetState *net,
> VirtIODevice *dev,
> > {
> > }
> >
> > +int vhost_net_link_status(VHostNetState *net)
> > +{
> > + return 0;
> > +}
> > +
> > VHostNetState *get_vhost_net(NetClientState *nc)
> > {
> > return 0;
> > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> > index 50ea307..fcd274f 100644
> > --- a/hw/virtio/vhost-backend.c
> > +++ b/hw/virtio/vhost-backend.c
> > @@ -350,9 +350,23 @@ static int vhost_user_call(struct vhost_dev *dev,
> unsigned long int request,
> > }
> > }
> >
> > + /* mark the backend non operational */
> > + if (result < 0) {
> > + error_report("%s: Connection break detected\n", __func__);
> > + vhost_user_cleanup(dev);
> > + return 0;
> > + }
> > +
> > return result;
> > }
> >
> > +static int vhost_user_status(struct vhost_dev *dev)
> > +{
> > + vhost_user_echo(dev);
> > +
> > + return (dev->control >= 0);
> > +}
> > +
> > static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
> > {
> > int fd = -1;
> > @@ -432,6 +446,7 @@ static int vhost_user_cleanup(struct vhost_dev *dev)
> > static const VhostOps user_ops = {
> > .backend_type = VHOST_BACKEND_TYPE_USER,
> > .vhost_call = vhost_user_call,
> > + .vhost_status = vhost_user_status,
> > .vhost_backend_init = vhost_user_init,
> > .vhost_backend_cleanup = vhost_user_cleanup
> > };
> > @@ -464,6 +479,7 @@ static int vhost_kernel_cleanup(struct vhost_dev
> *dev)
> > static const VhostOps kernel_ops = {
> > .backend_type = VHOST_BACKEND_TYPE_KERNEL,
> > .vhost_call = vhost_kernel_call,
> > + .vhost_status = 0,
> > .vhost_backend_init = vhost_kernel_init,
> > .vhost_backend_cleanup = vhost_kernel_cleanup
> > };
> > diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/
> vhost-backend.h
> > index ef87ffa..f2b4a6c 100644
> > --- a/include/hw/virtio/vhost-backend.h
> > +++ b/include/hw/virtio/vhost-backend.h
> > @@ -22,12 +22,14 @@ struct vhost_dev;
> >
> > typedef int (*vhost_call)(struct vhost_dev *dev, unsigned long int
> request,
> > void *arg);
> > +typedef int (*vhost_status)(struct vhost_dev *dev);
> > typedef int (*vhost_backend_init)(struct vhost_dev *dev, const char
> *devpath);
> > typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev);
> >
> > typedef struct VhostOps {
> > VhostBackendType backend_type;
> > vhost_call vhost_call;
> > + vhost_status vhost_status;
> > vhost_backend_init vhost_backend_init;
> > vhost_backend_cleanup vhost_backend_cleanup;
> > } VhostOps;
> > diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
> > index abd3d0b..6390907 100644
> > --- a/include/net/vhost_net.h
> > +++ b/include/net/vhost_net.h
> > @@ -31,5 +31,6 @@ void vhost_net_ack_features(VHostNetState *net,
> unsigned features);
> > bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
> > void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
> > int idx, bool mask);
> > +int vhost_net_link_status(VHostNetState *net);
> > VHostNetState *get_vhost_net(NetClientState *nc);
> > #endif
> > diff --git a/net/vhost-user.c b/net/vhost-user.c
> > index 6fd5afc..56f7dd4 100644
> > --- a/net/vhost-user.c
> > +++ b/net/vhost-user.c
> > @@ -12,6 +12,7 @@
> > #include "net/vhost_net.h"
> > #include "net/vhost-user.h"
> > #include "qemu/error-report.h"
> > +#include "qemu/timer.h"
> >
> > typedef struct VhostUserState {
> > NetClientState nc;
> > @@ -19,6 +20,9 @@ typedef struct VhostUserState {
> > char *devpath;
> > } VhostUserState;
> >
> > +static QEMUTimer *vhost_user_timer;
> > +#define VHOST_USER_TIMEOUT (1*1000)
> > +
> > VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
> > {
> > VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> > @@ -31,6 +35,11 @@ static int vhost_user_running(VhostUserState *s)
> > return (s->vhost_net) ? 1 : 0;
> > }
> >
> > +static int vhost_user_link_status(VhostUserState *s)
> > +{
> > + return (!s->nc.link_down) && vhost_net_link_status(s->vhost_net);
> > +}
> > +
> > static int vhost_user_start(VhostUserState *s)
> > {
> > VhostNetOptions options;
> > @@ -59,6 +68,48 @@ static void vhost_user_stop(VhostUserState *s)
> > s->vhost_net = 0;
> > }
> >
> > +static void vhost_user_timer_handler(void *opaque)
> > +{
> > + VhostUserState *s = opaque;
> > + int link_down = 0;
> > +
> > + if (vhost_user_running(s)) {
> > + if (!vhost_user_link_status(s)) {
> > + link_down = 1;
> > + }
> > + } else {
> > + vhost_user_start(s);
> > + if (!vhost_user_running(s)) {
> > + link_down = 1;
> > + }
> > + }
> > +
> > + if (link_down != s->nc.link_down) {
> > +
> > + s->nc.link_down = link_down;
> > +
> > + if (s->nc.peer) {
> > + s->nc.peer->link_down = link_down;
> > + }
> > +
> > + if (s->nc.info->link_status_changed) {
> > + s->nc.info->link_status_changed(&s->nc);
> > + }
> > +
> > + if (s->nc.peer && s->nc.peer->info->link_status_changed) {
> > + s->nc.peer->info->link_status_changed(s->nc.peer);
> > + }
> > +
> > + if (link_down) {
> > + vhost_user_stop(s);
> > + }
> > + }
> > +
> > + /* reschedule */
> > + timer_mod(vhost_user_timer,
> > + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
> VHOST_USER_TIMEOUT);
> > +}
> > +
> > static void vhost_user_cleanup(NetClientState *nc)
> > {
> > VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
> > @@ -93,6 +144,11 @@ static int net_vhost_user_init(NetClientState *peer,
> const char *device,
> >
> > r = vhost_user_start(s);
> >
> > + vhost_user_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
> > + vhost_user_timer_handler, s);
> > + timer_mod(vhost_user_timer,
> > + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
> VHOST_USER_TIMEOUT);
> > +
> > return r;
> > }
> >
> > --
> > 1.8.3.2
> >
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends
2014-01-09 14:59 [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends Antonios Motakis
` (6 preceding siblings ...)
2014-01-09 15:00 ` [Qemu-devel] [PATCH v5 7/7] Add vhost-user reconnection Antonios Motakis
@ 2014-01-09 16:11 ` Michael S. Tsirkin
2014-01-10 10:58 ` Antonios Motakis
7 siblings, 1 reply; 24+ messages in thread
From: Michael S. Tsirkin @ 2014-01-09 16:11 UTC (permalink / raw)
To: Antonios Motakis; +Cc: lukego, snabb-devel, n.nikolaev, qemu-devel, tech
On Thu, Jan 09, 2014 at 03:59:54PM +0100, Antonios Motakis wrote:
> In this patch series we would like to introduce our approach for putting a
> virtio-net backend in an external userspace process. Our eventual target is to
> run the network backend in the Snabbswitch ethernet switch, while receiving
> traffic from a guest inside QEMU/KVM which runs an unmodified virtio-net
> implementation.
>
> For this, we are working into extending vhost to allow equivalent functionality
> for userspace. Vhost already passes control of the data plane of virtio-net to
> the host kernel; we want to realize a similar model, but for userspace.
>
> In this patch series the concept of a vhost-backend is introduced.
>
> We define two vhost backend types - vhost-kernel and vhost-user. The former is
> the interface to the current kernel module implementation. Its control plane is
> ioctl based. The data plane is the kernel directly accessing the QEMU allocated,
> guest memory.
>
> In the new vhost-user backend, the control plane is based on communication
> between QEMU and another userspace process using a unix domain socket. This
> allows to implement a virtio backend for a guest running in QEMU, inside the
> other userspace process.
>
> We change -mem-path to QemuOpts and add prealloc, share and unlink as properties
> to it. HugeTLBFS requirements of -mem-path are relaxed, so any valid path can
> be used now. The new properties allow more fine grained control over the guest
> RAM backing store.
>
> The data path is realized by directly accessing the vrings and the buffer data
> off the guest's memory.
>
> The current user of vhost-user is only vhost-net. We add new netdev backend
> that is intended to initialize vhost-net with vhost-user backend.
>
> Example usage:
>
> qemu -m 1024 -mem-path /hugetlbfs,prealloc=on,share=on \
> -netdev type=vhost-user,id=net0,file=/path/to/sock \
Why can't this use standard syntax that qemu_chr_open_socket
supports?
This will add features such as ability for management to open the socket
and pass it to qemu and switch, ability to switch server/client roles,
and others.
> -device virtio-net-pci,netdev=net0
>
> Changes from v4:
> - Use error_report for errors
> - VhostUserMsg has new field 'size' indicating the following payload length.
> Field 'flags' now has version and reply bits. The structure is packed.
> - Send data is of variable length ('size' field in message)
> - Parse received calls in 2 steps, header and payload
> - Add new message type VHOST_USER_ECHO, to check connection status
>
> Changes from v3:
> - Convert -mem-path to QemuOpts with prealloc, share and unlink properties
> - Set 1 sec timeout when read/write to the unix domain socket
> - Fix file descriptor leak
>
> Changes from v2:
> - Reconnect when the backend disappears
>
> Changes from v1:
> - Implementation of vhost-user netdev backend
> - Code improvements
>
> Antonios Motakis (7):
> Convert -mem-path to QemuOpts and add prealloc,share and unlink
> properties
> Decouple vhost from kernel interface
> Add vhost-user skeleton
> Add domain socket communication for vhost-user backend
> Add vhost-user calls implementation
> Add new vhost-user netdev backend
> Add vhost-user reconnection
>
> exec.c | 57 ++++-
> hmp-commands.hx | 4 +-
> hw/net/vhost_net.c | 144 ++++++++---
> hw/net/virtio-net.c | 42 ++--
> hw/scsi/vhost-scsi.c | 13 +-
> hw/virtio/Makefile.objs | 2 +-
> hw/virtio/vhost-backend.c | 504 ++++++++++++++++++++++++++++++++++++++
> hw/virtio/vhost.c | 46 ++--
> include/exec/cpu-all.h | 3 -
> include/hw/virtio/vhost-backend.h | 40 +++
> include/hw/virtio/vhost.h | 4 +-
> include/net/vhost-user.h | 17 ++
> include/net/vhost_net.h | 15 +-
> net/Makefile.objs | 2 +-
> net/clients.h | 3 +
> net/hub.c | 1 +
> net/net.c | 2 +
> net/tap.c | 16 +-
> net/vhost-user.c | 167 +++++++++++++
> qapi-schema.json | 18 +-
> qemu-options.hx | 13 +-
> vl.c | 41 +++-
> 22 files changed, 1030 insertions(+), 124 deletions(-)
> create mode 100644 hw/virtio/vhost-backend.c
> create mode 100644 include/hw/virtio/vhost-backend.h
> create mode 100644 include/net/vhost-user.h
> create mode 100644 net/vhost-user.c
>
> --
> 1.8.3.2
>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends
2014-01-09 16:11 ` [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends Michael S. Tsirkin
@ 2014-01-10 10:58 ` Antonios Motakis
2014-01-10 12:25 ` Michael S. Tsirkin
0 siblings, 1 reply; 24+ messages in thread
From: Antonios Motakis @ 2014-01-10 10:58 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Luke Gorrie, snabb-devel, Nikolay Nikolaev, qemu-devel qemu-devel,
VirtualOpenSystems Technical Team
[-- Attachment #1: Type: text/plain, Size: 5089 bytes --]
On Thu, Jan 9, 2014 at 5:11 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Thu, Jan 09, 2014 at 03:59:54PM +0100, Antonios Motakis wrote:
> > In this patch series we would like to introduce our approach for putting
> a
> > virtio-net backend in an external userspace process. Our eventual target
> is to
> > run the network backend in the Snabbswitch ethernet switch, while
> receiving
> > traffic from a guest inside QEMU/KVM which runs an unmodified virtio-net
> > implementation.
> >
> > For this, we are working into extending vhost to allow equivalent
> functionality
> > for userspace. Vhost already passes control of the data plane of
> virtio-net to
> > the host kernel; we want to realize a similar model, but for userspace.
> >
> > In this patch series the concept of a vhost-backend is introduced.
> >
> > We define two vhost backend types - vhost-kernel and vhost-user. The
> former is
> > the interface to the current kernel module implementation. Its control
> plane is
> > ioctl based. The data plane is the kernel directly accessing the QEMU
> allocated,
> > guest memory.
> >
> > In the new vhost-user backend, the control plane is based on
> communication
> > between QEMU and another userspace process using a unix domain socket.
> This
> > allows to implement a virtio backend for a guest running in QEMU, inside
> the
> > other userspace process.
> >
> > We change -mem-path to QemuOpts and add prealloc, share and unlink as
> properties
> > to it. HugeTLBFS requirements of -mem-path are relaxed, so any valid
> path can
> > be used now. The new properties allow more fine grained control over the
> guest
> > RAM backing store.
> >
> > The data path is realized by directly accessing the vrings and the
> buffer data
> > off the guest's memory.
> >
> > The current user of vhost-user is only vhost-net. We add new netdev
> backend
> > that is intended to initialize vhost-net with vhost-user backend.
> >
> > Example usage:
> >
> > qemu -m 1024 -mem-path /hugetlbfs,prealloc=on,share=on \
> > -netdev type=vhost-user,id=net0,file=/path/to/sock \
>
>
> Why can't this use standard syntax that qemu_chr_open_socket
> supports?
> This will add features such as ability for management to open the socket
> and pass it to qemu and switch, ability to switch server/client roles,
> and others.
>
What you have in mind is just to match the syntax used for the socket
chardev, or are you proposing some code reuse or interaction with it?
>
>
> > -device virtio-net-pci,netdev=net0
> >
> > Changes from v4:
> > - Use error_report for errors
> > - VhostUserMsg has new field 'size' indicating the following payload
> length.
> > Field 'flags' now has version and reply bits. The structure is packed.
> > - Send data is of variable length ('size' field in message)
> > - Parse received calls in 2 steps, header and payload
> > - Add new message type VHOST_USER_ECHO, to check connection status
> >
> > Changes from v3:
> > - Convert -mem-path to QemuOpts with prealloc, share and unlink
> properties
> > - Set 1 sec timeout when read/write to the unix domain socket
> > - Fix file descriptor leak
> >
> > Changes from v2:
> > - Reconnect when the backend disappears
> >
> > Changes from v1:
> > - Implementation of vhost-user netdev backend
> > - Code improvements
> >
> > Antonios Motakis (7):
> > Convert -mem-path to QemuOpts and add prealloc,share and unlink
> > properties
> > Decouple vhost from kernel interface
> > Add vhost-user skeleton
> > Add domain socket communication for vhost-user backend
> > Add vhost-user calls implementation
> > Add new vhost-user netdev backend
> > Add vhost-user reconnection
> >
> > exec.c | 57 ++++-
> > hmp-commands.hx | 4 +-
> > hw/net/vhost_net.c | 144 ++++++++---
> > hw/net/virtio-net.c | 42 ++--
> > hw/scsi/vhost-scsi.c | 13 +-
> > hw/virtio/Makefile.objs | 2 +-
> > hw/virtio/vhost-backend.c | 504
> ++++++++++++++++++++++++++++++++++++++
> > hw/virtio/vhost.c | 46 ++--
> > include/exec/cpu-all.h | 3 -
> > include/hw/virtio/vhost-backend.h | 40 +++
> > include/hw/virtio/vhost.h | 4 +-
> > include/net/vhost-user.h | 17 ++
> > include/net/vhost_net.h | 15 +-
> > net/Makefile.objs | 2 +-
> > net/clients.h | 3 +
> > net/hub.c | 1 +
> > net/net.c | 2 +
> > net/tap.c | 16 +-
> > net/vhost-user.c | 167 +++++++++++++
> > qapi-schema.json | 18 +-
> > qemu-options.hx | 13 +-
> > vl.c | 41 +++-
> > 22 files changed, 1030 insertions(+), 124 deletions(-)
> > create mode 100644 hw/virtio/vhost-backend.c
> > create mode 100644 include/hw/virtio/vhost-backend.h
> > create mode 100644 include/net/vhost-user.h
> > create mode 100644 net/vhost-user.c
> >
> > --
> > 1.8.3.2
> >
>
[-- Attachment #2: Type: text/html, Size: 6336 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Qemu-devel] [PATCH v5 0/7] Vhost and vhost-net support for userspace based backends
2014-01-10 10:58 ` Antonios Motakis
@ 2014-01-10 12:25 ` Michael S. Tsirkin
0 siblings, 0 replies; 24+ messages in thread
From: Michael S. Tsirkin @ 2014-01-10 12:25 UTC (permalink / raw)
To: Antonios Motakis
Cc: Luke Gorrie, snabb-devel, Nikolay Nikolaev, qemu-devel qemu-devel,
VirtualOpenSystems Technical Team
On Fri, Jan 10, 2014 at 11:58:23AM +0100, Antonios Motakis wrote:
>
>
>
> On Thu, Jan 9, 2014 at 5:11 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Thu, Jan 09, 2014 at 03:59:54PM +0100, Antonios Motakis wrote:
> > In this patch series we would like to introduce our approach for putting
> a
> > virtio-net backend in an external userspace process. Our eventual target
> is to
> > run the network backend in the Snabbswitch ethernet switch, while
> receiving
> > traffic from a guest inside QEMU/KVM which runs an unmodified virtio-net
> > implementation.
> >
> > For this, we are working into extending vhost to allow equivalent
> functionality
> > for userspace. Vhost already passes control of the data plane of
> virtio-net to
> > the host kernel; we want to realize a similar model, but for userspace.
> >
> > In this patch series the concept of a vhost-backend is introduced.
> >
> > We define two vhost backend types - vhost-kernel and vhost-user. The
> former is
> > the interface to the current kernel module implementation. Its control
> plane is
> > ioctl based. The data plane is the kernel directly accessing the QEMU
> allocated,
> > guest memory.
> >
> > In the new vhost-user backend, the control plane is based on
> communication
> > between QEMU and another userspace process using a unix domain socket.
> This
> > allows to implement a virtio backend for a guest running in QEMU, inside
> the
> > other userspace process.
> >
> > We change -mem-path to QemuOpts and add prealloc, share and unlink as
> properties
> > to it. HugeTLBFS requirements of -mem-path are relaxed, so any valid path
> can
> > be used now. The new properties allow more fine grained control over the
> guest
> > RAM backing store.
> >
> > The data path is realized by directly accessing the vrings and the buffer
> data
> > off the guest's memory.
> >
> > The current user of vhost-user is only vhost-net. We add new netdev
> backend
> > that is intended to initialize vhost-net with vhost-user backend.
> >
> > Example usage:
> >
> > qemu -m 1024 -mem-path /hugetlbfs,prealloc=on,share=on \
> > -netdev type=vhost-user,id=net0,file=/path/to/sock \
>
>
> Why can't this use standard syntax that qemu_chr_open_socket
> supports?
> This will add features such as ability for management to open the socket
> and pass it to qemu and switch, ability to switch server/client roles,
> and others.
>
>
> What you have in mind is just to match the syntax used for the socket chardev,
> or are you proposing some code reuse or interaction with it?
>
Matching syntax is most important IMHO.
But I'd say reusing code is a good idea too.
>
>
> > -device virtio-net-pci,netdev=net0
> >
> > Changes from v4:
> > - Use error_report for errors
> > - VhostUserMsg has new field 'size' indicating the following payload
> length.
> > Field 'flags' now has version and reply bits. The structure is packed.
> > - Send data is of variable length ('size' field in message)
> > - Parse received calls in 2 steps, header and payload
> > - Add new message type VHOST_USER_ECHO, to check connection status
> >
> > Changes from v3:
> > - Convert -mem-path to QemuOpts with prealloc, share and unlink
> properties
> > - Set 1 sec timeout when read/write to the unix domain socket
> > - Fix file descriptor leak
> >
> > Changes from v2:
> > - Reconnect when the backend disappears
> >
> > Changes from v1:
> > - Implementation of vhost-user netdev backend
> > - Code improvements
> >
> > Antonios Motakis (7):
> > Convert -mem-path to QemuOpts and add prealloc,share and unlink
> > properties
> > Decouple vhost from kernel interface
> > Add vhost-user skeleton
> > Add domain socket communication for vhost-user backend
> > Add vhost-user calls implementation
> > Add new vhost-user netdev backend
> > Add vhost-user reconnection
> >
> > exec.c | 57 ++++-
> > hmp-commands.hx | 4 +-
> > hw/net/vhost_net.c | 144 ++++++++---
> > hw/net/virtio-net.c | 42 ++--
> > hw/scsi/vhost-scsi.c | 13 +-
> > hw/virtio/Makefile.objs | 2 +-
> > hw/virtio/vhost-backend.c | 504
> ++++++++++++++++++++++++++++++++++++++
> > hw/virtio/vhost.c | 46 ++--
> > include/exec/cpu-all.h | 3 -
> > include/hw/virtio/vhost-backend.h | 40 +++
> > include/hw/virtio/vhost.h | 4 +-
> > include/net/vhost-user.h | 17 ++
> > include/net/vhost_net.h | 15 +-
> > net/Makefile.objs | 2 +-
> > net/clients.h | 3 +
> > net/hub.c | 1 +
> > net/net.c | 2 +
> > net/tap.c | 16 +-
> > net/vhost-user.c | 167 +++++++++++++
> > qapi-schema.json | 18 +-
> > qemu-options.hx | 13 +-
> > vl.c | 41 +++-
> > 22 files changed, 1030 insertions(+), 124 deletions(-)
> > create mode 100644 hw/virtio/vhost-backend.c
> > create mode 100644 include/hw/virtio/vhost-backend.h
> > create mode 100644 include/net/vhost-user.h
> > create mode 100644 net/vhost-user.c
> >
> > --
> > 1.8.3.2
> >
>
>
^ permalink raw reply [flat|nested] 24+ messages in thread