[Qemu-devel] [PATCH 01/15] exec: memory notifiers

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH 01/15] exec: memory notifiers
       [not found] <cover.1265287265.git.mst@redhat.com>
@ 2010-02-04 12:41 ` Michael S. Tsirkin
  2010-02-04 12:42 ` [Qemu-devel] [PATCH 02/15] kvm: move kvm_set_phys_mem around Michael S. Tsirkin
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:41 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

This adds notifiers for physical memory changes: a set of callbacks that
vhost can register in order to update the kernel accordingly.  Down the
road, kvm code can be switched to use these as well, instead of being
called directly from exec.c as is done now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 cpu-common.h |   19 ++++++++++
 exec.c       |  113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 129 insertions(+), 3 deletions(-)

diff --git a/cpu-common.h b/cpu-common.h
index 6302372..0ec9b72 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -8,6 +8,7 @@
 #endif
 
 #include "bswap.h"
+#include "qemu-queue.h"
 
 /* address in the RAM (different from a physical address) */
 typedef unsigned long ram_addr_t;
@@ -61,6 +62,24 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
 void cpu_unregister_map_client(void *cookie);
 
+struct CPUPhysMemoryClient;
+typedef struct CPUPhysMemoryClient CPUPhysMemoryClient;
+struct CPUPhysMemoryClient {
+    void (*set_memory)(struct CPUPhysMemoryClient *client,
+                       target_phys_addr_t start_addr,
+                       ram_addr_t size,
+                       ram_addr_t phys_offset);
+    int (*sync_dirty_bitmap)(struct CPUPhysMemoryClient *client,
+                             target_phys_addr_t start_addr,
+                             target_phys_addr_t end_addr);
+    int (*migration_log)(struct CPUPhysMemoryClient *client,
+                         int enable);
+    QLIST_ENTRY(CPUPhysMemoryClient) list;
+};
+
+void cpu_register_phys_memory_client(CPUPhysMemoryClient *);
+void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *);
+
 uint32_t ldub_phys(target_phys_addr_t addr);
 uint32_t lduw_phys(target_phys_addr_t addr);
 uint32_t ldl_phys(target_phys_addr_t addr);
diff --git a/exec.c b/exec.c
index 76831a1..d713b72 100644
--- a/exec.c
+++ b/exec.c
@@ -1623,6 +1623,101 @@ const CPULogItem cpu_log_items[] = {
     { 0, NULL, NULL },
 };
 
+#ifndef CONFIG_USER_ONLY
+static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
+    = QLIST_HEAD_INITIALIZER(memory_client_list);
+
+static void cpu_notify_set_memory(target_phys_addr_t start_addr,
+				  ram_addr_t size,
+				  ram_addr_t phys_offset)
+{
+    CPUPhysMemoryClient *client;
+    QLIST_FOREACH(client, &memory_client_list, list) {
+        client->set_memory(client, start_addr, size, phys_offset);
+    }
+}
+
+static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
+					target_phys_addr_t end)
+{
+    CPUPhysMemoryClient *client;
+    QLIST_FOREACH(client, &memory_client_list, list) {
+        int r = client->sync_dirty_bitmap(client, start, end);
+        if (r < 0)
+            return r;
+    }
+    return 0;
+}
+
+static int cpu_notify_migration_log(int enable)
+{
+    CPUPhysMemoryClient *client;
+    QLIST_FOREACH(client, &memory_client_list, list) {
+        int r = client->migration_log(client, enable);
+        if (r < 0)
+            return r;
+    }
+    return 0;
+}
+
+static void phys_page_for_each_in_l1_map(PhysPageDesc **phys_map,
+                                         CPUPhysMemoryClient *client)
+{
+    PhysPageDesc *pd;
+    int l1, l2;
+
+    for (l1 = 0; l1 < L1_SIZE; ++l1) {
+        pd = phys_map[l1];
+        if (!pd) {
+            continue;
+        }
+        for (l2 = 0; l2 < L2_SIZE; ++l2) {
+            if (pd[l2].phys_offset == IO_MEM_UNASSIGNED) {
+                continue;
+            }
+            client->set_memory(client, pd[l2].region_offset,
+                               TARGET_PAGE_SIZE, pd[l2].phys_offset);
+        }
+    }
+}
+
+static void phys_page_for_each(CPUPhysMemoryClient *client)
+{
+#if TARGET_PHYS_ADDR_SPACE_BITS > 32
+
+#if TARGET_PHYS_ADDR_SPACE_BITS > (32 + L1_BITS)
+#error unsupported TARGET_PHYS_ADDR_SPACE_BITS
+#endif
+    void **phys_map = (void **)l1_phys_map;
+    int l1;
+    if (!l1_phys_map) {
+        return;
+    }
+    for (l1 = 0; l1 < L1_SIZE; ++l1) {
+        if (phys_map[l1]) {
+            phys_page_for_each_in_l1_map(phys_map[l1], client);
+        }
+    }
+#else
+    if (!l1_phys_map) {
+        return;
+    }
+    phys_page_for_each_in_l1_map(l1_phys_map, client);
+#endif
+}
+
+void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
+{
+    QLIST_INSERT_HEAD(&memory_client_list, client, list);
+    phys_page_for_each(client);
+}
+
+void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
+{
+    QLIST_REMOVE(client, list);
+}
+#endif
+
 static int cmp1(const char *s1, int n, const char *s2)
 {
     if (strlen(s2) != n)
@@ -1882,11 +1977,16 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
 
 int cpu_physical_memory_set_dirty_tracking(int enable)
 {
+    int ret = 0;
     in_migration = enable;
     if (kvm_enabled()) {
-        return kvm_set_migration_log(enable);
+        ret = kvm_set_migration_log(enable);
     }
-    return 0;
+    if (ret < 0) {
+        return ret;
+    }
+    ret = cpu_notify_migration_log(!!enable);
+    return ret;
 }
 
 int cpu_physical_memory_get_dirty_tracking(void)
@@ -1899,8 +1999,13 @@ int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
 {
     int ret = 0;
 
-    if (kvm_enabled())
+    if (kvm_enabled()) {
         ret = kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
+    }
+    if (ret < 0) {
+        return ret;
+    }
+    ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
     return ret;
 }
 
@@ -2315,6 +2420,8 @@ void cpu_register_physical_memory_offset(target_phys_addr_t start_addr,
     if (kvm_enabled())
         kvm_set_phys_mem(start_addr, size, phys_offset);
 
+    cpu_notify_set_memory(start_addr, size, phys_offset);
+
     if (phys_offset == IO_MEM_UNASSIGNED) {
         region_offset = start_addr;
     }
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 02/15] kvm: move kvm_set_phys_mem around
       [not found] <cover.1265287265.git.mst@redhat.com>
  2010-02-04 12:41 ` [Qemu-devel] [PATCH 01/15] exec: memory notifiers Michael S. Tsirkin
@ 2010-02-04 12:42 ` Michael S. Tsirkin
  2010-02-04 12:42 ` [Qemu-devel] [PATCH 03/15] kvm: move kvm to use memory notifiers Michael S. Tsirkin
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:42 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

Move kvm_set_phys_mem earlier in the file so that it is
defined before the code that will use it.
This is needed for the next patch, which uses memory notifiers.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Avi Kivity <avi@redhat.com>
---
 kvm-all.c |  276 ++++++++++++++++++++++++++++++------------------------------
 1 files changed, 138 insertions(+), 138 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index 15ec38e..4efb653 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -394,6 +394,144 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
     return ret;
 }
 
+void kvm_set_phys_mem(target_phys_addr_t start_addr,
+		       ram_addr_t size,
+		       ram_addr_t phys_offset)
+{
+    KVMState *s = kvm_state;
+    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
+    KVMSlot *mem, old;
+    int err;
+
+    if (start_addr & ~TARGET_PAGE_MASK) {
+        if (flags >= IO_MEM_UNASSIGNED) {
+            if (!kvm_lookup_overlapping_slot(s, start_addr,
+                                             start_addr + size)) {
+                return;
+            }
+            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
+        } else {
+            fprintf(stderr, "Only page-aligned memory slots supported\n");
+        }
+        abort();
+    }
+
+    /* KVM does not support read-only slots */
+    phys_offset &= ~IO_MEM_ROM;
+
+    while (1) {
+        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
+        if (!mem) {
+            break;
+        }
+
+        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
+            (start_addr + size <= mem->start_addr + mem->memory_size) &&
+            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
+            /* The new slot fits into the existing one and comes with
+             * identical parameters - nothing to be done. */
+            return;
+        }
+
+        old = *mem;
+
+        /* unregister the overlapping slot */
+        mem->memory_size = 0;
+        err = kvm_set_user_memory_region(s, mem);
+        if (err) {
+            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
+                    __func__, strerror(-err));
+            abort();
+        }
+
+        /* Workaround for older KVM versions: we can't join slots, even not by
+         * unregistering the previous ones and then registering the larger
+         * slot. We have to maintain the existing fragmentation. Sigh.
+         *
+         * This workaround assumes that the new slot starts at the same
+         * address as the first existing one. If not or if some overlapping
+         * slot comes around later, we will fail (not seen in practice so far)
+         * - and actually require a recent KVM version. */
+        if (s->broken_set_mem_region &&
+            old.start_addr == start_addr && old.memory_size < size &&
+            flags < IO_MEM_UNASSIGNED) {
+            mem = kvm_alloc_slot(s);
+            mem->memory_size = old.memory_size;
+            mem->start_addr = old.start_addr;
+            mem->phys_offset = old.phys_offset;
+            mem->flags = 0;
+
+            err = kvm_set_user_memory_region(s, mem);
+            if (err) {
+                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
+                        strerror(-err));
+                abort();
+            }
+
+            start_addr += old.memory_size;
+            phys_offset += old.memory_size;
+            size -= old.memory_size;
+            continue;
+        }
+
+        /* register prefix slot */
+        if (old.start_addr < start_addr) {
+            mem = kvm_alloc_slot(s);
+            mem->memory_size = start_addr - old.start_addr;
+            mem->start_addr = old.start_addr;
+            mem->phys_offset = old.phys_offset;
+            mem->flags = 0;
+
+            err = kvm_set_user_memory_region(s, mem);
+            if (err) {
+                fprintf(stderr, "%s: error registering prefix slot: %s\n",
+                        __func__, strerror(-err));
+                abort();
+            }
+        }
+
+        /* register suffix slot */
+        if (old.start_addr + old.memory_size > start_addr + size) {
+            ram_addr_t size_delta;
+
+            mem = kvm_alloc_slot(s);
+            mem->start_addr = start_addr + size;
+            size_delta = mem->start_addr - old.start_addr;
+            mem->memory_size = old.memory_size - size_delta;
+            mem->phys_offset = old.phys_offset + size_delta;
+            mem->flags = 0;
+
+            err = kvm_set_user_memory_region(s, mem);
+            if (err) {
+                fprintf(stderr, "%s: error registering suffix slot: %s\n",
+                        __func__, strerror(-err));
+                abort();
+            }
+        }
+    }
+
+    /* in case the KVM bug workaround already "consumed" the new slot */
+    if (!size)
+        return;
+
+    /* KVM does not need to know about this memory */
+    if (flags >= IO_MEM_UNASSIGNED)
+        return;
+
+    mem = kvm_alloc_slot(s);
+    mem->memory_size = size;
+    mem->start_addr = start_addr;
+    mem->phys_offset = phys_offset;
+    mem->flags = 0;
+
+    err = kvm_set_user_memory_region(s, mem);
+    if (err) {
+        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
+                strerror(-err));
+        abort();
+    }
+}
+
 int kvm_init(int smp_cpus)
 {
     static const char upgrade_note[] =
@@ -674,144 +812,6 @@ int kvm_cpu_exec(CPUState *env)
     return ret;
 }
 
-void kvm_set_phys_mem(target_phys_addr_t start_addr,
-                      ram_addr_t size,
-                      ram_addr_t phys_offset)
-{
-    KVMState *s = kvm_state;
-    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
-    KVMSlot *mem, old;
-    int err;
-
-    if (start_addr & ~TARGET_PAGE_MASK) {
-        if (flags >= IO_MEM_UNASSIGNED) {
-            if (!kvm_lookup_overlapping_slot(s, start_addr,
-                                             start_addr + size)) {
-                return;
-            }
-            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
-        } else {
-            fprintf(stderr, "Only page-aligned memory slots supported\n");
-        }
-        abort();
-    }
-
-    /* KVM does not support read-only slots */
-    phys_offset &= ~IO_MEM_ROM;
-
-    while (1) {
-        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
-        if (!mem) {
-            break;
-        }
-
-        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
-            (start_addr + size <= mem->start_addr + mem->memory_size) &&
-            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
-            /* The new slot fits into the existing one and comes with
-             * identical parameters - nothing to be done. */
-            return;
-        }
-
-        old = *mem;
-
-        /* unregister the overlapping slot */
-        mem->memory_size = 0;
-        err = kvm_set_user_memory_region(s, mem);
-        if (err) {
-            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
-                    __func__, strerror(-err));
-            abort();
-        }
-
-        /* Workaround for older KVM versions: we can't join slots, even not by
-         * unregistering the previous ones and then registering the larger
-         * slot. We have to maintain the existing fragmentation. Sigh.
-         *
-         * This workaround assumes that the new slot starts at the same
-         * address as the first existing one. If not or if some overlapping
-         * slot comes around later, we will fail (not seen in practice so far)
-         * - and actually require a recent KVM version. */
-        if (s->broken_set_mem_region &&
-            old.start_addr == start_addr && old.memory_size < size &&
-            flags < IO_MEM_UNASSIGNED) {
-            mem = kvm_alloc_slot(s);
-            mem->memory_size = old.memory_size;
-            mem->start_addr = old.start_addr;
-            mem->phys_offset = old.phys_offset;
-            mem->flags = 0;
-
-            err = kvm_set_user_memory_region(s, mem);
-            if (err) {
-                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
-                        strerror(-err));
-                abort();
-            }
-
-            start_addr += old.memory_size;
-            phys_offset += old.memory_size;
-            size -= old.memory_size;
-            continue;
-        }
-
-        /* register prefix slot */
-        if (old.start_addr < start_addr) {
-            mem = kvm_alloc_slot(s);
-            mem->memory_size = start_addr - old.start_addr;
-            mem->start_addr = old.start_addr;
-            mem->phys_offset = old.phys_offset;
-            mem->flags = 0;
-
-            err = kvm_set_user_memory_region(s, mem);
-            if (err) {
-                fprintf(stderr, "%s: error registering prefix slot: %s\n",
-                        __func__, strerror(-err));
-                abort();
-            }
-        }
-
-        /* register suffix slot */
-        if (old.start_addr + old.memory_size > start_addr + size) {
-            ram_addr_t size_delta;
-
-            mem = kvm_alloc_slot(s);
-            mem->start_addr = start_addr + size;
-            size_delta = mem->start_addr - old.start_addr;
-            mem->memory_size = old.memory_size - size_delta;
-            mem->phys_offset = old.phys_offset + size_delta;
-            mem->flags = 0;
-
-            err = kvm_set_user_memory_region(s, mem);
-            if (err) {
-                fprintf(stderr, "%s: error registering suffix slot: %s\n",
-                        __func__, strerror(-err));
-                abort();
-            }
-        }
-    }
-
-    /* in case the KVM bug workaround already "consumed" the new slot */
-    if (!size)
-        return;
-
-    /* KVM does not need to know about this memory */
-    if (flags >= IO_MEM_UNASSIGNED)
-        return;
-
-    mem = kvm_alloc_slot(s);
-    mem->memory_size = size;
-    mem->start_addr = start_addr;
-    mem->phys_offset = phys_offset;
-    mem->flags = 0;
-
-    err = kvm_set_user_memory_region(s, mem);
-    if (err) {
-        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
-                strerror(-err));
-        abort();
-    }
-}
-
 int kvm_ioctl(KVMState *s, int type, ...)
 {
     int ret;
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 03/15] kvm: move kvm to use memory notifiers
       [not found] <cover.1265287265.git.mst@redhat.com>
  2010-02-04 12:41 ` [Qemu-devel] [PATCH 01/15] exec: memory notifiers Michael S. Tsirkin
  2010-02-04 12:42 ` [Qemu-devel] [PATCH 02/15] kvm: move kvm_set_phys_mem around Michael S. Tsirkin
@ 2010-02-04 12:42 ` Michael S. Tsirkin
  2010-02-04 12:44 ` [Qemu-devel] [PATCH 04/15] kvm: add API to set ioeventfd Michael S. Tsirkin
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:42 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

Remove direct kvm calls from exec.c and make
kvm use the memory notifiers framework instead.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Avi Kivity <avi@redhat.com>
---
 exec.c    |   17 +----------------
 kvm-all.c |   40 ++++++++++++++++++++++++++++++++++------
 kvm.h     |    8 --------
 3 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/exec.c b/exec.c
index d713b72..2e7434e 100644
--- a/exec.c
+++ b/exec.c
@@ -1979,12 +1979,6 @@ int cpu_physical_memory_set_dirty_tracking(int enable)
 {
     int ret = 0;
     in_migration = enable;
-    if (kvm_enabled()) {
-        ret = kvm_set_migration_log(enable);
-    }
-    if (ret < 0) {
-        return ret;
-    }
     ret = cpu_notify_migration_log(!!enable);
     return ret;
 }
@@ -1997,14 +1991,8 @@ int cpu_physical_memory_get_dirty_tracking(void)
 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                    target_phys_addr_t end_addr)
 {
-    int ret = 0;
+    int ret;
 
-    if (kvm_enabled()) {
-        ret = kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
-    }
-    if (ret < 0) {
-        return ret;
-    }
     ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
     return ret;
 }
@@ -2417,9 +2405,6 @@ void cpu_register_physical_memory_offset(target_phys_addr_t start_addr,
     ram_addr_t orig_size = size;
     void *subpage;
 
-    if (kvm_enabled())
-        kvm_set_phys_mem(start_addr, size, phys_offset);
-
     cpu_notify_set_memory(start_addr, size, phys_offset);
 
     if (phys_offset == IO_MEM_UNASSIGNED) {
diff --git a/kvm-all.c b/kvm-all.c
index 4efb653..a312654 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -257,7 +257,7 @@ int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
                                           KVM_MEM_LOG_DIRTY_PAGES);
 }
 
-int kvm_set_migration_log(int enable)
+static int kvm_set_migration_log(int enable)
 {
     KVMState *s = kvm_state;
     KVMSlot *mem;
@@ -292,8 +292,8 @@ static int test_le_bit(unsigned long nr, unsigned char *addr)
  * @start_add: start of logged region.
  * @end_addr: end of logged region.
  */
-int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
-                                   target_phys_addr_t end_addr)
+static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
+					  target_phys_addr_t end_addr)
 {
     KVMState *s = kvm_state;
     unsigned long size, allocated_size = 0;
@@ -394,9 +394,9 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
     return ret;
 }
 
-void kvm_set_phys_mem(target_phys_addr_t start_addr,
-		       ram_addr_t size,
-		       ram_addr_t phys_offset)
+static void kvm_set_phys_mem(target_phys_addr_t start_addr,
+			     ram_addr_t size,
+			     ram_addr_t phys_offset)
 {
     KVMState *s = kvm_state;
     ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
@@ -532,6 +532,33 @@ void kvm_set_phys_mem(target_phys_addr_t start_addr,
     }
 }
 
+static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
+				  target_phys_addr_t start_addr,
+				  ram_addr_t size,
+				  ram_addr_t phys_offset)
+{
+	kvm_set_phys_mem(start_addr, size, phys_offset);
+}
+
+static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
+					target_phys_addr_t start_addr,
+					target_phys_addr_t end_addr)
+{
+	return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
+}
+
+static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
+				    int enable)
+{
+	return kvm_set_migration_log(enable);
+}
+
+static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
+	.set_memory = kvm_client_set_memory,
+	.sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
+	.migration_log = kvm_client_migration_log,
+};
+
 int kvm_init(int smp_cpus)
 {
     static const char upgrade_note[] =
@@ -628,6 +655,7 @@ int kvm_init(int smp_cpus)
         goto err;
 
     kvm_state = s;
+    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
 
     return 0;
 
diff --git a/kvm.h b/kvm.h
index 1c93ac5..672d511 100644
--- a/kvm.h
+++ b/kvm.h
@@ -35,16 +35,8 @@ int kvm_init_vcpu(CPUState *env);
 
 int kvm_cpu_exec(CPUState *env);
 
-void kvm_set_phys_mem(target_phys_addr_t start_addr,
-                      ram_addr_t size,
-                      ram_addr_t phys_offset);
-
-int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
-                                   target_phys_addr_t end_addr);
-
 int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size);
 int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size);
-int kvm_set_migration_log(int enable);
 
 int kvm_has_sync_mmu(void);
 int kvm_has_vcpu_events(void);
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 04/15] kvm: add API to set ioeventfd
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (2 preceding siblings ...)
  2010-02-04 12:42 ` [Qemu-devel] [PATCH 03/15] kvm: move kvm to use memory notifiers Michael S. Tsirkin
@ 2010-02-04 12:44 ` Michael S. Tsirkin
  2010-02-04 12:44 ` [Qemu-devel] [PATCH 05/15] notifier: event notifier implementation Michael S. Tsirkin
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:44 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

This adds an API to set an ioeventfd in kvm,
as well as a stub for the non-eventfd case,
making it possible for users to use this API
without ifdefs.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 kvm-all.c |   20 ++++++++++++++++++++
 kvm.h     |   16 ++++++++++++++++
 2 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index a312654..beaba6d 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1113,3 +1113,23 @@ void kvm_remove_all_breakpoints(CPUState *current_env)
 {
 }
 #endif /* !KVM_CAP_SET_GUEST_DEBUG */
+
+#ifdef KVM_IOEVENTFD
+int kvm_set_ioeventfd(uint16_t addr, uint16_t data, int fd, bool assigned)
+{
+    struct kvm_ioeventfd kick = {
+        .datamatch = data,
+        .addr = addr,
+        .len = 2,
+        .flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO,
+        .fd = fd,
+    };
+    int r;
+    if (!assigned)
+        kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
+    r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
+    if (r < 0)
+        return r;
+    return 0;
+}
+#endif
diff --git a/kvm.h b/kvm.h
index 672d511..a8a86e7 100644
--- a/kvm.h
+++ b/kvm.h
@@ -14,10 +14,16 @@
 #ifndef QEMU_KVM_H
 #define QEMU_KVM_H
 
+#include <stdbool.h>
+#include <errno.h>
 #include "config.h"
 #include "qemu-queue.h"
 
 #ifdef CONFIG_KVM
+#include <linux/kvm.h>
+#endif
+
+#ifdef CONFIG_KVM
 extern int kvm_allowed;
 
 #define kvm_enabled() (kvm_allowed)
@@ -131,4 +137,14 @@ static inline void cpu_synchronize_state(CPUState *env)
     }
 }
 
+#if defined(KVM_IOEVENTFD) && defined(CONFIG_KVM)
+int kvm_set_ioeventfd(uint16_t addr, uint16_t data, int fd, bool assigned);
+#else
+static inline
+int kvm_set_ioeventfd(uint16_t data, uint16_t addr, int fd, bool assigned)
+{
+    return -ENOSYS;
+}
+#endif
+
 #endif
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 05/15] notifier: event notifier implementation
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (3 preceding siblings ...)
  2010-02-04 12:44 ` [Qemu-devel] [PATCH 04/15] kvm: add API to set ioeventfd Michael S. Tsirkin
@ 2010-02-04 12:44 ` Michael S. Tsirkin
  2010-02-04 12:45 ` [Qemu-devel] [PATCH 06/15] virtio: add notifier support Michael S. Tsirkin
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:44 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

Event notifiers are slightly generalized eventfd descriptors. The current
implementation depends on eventfd because vhost is the only user, and
vhost depends on eventfd anyway, but a stub is provided for the non-eventfd
case.

We'll be able to further generalize this when another user comes along
and we see how to best do this.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Makefile.target |    1 +
 hw/notifier.c   |   50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/notifier.h   |   16 ++++++++++++++++
 qemu-common.h   |    1 +
 4 files changed, 68 insertions(+), 0 deletions(-)
 create mode 100644 hw/notifier.c
 create mode 100644 hw/notifier.h

diff --git a/Makefile.target b/Makefile.target
index 5c0ef1f..31bde66 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -173,6 +173,7 @@ obj-y = vl.o async.o monitor.o pci.o pci_host.o pcie_host.o machine.o gdbstub.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-pci.o virtio-serial-bus.o
+obj-y += notifier.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
 LIBS+=-lz
diff --git a/hw/notifier.c b/hw/notifier.c
new file mode 100644
index 0000000..dff38de
--- /dev/null
+++ b/hw/notifier.c
@@ -0,0 +1,50 @@
+#include "hw.h"
+#include "notifier.h"
+#ifdef CONFIG_EVENTFD
+#include <sys/eventfd.h>
+#endif
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+#ifdef CONFIG_EVENTFD
+	int fd = eventfd(!!active, EFD_NONBLOCK | EFD_CLOEXEC);
+	if (fd < 0)
+		return -errno;
+	e->fd = fd;
+	return 0;
+#else
+	return -ENOSYS;
+#endif
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+	close(e->fd);
+}
+
+int event_notifier_get_fd(EventNotifier *e)
+{
+	return e->fd;
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+	uint64_t value;
+	int r = read(e->fd, &value, sizeof value);
+	return r == sizeof value;
+}
+
+int event_notifier_test(EventNotifier *e)
+{
+	uint64_t value;
+	int r = read(e->fd, &value, sizeof value);
+	if (r == sizeof value) {
+		/* restore previous value. */
+		int s = write(e->fd, &value, sizeof value);
+		/* never blocks because the fd is created with EFD_NONBLOCK
+		 * (not EFD_SEMAPHORE); an overflowing write would fail
+		 * with EAGAIN and trip the assert below. */
+		assert(s == sizeof value);
+	}
+	return r == sizeof value;
+}
diff --git a/hw/notifier.h b/hw/notifier.h
new file mode 100644
index 0000000..24117ea
--- /dev/null
+++ b/hw/notifier.h
@@ -0,0 +1,16 @@
+#ifndef QEMU_EVENT_NOTIFIER_H
+#define QEMU_EVENT_NOTIFIER_H
+
+#include "qemu-common.h"
+
+struct EventNotifier {
+	int fd;
+};
+
+int event_notifier_init(EventNotifier *, int active);
+void event_notifier_cleanup(EventNotifier *);
+int event_notifier_get_fd(EventNotifier *);
+int event_notifier_test_and_clear(EventNotifier *);
+int event_notifier_test(EventNotifier *);
+
+#endif
diff --git a/qemu-common.h b/qemu-common.h
index b09f717..423e962 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -224,6 +224,7 @@ typedef struct uWireSlave uWireSlave;
 typedef struct I2SCodec I2SCodec;
 typedef struct DeviceState DeviceState;
 typedef struct SSIBus SSIBus;
+typedef struct EventNotifier EventNotifier;
 
 /* CPU save/load.  */
 void cpu_save(QEMUFile *f, void *opaque);
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 06/15] virtio: add notifier support
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (4 preceding siblings ...)
  2010-02-04 12:44 ` [Qemu-devel] [PATCH 05/15] notifier: event notifier implementation Michael S. Tsirkin
@ 2010-02-04 12:45 ` Michael S. Tsirkin
  2010-02-04 12:46 ` [Qemu-devel] [PATCH 07/15] virtio: add APIs for queue fields Michael S. Tsirkin
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:45 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

Add a binding API to set host/guest notifiers.
It will be used by vhost.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio.c |   13 ++++++++++---
 hw/virtio.h |    5 ++++-
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/hw/virtio.c b/hw/virtio.c
index 7c020a3..b9411e9 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -592,6 +592,12 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
     return &vdev->vq[i];
 }
 
+void virtio_irq(VirtIODevice *vdev, VirtQueue *vq)
+{
+    vdev->isr |= 0x01;
+    virtio_notify_vector(vdev, vq->vector);
+}
+
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
     /* Always notify when queue is empty (when feature acknowledge) */
@@ -600,8 +606,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
          (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
         return;
 
-    vdev->isr |= 0x01;
-    virtio_notify_vector(vdev, vq->vector);
+    virtio_irq(vdev, vq);
 }
 
 void virtio_notify_config(VirtIODevice *vdev)
@@ -714,8 +719,10 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     vdev->queue_sel = 0;
     vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
-    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++)
+    for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         vdev->vq[i].vector = VIRTIO_NO_VECTOR;
+        vdev->vq[i].vdev = vdev;
+    }
 
     vdev->name = name;
     vdev->config_len = config_size;
diff --git a/hw/virtio.h b/hw/virtio.h
index 62e882b..2c298a8 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -18,6 +18,7 @@
 #include "net.h"
 #include "qdev.h"
 #include "sysemu.h"
+#include "notifier.h"
 
 /* from Linux's linux/virtio_config.h */
 
@@ -88,6 +89,8 @@ typedef struct {
     int (*load_config)(void * opaque, QEMUFile *f);
     int (*load_queue)(void * opaque, int n, QEMUFile *f);
     unsigned (*get_features)(void * opaque);
+    int (*guest_notifier)(void * opaque, int n, bool assigned);
+    int (*host_notifier)(void * opaque, int n, bool assigned);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 64
@@ -180,5 +183,5 @@ void virtio_net_exit(VirtIODevice *vdev);
 	DEFINE_PROP_BIT("indirect_desc", _state, _field, \
 			VIRTIO_RING_F_INDIRECT_DESC, true)
 
-
+void virtio_irq(VirtIODevice *vdev, VirtQueue *vq);
 #endif
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 07/15] virtio: add APIs for queue fields
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (5 preceding siblings ...)
  2010-02-04 12:45 ` [Qemu-devel] [PATCH 06/15] virtio: add notifier support Michael S. Tsirkin
@ 2010-02-04 12:46 ` Michael S. Tsirkin
  2010-02-04 12:46 ` [Qemu-devel] [PATCH 08/15] virtio: add status change callback Michael S. Tsirkin
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:46 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

vhost needs physical addresses for ring and other queue fields,
so add APIs for these.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio.c |   51 +++++++++++++++++++++++++++++++++++++++++++++++----
 hw/virtio.h |   10 +++++++++-
 2 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/hw/virtio.c b/hw/virtio.c
index b9411e9..65e59c1 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -73,6 +73,9 @@ struct VirtQueue
     int inuse;
     uint16_t vector;
     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
+    VirtIODevice *vdev;
+    EventNotifier guest_notifier;
+    EventNotifier host_notifier;
 };
 
 /* virt queue functions */
@@ -592,10 +595,10 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
     return &vdev->vq[i];
 }
 
-void virtio_irq(VirtIODevice *vdev, VirtQueue *vq)
+void virtio_irq(VirtQueue *vq)
 {
-    vdev->isr |= 0x01;
-    virtio_notify_vector(vdev, vq->vector);
+    vq->vdev->isr |= 0x01;
+    virtio_notify_vector(vq->vdev, vq->vector);
 }
 
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
@@ -606,7 +609,8 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
          (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
         return;
 
-    virtio_irq(vdev, vq);
+    vdev->isr |= 0x01;
+    virtio_notify_vector(vdev, vq->vector);
 }
 
 void virtio_notify_config(VirtIODevice *vdev)
@@ -740,3 +744,42 @@ void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding,
     vdev->binding = binding;
     vdev->binding_opaque = opaque;
 }
+
+target_phys_addr_t virtio_queue_get_desc(VirtIODevice *vdev, int n)
+{
+	return vdev->vq[n].vring.desc;
+}
+
+target_phys_addr_t virtio_queue_get_avail(VirtIODevice *vdev, int n)
+{
+	return vdev->vq[n].vring.avail;
+}
+
+target_phys_addr_t virtio_queue_get_used(VirtIODevice *vdev, int n)
+{
+	return vdev->vq[n].vring.used;
+}
+
+uint16_t virtio_queue_last_avail_idx(VirtIODevice *vdev, int n)
+{
+	return vdev->vq[n].last_avail_idx;
+}
+
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
+{
+	vdev->vq[n].last_avail_idx = idx;
+}
+
+VirtQueue *virtio_queue(VirtIODevice *vdev, int n)
+{
+	return vdev->vq + n;
+}
+
+EventNotifier *virtio_queue_guest_notifier(VirtQueue *vq)
+{
+	return &vq->guest_notifier;
+}
+EventNotifier *virtio_queue_host_notifier(VirtQueue *vq)
+{
+	return &vq->host_notifier;
+}
diff --git a/hw/virtio.h b/hw/virtio.h
index 2c298a8..92ad5d1 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -183,5 +183,13 @@ void virtio_net_exit(VirtIODevice *vdev);
 	DEFINE_PROP_BIT("indirect_desc", _state, _field, \
 			VIRTIO_RING_F_INDIRECT_DESC, true)
 
-void virtio_irq(VirtIODevice *vdev, VirtQueue *vq);
+target_phys_addr_t virtio_queue_get_desc(VirtIODevice *vdev, int n);
+target_phys_addr_t virtio_queue_get_avail(VirtIODevice *vdev, int n);
+target_phys_addr_t virtio_queue_get_used(VirtIODevice *vdev, int n);
+uint16_t virtio_queue_last_avail_idx(VirtIODevice *vdev, int n);
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
+VirtQueue *virtio_queue(VirtIODevice *vdev, int n);
+EventNotifier *virtio_queue_guest_notifier(VirtQueue *vq);
+EventNotifier *virtio_queue_host_notifier(VirtQueue *vq);
+void virtio_irq(VirtQueue *vq);
 #endif
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 08/15] virtio: add status change callback
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (6 preceding siblings ...)
  2010-02-04 12:46 ` [Qemu-devel] [PATCH 07/15] virtio: add APIs for queue fields Michael S. Tsirkin
@ 2010-02-04 12:46 ` Michael S. Tsirkin
  2010-02-04 12:46 ` [Qemu-devel] [PATCH 09/15] virtio: move typedef to qemu-common Michael S. Tsirkin
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:46 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

vhost net backend needs to be notified when
frontend status changes. Add a callback.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/s390-virtio-bus.c |    3 +++
 hw/syborg_virtio.c   |    2 ++
 hw/virtio-pci.c      |    6 ++++++
 hw/virtio.h          |    1 +
 4 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c
index 6b6dafc..a4ce734 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390-virtio-bus.c
@@ -243,6 +243,9 @@ void s390_virtio_device_update_status(VirtIOS390Device *dev)
     uint32_t features;
 
     vdev->status = ldub_phys(dev->dev_offs + VIRTIO_DEV_OFFS_STATUS);
+    if (vdev->set_status) {
+        vdev->set_status(vdev);
+    }
 
     /* Update guest supported feature bitmap */
 
diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index 65239a0..19f6473 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -152,6 +152,8 @@ static void syborg_virtio_writel(void *opaque, target_phys_addr_t offset,
         vdev->status = value & 0xFF;
         if (vdev->status == 0)
             virtio_reset(vdev);
+        if (vdev->set_status)
+            vdev->set_status(vdev);
         break;
     case SYBORG_VIRTIO_INT_ENABLE:
         s->int_enable = value;
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 709d13e..dbb0b16 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -210,6 +210,9 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
             virtio_reset(proxy->vdev);
             msix_unuse_all_vectors(&proxy->pci_dev);
         }
+        if (vdev->set_status) {
+            vdev->set_status(vdev);
+        }
         break;
     case VIRTIO_MSI_CONFIG_VECTOR:
         msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
@@ -377,6 +380,9 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
     if (PCI_COMMAND == address) {
         if (!(val & PCI_COMMAND_MASTER)) {
             proxy->vdev->status &= ~VIRTIO_CONFIG_S_DRIVER_OK;
+            if (proxy->vdev->set_status) {
+                proxy->vdev->set_status(proxy->vdev);
+            }
         }
     }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 92ad5d1..235e7c4 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -114,6 +114,7 @@ struct VirtIODevice
     void (*get_config)(VirtIODevice *vdev, uint8_t *config);
     void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
     void (*reset)(VirtIODevice *vdev);
+    void (*set_status)(VirtIODevice *vdev);
     VirtQueue *vq;
     const VirtIOBindings *binding;
     void *binding_opaque;
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 09/15] virtio: move typedef to qemu-common
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (7 preceding siblings ...)
  2010-02-04 12:46 ` [Qemu-devel] [PATCH 08/15] virtio: add status change callback Michael S. Tsirkin
@ 2010-02-04 12:46 ` Michael S. Tsirkin
  2010-02-04 12:46 ` [Qemu-devel] [PATCH 10/15] virtio-pci: fill in notifier support Michael S. Tsirkin
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:46 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

Make it possible to use the VirtIODevice type without including hw/virtio.h.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio.h   |    1 -
 qemu-common.h |    1 +
 2 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/hw/virtio.h b/hw/virtio.h
index 235e7c4..5dae591 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -68,7 +68,6 @@ static inline target_phys_addr_t vring_align(target_phys_addr_t addr,
 }
 
 typedef struct VirtQueue VirtQueue;
-typedef struct VirtIODevice VirtIODevice;
 
 #define VIRTQUEUE_MAX_SIZE 1024
 
diff --git a/qemu-common.h b/qemu-common.h
index 423e962..e1c3c4e 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -225,6 +225,7 @@ typedef struct I2SCodec I2SCodec;
 typedef struct DeviceState DeviceState;
 typedef struct SSIBus SSIBus;
 typedef struct EventNotifier EventNotifier;
+typedef struct VirtIODevice VirtIODevice;
 
 /* CPU save/load.  */
 void cpu_save(QEMUFile *f, void *opaque);
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 10/15] virtio-pci: fill in notifier support
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (8 preceding siblings ...)
  2010-02-04 12:46 ` [Qemu-devel] [PATCH 09/15] virtio: move typedef to qemu-common Michael S. Tsirkin
@ 2010-02-04 12:46 ` Michael S. Tsirkin
  2010-02-04 12:47 ` [Qemu-devel] [PATCH 11/15] tap: add interface to get device fd Michael S. Tsirkin
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:46 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

Support host/guest notifiers in virtio-pci.
Host notifiers are only supported with kvm (they rely on
kvm_set_ioeventfd); that's okay because vhost relies on kvm anyway.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |   62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 62 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index dbb0b16..02859a7 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -23,6 +23,7 @@
 #include "msix.h"
 #include "net.h"
 #include "loader.h"
+#include "kvm.h"
 
 /* from Linux's linux/virtio_pci.h */
 
@@ -396,6 +397,65 @@ static unsigned virtio_pci_get_features(void *opaque)
     return proxy->host_features;
 }
 
+static void virtio_pci_guest_notifier_read(void *opaque)
+{
+    VirtQueue *vq = opaque;
+    EventNotifier *n = virtio_queue_guest_notifier(vq);
+    if (event_notifier_test_and_clear(n)) {
+        virtio_irq(vq);
+    }
+}
+
+static int virtio_pci_guest_notifier(void *opaque, int n, bool assign)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    VirtQueue *vq = virtio_queue(proxy->vdev, n);
+    EventNotifier *notifier = virtio_queue_guest_notifier(vq);
+
+    if (assign) {
+        int r = event_notifier_init(notifier, 0);
+	if (r < 0)
+		return r;
+        qemu_set_fd_handler(event_notifier_get_fd(notifier),
+                            virtio_pci_guest_notifier_read, NULL, vq);
+    } else {
+        qemu_set_fd_handler(event_notifier_get_fd(notifier),
+                            NULL, NULL, vq);
+        event_notifier_cleanup(notifier);
+    }
+
+    return 0;
+}
+
+static int virtio_pci_host_notifier(void *opaque, int n, bool assign)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    VirtQueue *vq = virtio_queue(proxy->vdev, n);
+    EventNotifier *notifier = virtio_queue_host_notifier(vq);
+    int r;
+    if (assign) {
+	r = event_notifier_init(notifier, 1);
+	if (r < 0) {
+		return r;
+        }
+        r = kvm_set_ioeventfd(proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+                              n, event_notifier_get_fd(notifier),
+                              assign);
+        if (r < 0) {
+            event_notifier_cleanup(notifier);
+        }
+    } else {
+        r = kvm_set_ioeventfd(proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
+                              n, event_notifier_get_fd(notifier),
+                              assign);
+	if (r < 0) {
+		return r;
+        }
+        event_notifier_cleanup(notifier);
+    }
+    return r;
+}
+
 static const VirtIOBindings virtio_pci_bindings = {
     .notify = virtio_pci_notify,
     .save_config = virtio_pci_save_config,
@@ -403,6 +463,8 @@ static const VirtIOBindings virtio_pci_bindings = {
     .save_queue = virtio_pci_save_queue,
     .load_queue = virtio_pci_load_queue,
     .get_features = virtio_pci_get_features,
+    .host_notifier = virtio_pci_host_notifier,
+    .guest_notifier = virtio_pci_guest_notifier,
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 11/15] tap: add interface to get device fd
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (9 preceding siblings ...)
  2010-02-04 12:46 ` [Qemu-devel] [PATCH 10/15] virtio-pci: fill in notifier support Michael S. Tsirkin
@ 2010-02-04 12:47 ` Michael S. Tsirkin
  2010-02-04 12:47 ` [Qemu-devel] [PATCH 12/15] vhost: vhost net support Michael S. Tsirkin
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:47 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

Will be used by vhost to attach/detach to backend.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 net/tap.c |    7 +++++++
 net/tap.h |    2 ++
 2 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/net/tap.c b/net/tap.c
index d3492de..7e9ca79 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -269,6 +269,13 @@ static void tap_poll(VLANClientState *nc, bool enable)
     tap_write_poll(s, enable);
 }
 
+int tap_get_fd(VLANClientState *nc)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
+    return s->fd;
+}
+
 /* fd support */
 
 static NetClientInfo net_tap_info = {
diff --git a/net/tap.h b/net/tap.h
index 538a562..a244b28 100644
--- a/net/tap.h
+++ b/net/tap.h
@@ -48,4 +48,6 @@ int tap_probe_vnet_hdr(int fd);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
 
+int tap_get_fd(VLANClientState *vc);
+
 #endif /* QEMU_NET_TAP_H */
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 12/15] vhost: vhost net support
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (10 preceding siblings ...)
  2010-02-04 12:47 ` [Qemu-devel] [PATCH 11/15] tap: add interface to get device fd Michael S. Tsirkin
@ 2010-02-04 12:47 ` Michael S. Tsirkin
  2010-02-04 12:47 ` [Qemu-devel] [PATCH 13/15] tap: add vhost/vhostfd options Michael S. Tsirkin
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:47 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

This adds vhost net support in qemu. Will be tied to tap device and
virtio by following patches.  Raw backend is currently missing, will be
worked on/submitted separately.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Makefile.target |    1 +
 hw/vhost.c      |  603 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/vhost.h      |   44 ++++
 hw/vhost_net.c  |  147 ++++++++++++++
 hw/vhost_net.h  |   20 ++
 5 files changed, 815 insertions(+), 0 deletions(-)
 create mode 100644 hw/vhost.c
 create mode 100644 hw/vhost.h
 create mode 100644 hw/vhost_net.c
 create mode 100644 hw/vhost_net.h

diff --git a/Makefile.target b/Makefile.target
index 31bde66..852c5ff 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -174,6 +174,7 @@ obj-y = vl.o async.o monitor.o pci.o pci_host.o pcie_host.o machine.o gdbstub.o
 # need to fix this properly
 obj-y += virtio-blk.o virtio-balloon.o virtio-net.o virtio-pci.o virtio-serial-bus.o
 obj-y += notifier.o
+obj-y += vhost_net.o vhost.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
 LIBS+=-lz
diff --git a/hw/vhost.c b/hw/vhost.c
new file mode 100644
index 0000000..e5c1ead
--- /dev/null
+++ b/hw/vhost.c
@@ -0,0 +1,603 @@
+#include "linux/vhost.h"
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+#include "vhost.h"
+#include "hw/hw.h"
+/* For range_get_last */
+#include "pci.h"
+
+static void vhost_dev_sync_region(struct vhost_dev *dev,
+				     uint64_t mfirst, uint64_t mlast,
+				     uint64_t rfirst, uint64_t rlast)
+{
+	uint64_t start = MAX(mfirst, rfirst);
+	uint64_t end = MIN(mlast, rlast);
+	vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
+	vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
+	uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;
+
+	assert(end / VHOST_LOG_CHUNK < dev->log_size);
+	assert(start / VHOST_LOG_CHUNK < dev->log_size);
+	if (end < start) {
+		return;
+	}
+	for (;from < to; ++from) {
+		vhost_log_chunk_t log;
+		int bit;
+		/* We first check with non-atomic: much cheaper,
+		 * and we expect non-dirty to be the common case. */
+		if (!*from) {
+			continue;
+		}
+		/* Data must be read atomically. We don't really
+		 * need the barrier semantics of __sync
+		 * builtins, but it's easier to use them than
+		 * roll our own. */
+		log = __sync_fetch_and_and(from, 0);
+		while ((bit = sizeof(log) > sizeof(int) ?
+		       ffsll(log) : ffs(log))) {
+			bit -= 1;
+			cpu_physical_memory_set_dirty(addr + bit * VHOST_LOG_PAGE);
+			log &= ~(0x1ull << bit);
+		}
+		addr += VHOST_LOG_CHUNK;
+	}
+}
+
+static int vhost_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
+					target_phys_addr_t start_addr,
+					target_phys_addr_t end_addr)
+{
+	struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
+	int i;
+	if (!dev->log_enabled || !dev->started) {
+		return 0;
+	}
+	for (i = 0; i < dev->mem->nregions; ++i) {
+		struct vhost_memory_region *reg = dev->mem->regions + i;
+		vhost_dev_sync_region(dev, start_addr, end_addr,
+				      reg->guest_phys_addr,
+				      range_get_last(reg->guest_phys_addr,
+						     reg->memory_size));
+	}
+	for (i = 0; i < dev->nvqs; ++i) {
+		struct vhost_virtqueue *vq = dev->vqs + i;
+		unsigned size = sizeof(struct vring_used_elem) * vq->num;
+		vhost_dev_sync_region(dev, start_addr, end_addr, vq->used_phys,
+				      range_get_last(vq->used_phys, size));
+	}
+	return 0;
+}
+
+/* Assign/unassign. Keep an unsorted array of non-overlapping
+ * memory regions in dev->mem. */
+static void vhost_dev_unassign_memory(struct vhost_dev *dev,
+				      uint64_t start_addr,
+				      uint64_t size)
+{
+	int from, to, n = dev->mem->nregions;
+	/* Track overlapping/split regions for sanity checking. */
+	int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0;
+
+	for (from = 0, to = 0; from < n; ++from, ++to) {
+		struct vhost_memory_region *reg = dev->mem->regions + to;
+		uint64_t reglast;
+		uint64_t memlast;
+		uint64_t change;
+
+		/* clone old region */
+		if (to != from) {
+			memcpy(reg, dev->mem->regions + from, sizeof *reg);
+		}
+
+		/* No overlap is simple */
+		if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size,
+				    start_addr, size)) {
+			continue;
+		}
+
+		/* Split only happens if supplied region
+		 * is in the middle of an existing one. Thus it can not
+		 * overlap with any other existing region. */
+		assert(!split);
+
+		reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
+		memlast = range_get_last(start_addr, size);
+
+		/* Remove whole region */
+		if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
+			--dev->mem->nregions;
+			--to;
+			assert(to >= 0);
+			++overlap_middle;
+			continue;
+		}
+
+		/* Shrink region */
+		if (memlast >= reglast) {
+			reg->memory_size = start_addr - reg->guest_phys_addr;
+			assert(reg->memory_size);
+			assert(!overlap_end);
+			++overlap_end;
+			continue;
+		}
+
+		/* Shift region */
+		if (start_addr <= reg->guest_phys_addr) {
+			change = memlast + 1 - reg->guest_phys_addr;
+			reg->memory_size -= change;
+			reg->guest_phys_addr += change;
+			reg->userspace_addr += change;
+			assert(reg->memory_size);
+			assert(!overlap_start);
+			++overlap_start;
+			continue;
+		}
+
+		/* This only happens if supplied region
+		 * is in the middle of an existing one. Thus it can not
+		 * overlap with any other existing region. */
+		assert(!overlap_start);
+		assert(!overlap_end);
+		assert(!overlap_middle);
+		/* Split region: shrink first part, shift second part. */
+		memcpy(dev->mem->regions + n, reg, sizeof *reg);
+		reg->memory_size = start_addr - reg->guest_phys_addr;
+		assert(reg->memory_size);
+		change = memlast + 1 - reg->guest_phys_addr;
+		reg = dev->mem->regions + n;
+		reg->memory_size -= change;
+		assert(reg->memory_size);
+		reg->guest_phys_addr += change;
+		reg->userspace_addr += change;
+		/* Never add more than 1 region */
+		assert(dev->mem->nregions == n);
+		++dev->mem->nregions;
+		++split;
+	}
+}
+
+/* Called after unassign, so no regions overlap the given range. */
+static void vhost_dev_assign_memory(struct vhost_dev *dev,
+				    uint64_t start_addr,
+				    uint64_t size,
+				    uint64_t uaddr)
+{
+	int from, to;
+	struct vhost_memory_region *merged = NULL;
+	for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) {
+		struct vhost_memory_region *reg = dev->mem->regions + to;
+		uint64_t prlast, urlast;
+		uint64_t pmlast, umlast;
+		uint64_t s, e, u;
+
+		/* clone old region */
+		if (to != from) {
+			memcpy(reg, dev->mem->regions + from, sizeof *reg);
+		}
+		prlast = range_get_last(reg->guest_phys_addr, reg->memory_size);
+		pmlast = range_get_last(start_addr, size);
+		urlast = range_get_last(reg->userspace_addr, reg->memory_size);
+		umlast = range_get_last(uaddr, size);
+
+		/* check for overlapping regions: should never happen. */
+		assert(prlast < start_addr || pmlast < reg->guest_phys_addr);
+		/* Not an adjacent or overlapping region - do not merge. */
+		if ((prlast + 1 != start_addr || urlast + 1 != uaddr) &&
+		    (pmlast + 1 != reg->guest_phys_addr ||
+		     umlast + 1 != reg->userspace_addr)) {
+			continue;
+		}
+
+		if (merged) {
+			--to;
+			assert(to >= 0);
+		} else {
+			merged = reg;
+		}
+		u = MIN(uaddr, reg->userspace_addr);
+		s = MIN(start_addr, reg->guest_phys_addr);
+		e = MAX(pmlast, prlast);
+		uaddr = merged->userspace_addr = u;
+		start_addr = merged->guest_phys_addr = s;
+		size = merged->memory_size = e - s + 1;
+		assert(merged->memory_size);
+	}
+
+	if (!merged) {
+		struct vhost_memory_region *reg = dev->mem->regions + to;
+		memset(reg, 0, sizeof *reg);
+		reg->memory_size = size;
+		assert(reg->memory_size);
+		reg->guest_phys_addr = start_addr;
+		reg->userspace_addr = uaddr;
+		++to;
+	}
+	assert(to <= dev->mem->nregions + 1);
+	dev->mem->nregions = to;
+}
+
+static uint64_t vhost_get_log_size(struct vhost_dev *dev)
+{
+	uint64_t log_size = 0;
+	int i;
+	for (i = 0; i < dev->mem->nregions; ++i) {
+		struct vhost_memory_region *reg = dev->mem->regions + i;
+		uint64_t last = range_get_last(reg->guest_phys_addr,
+					       reg->memory_size);
+		log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
+	}
+	for (i = 0; i < dev->nvqs; ++i) {
+		struct vhost_virtqueue *vq = dev->vqs + i;
+		uint64_t last = vq->used_phys +
+			sizeof(struct vring_used_elem) * vq->num - 1;
+		log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
+	}
+	return log_size;
+}
+
+static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size)
+{
+	vhost_log_chunk_t *log;
+	int r;
+	if (size) {
+		log = qemu_mallocz(size * sizeof *log);
+	} else {
+		log = NULL;
+	}
+	r = ioctl(dev->control, VHOST_SET_LOG_BASE,
+		  (uint64_t)(unsigned long)log);
+	assert(r >= 0);
+	vhost_client_sync_dirty_bitmap(&dev->client, 0,
+				       (target_phys_addr_t)~0x0ull);
+	if (dev->log) {
+		qemu_free(dev->log);
+	}
+	dev->log = log;
+	dev->log_size = size;
+}
+
+static void vhost_client_set_memory(CPUPhysMemoryClient *client,
+				    target_phys_addr_t start_addr,
+				    ram_addr_t size,
+				    ram_addr_t phys_offset)
+{
+	struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
+	ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
+	int s = offsetof(struct vhost_memory, regions) +
+		(dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
+	uint64_t log_size;
+	int r;
+	dev->mem = qemu_realloc(dev->mem, s);
+
+	assert(size);
+
+	vhost_dev_unassign_memory(dev, start_addr, size);
+	if (flags == IO_MEM_RAM) {
+		/* Add given mapping, merging adjacent regions if any */
+		vhost_dev_assign_memory(dev, start_addr, size,
+				(uintptr_t)qemu_get_ram_ptr(phys_offset));
+	} else {
+		/* Remove old mapping for this memory, if any. */
+		vhost_dev_unassign_memory(dev, start_addr, size);
+	}
+
+	if (!dev->started) {
+		return;
+	}
+	if (!dev->log_enabled) {
+		r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+		assert(r >= 0);
+		return;
+	}
+	log_size = vhost_get_log_size(dev);
+	/* We allocate an extra 4K bytes to log,
+	 * to reduce the number of reallocations. */
+#define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
+	/* To log more, must increase log size before table update. */
+	if (dev->log_size < log_size) {
+		vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
+	}
+	r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+	assert(r >= 0);
+	/* To log less, can only decrease log size after table update. */
+	if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
+		vhost_dev_log_resize(dev, log_size);
+	}
+}
+
+static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
+{
+	uint64_t features = dev->acked_features;
+	int r;
+	if (dev->log_enabled) {
+		features |= 0x1 << VHOST_F_LOG_ALL;
+	}
+	r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+	return r < 0 ? -errno : 0;
+}
+
+static int vhost_client_migration_log(struct CPUPhysMemoryClient *client,
+				      int enable)
+{
+	struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
+	int r;
+	if (!!enable == dev->log_enabled) {
+		return 0;
+	}
+	if (!dev->started) {
+		dev->log_enabled = enable;
+		return 0;
+	}
+	if (!enable) {
+		r = vhost_dev_set_log(dev, false);
+		if (r < 0) {
+			return r;
+		}
+		if (dev->log) {
+			qemu_free(dev->log);
+		}
+		dev->log = NULL;
+		dev->log_size = 0;
+	} else {
+		vhost_dev_log_resize(dev, vhost_get_log_size(dev));
+		r = vhost_dev_set_log(dev, false);
+		if (r < 0) {
+			return r;
+		}
+	}
+	dev->log_enabled = enable;
+	return 0;
+}
+
+static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
+				    struct vhost_virtqueue *vq,
+				    unsigned idx, bool enable_log)
+{
+	struct vhost_vring_addr addr = {
+		.index = idx,
+		.desc_user_addr = (u_int64_t)(unsigned long)vq->desc,
+		.avail_user_addr = (u_int64_t)(unsigned long)vq->avail,
+		.used_user_addr = (u_int64_t)(unsigned long)vq->used,
+		.log_guest_addr = vq->used_phys,
+		.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0,
+	};
+	int r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+	if (r < 0) {
+		return -errno;
+	}
+	return 0;
+}
+
+static int vhost_virtqueue_init(struct vhost_dev *dev,
+				struct VirtIODevice *vdev,
+				struct vhost_virtqueue *vq,
+				unsigned idx)
+{
+	target_phys_addr_t s, l, a;
+	int r;
+	struct vhost_vring_file file = {
+		.index = idx,
+	};
+	struct vhost_vring_state state = {
+		.index = idx,
+	};
+	struct VirtQueue *q = virtio_queue(vdev, idx);
+
+	vq->num = state.num = virtio_queue_get_num(vdev, idx);
+	r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+	if (r) {
+		return -errno;
+	}
+
+	state.num = virtio_queue_last_avail_idx(vdev, idx);
+	r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+	if (r) {
+		return -errno;
+	}
+
+	s = l = sizeof(struct vring_desc) * vq->num;
+	a = virtio_queue_get_desc(vdev, idx);
+	vq->desc = cpu_physical_memory_map(a, &l, 0);
+	if (!vq->desc || l != s) {
+		r = -ENOMEM;
+		goto fail_alloc;
+	}
+	s = l = offsetof(struct vring_avail, ring) +
+		sizeof(u_int64_t) * vq->num;
+	a = virtio_queue_get_avail(vdev, idx);
+	vq->avail = cpu_physical_memory_map(a, &l, 0);
+	if (!vq->avail || l != s) {
+		r = -ENOMEM;
+		goto fail_alloc;
+	}
+	s = l = offsetof(struct vring_used, ring) +
+		sizeof(struct vring_used_elem) * vq->num;
+	vq->used_phys = a = virtio_queue_get_used(vdev, idx);
+	vq->used = cpu_physical_memory_map(a, &l, 1);
+	if (!vq->used || l != s) {
+		r = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	r = vhost_virtqueue_set_addr(dev, vq, idx, dev->log_enabled);
+	if (r < 0) {
+		r = -errno;
+		goto fail_alloc;
+	}
+	if (!vdev->binding->guest_notifier || !vdev->binding->host_notifier) {
+		fprintf(stderr, "binding does not support irqfd/queuefd\n");
+		r = -ENOSYS;
+		goto fail_alloc;
+	}
+        r = vdev->binding->guest_notifier(vdev->binding_opaque, idx, true);
+	if (r < 0) {
+		fprintf(stderr, "Error binding guest notifier: %d\n", -r);
+		goto fail_guest_notifier;
+	}
+
+        r = vdev->binding->host_notifier(vdev->binding_opaque, idx, true);
+	if (r < 0) {
+		fprintf(stderr, "Error binding host notifier: %d\n", -r);
+		goto fail_host_notifier;
+	}
+
+	file.fd = event_notifier_get_fd(virtio_queue_host_notifier(q));
+	r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+	if (r) {
+		goto fail_kick;
+	}
+
+	file.fd = event_notifier_get_fd(virtio_queue_guest_notifier(q));
+	r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+	if (r) {
+		goto fail_call;
+	}
+
+	return 0;
+
+fail_call:
+fail_kick:
+        vdev->binding->host_notifier(vdev->binding_opaque, idx, false);
+fail_host_notifier:
+        vdev->binding->guest_notifier(vdev->binding_opaque, idx, false);
+fail_guest_notifier:
+fail_alloc:
+	return r;
+}
+
+static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
+				    struct VirtIODevice *vdev,
+				    struct vhost_virtqueue *vq,
+				    unsigned idx)
+{
+	struct vhost_vring_state state = {
+		.index = idx,
+	};
+	int r;
+	r = vdev->binding->guest_notifier(vdev->binding_opaque, idx, false);
+	if (r < 0) {
+		fprintf(stderr, "vhost VQ %d guest cleanup failed: %d\n", idx, r);
+		fflush(stderr);
+	}
+	assert (r >= 0);
+
+	r = vdev->binding->host_notifier(vdev->binding_opaque, idx, false);
+	if (r < 0) {
+		fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r);
+		fflush(stderr);
+	}
+	assert (r >= 0);
+	r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
+	if (r < 0) {
+		fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
+		fflush(stderr);
+	}
+	virtio_queue_set_last_avail_idx(vdev, idx, state.num);
+	assert (r >= 0);
+}
+
+int vhost_dev_init(struct vhost_dev *hdev, int devfd)
+{
+	uint64_t features;
+	int r;
+	if (devfd >= 0) {
+		hdev->control = devfd;
+	} else {
+		hdev->control = open("/dev/vhost-net", O_RDWR);
+		if (hdev->control < 0)
+			return -errno;
+	}
+	r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
+	if (r < 0)
+		goto fail;
+
+	r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
+	if (r < 0)
+		goto fail;
+	hdev->features = features;
+	
+	hdev->client.set_memory = vhost_client_set_memory;
+	hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap;
+	hdev->client.migration_log = vhost_client_migration_log;
+	hdev->mem = qemu_mallocz(offsetof(struct vhost_memory, regions));
+	hdev->log = NULL;
+	hdev->log_size = 0;
+	hdev->log_enabled = false;
+	hdev->started = false;
+	cpu_register_phys_memory_client(&hdev->client);
+	return 0;
+fail:
+	r = -errno;
+	close(hdev->control);
+	return r;
+}
+
+void vhost_dev_cleanup(struct vhost_dev *hdev)
+{
+	cpu_unregister_phys_memory_client(&hdev->client);
+	qemu_free(hdev->mem);
+	close(hdev->control);
+}
+
+int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+	int i, r;
+
+	r = vhost_dev_set_log(hdev, hdev->log_enabled);
+	if (r < 0)
+		goto fail;
+	r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem);
+	if (r < 0) {
+		r = -errno;
+		goto fail;
+	}
+	if (hdev->log_enabled) {
+		hdev->log_size = vhost_get_log_size(hdev);
+		hdev->log = hdev->log_size ?
+			qemu_mallocz(hdev->log_size * sizeof *hdev->log) : NULL;
+		r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
+			  (uint64_t)(unsigned long)hdev->log);
+		if (r < 0) {
+			r = -errno;
+			goto fail;
+		}
+	}
+
+	for (i = 0; i < hdev->nvqs; ++i) {
+		r = vhost_virtqueue_init(hdev,
+		   			 vdev,
+					 hdev->vqs + i,
+					 i);
+		if (r < 0)
+			goto fail_vq;
+	}
+	hdev->started = true;
+
+	return 0;
+fail_vq:
+	while (--i >= 0) {
+		vhost_virtqueue_cleanup(hdev,
+					vdev,
+					hdev->vqs + i,
+					i);
+	}
+fail:
+	return r;
+}
+
+void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+	int i;
+	for (i = 0; i < hdev->nvqs; ++i) {
+		vhost_virtqueue_cleanup(hdev,
+					vdev,
+					hdev->vqs + i,
+					i);
+	}
+	vhost_client_sync_dirty_bitmap(&hdev->client, 0,
+				       (target_phys_addr_t)~0x0ull);
+	hdev->started = false;
+	qemu_free(hdev->log);
+	hdev->log_size = 0;
+}
diff --git a/hw/vhost.h b/hw/vhost.h
new file mode 100644
index 0000000..2ed3933
--- /dev/null
+++ b/hw/vhost.h
@@ -0,0 +1,44 @@
+#ifndef VHOST_H
+#define VHOST_H
+
+#include "hw/hw.h"
+#include "hw/virtio.h"
+
+/* Generic structures common for any vhost based device. */
+struct vhost_virtqueue {	/* one per queue; handed to vhost_virtqueue_init/cleanup */
+	int kick;	/* NOTE(review): presumably the guest->host notify fd -- confirm */
+	int call;	/* NOTE(review): presumably the host->guest interrupt fd -- confirm */
+	void *desc;	/* NOTE(review): presumably the mapped descriptor table -- confirm */
+	void *avail;	/* NOTE(review): presumably the mapped available ring -- confirm */
+	void *used;	/* NOTE(review): presumably the mapped used ring -- confirm */
+	int num;	/* NOTE(review): presumably the ring size in entries -- confirm */
+	unsigned long long used_phys;	/* NOTE(review): presumably guest-physical address of the used ring -- confirm */
+};
+
+typedef unsigned long vhost_log_chunk_t;	/* element type of the vhost_dev.log array */
+#define VHOST_LOG_PAGE 0x1000	/* NOTE(review): presumably bytes of memory tracked per log bit -- confirm */
+#define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t))	/* number of bits in one log chunk */
+#define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS)	/* VHOST_LOG_PAGE times the bits in one chunk */
+
+struct vhost_memory;
+struct vhost_dev {
+	CPUPhysMemoryClient client;	/* registered via cpu_register_phys_memory_client() */
+	int control;	/* fd the vhost ioctls are issued on; closed at cleanup */
+	struct vhost_memory *mem;	/* table passed to VHOST_SET_MEM_TABLE */
+	struct vhost_virtqueue *vqs;	/* array of nvqs queues, supplied by the device user */
+	int nvqs;	/* number of entries in vqs */
+	unsigned long long features;	/* checked against backend_features in vhost_net_init */
+	unsigned long long acked_features;	/* written by vhost_net_ack_features */
+	unsigned long long backend_features;	/* bits the backend requires; set by vhost_net_init */
+	bool started;	/* true between vhost_dev_start and vhost_dev_stop */
+	bool log_enabled;	/* when set, vhost_dev_start allocates and registers log */
+	vhost_log_chunk_t *log;	/* dirty log buffer; NULL when not allocated */
+	unsigned long long log_size;	/* length of log, counted in chunks */
+};
+
+int vhost_dev_init(struct vhost_dev *hdev, int devfd);
+void vhost_dev_cleanup(struct vhost_dev *hdev);
+int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
+void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
+
+#endif
diff --git a/hw/vhost_net.c b/hw/vhost_net.c
new file mode 100644
index 0000000..c89ff40
--- /dev/null
+++ b/hw/vhost_net.c
@@ -0,0 +1,147 @@
+#include <sys/eventfd.h>
+#include <sys/socket.h>
+#include <linux/kvm.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/vhost.h>
+#include <linux/virtio_ring.h>
+#include <netpacket/packet.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <stdio.h>
+
+#include "net.h"
+#include "net/tap.h"
+
+#include "virtio-net.h"
+#include "vhost.h"
+#include "vhost_net.h"
+
+struct vhost_net {
+	struct vhost_dev dev;	/* generic vhost device state */
+	struct vhost_virtqueue vqs[2];	/* storage that vhost_net_start assigns to dev.vqs */
+	int backend;	/* fd from vhost_net_get_fd; given to VHOST_NET_SET_BACKEND */
+	VLANClientState *vc;	/* the backend net client passed to vhost_net_init */
+};
+
+/* Clear from @features each listed bit that the host kernel does not offer. */
+unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
+{
+	static const int bits[] = { VIRTIO_F_NOTIFY_ON_EMPTY,
+		VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_NET_F_MRG_RXBUF };
+	unsigned i;
+	for (i = 0; i < sizeof bits / sizeof bits[0]; ++i)
+		if (!(net->dev.features & (1 << bits[i])))
+			features &= ~(1 << bits[i]);
+	return features;
+}
+
+/* Acked set = backend features plus the two ring bits present in @features. */
+void vhost_net_ack_features(struct vhost_net *net, unsigned features)
+{
+	const unsigned ring_bits = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) |
+		(1 << VIRTIO_RING_F_INDIRECT_DESC);
+	net->dev.acked_features = net->dev.backend_features |
+		(features & ring_bits);
+}
+
+/* Return tap_get_fd() for a tap @backend, -EBADFD for any other type. */
+static int vhost_net_get_fd(VLANClientState *backend)
+{
+	if (backend->info->type == NET_CLIENT_TYPE_TAP)
+		return tap_get_fd(backend);
+
+	/* Every other backend type is rejected. */
+	fprintf(stderr, "vhost-net requires tap backend\n");
+	return -EBADFD;
+}
+
+/* Allocate and set up a vhost-net instance for @backend; NULL on failure. */
+struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd)
+{
+	struct vhost_net *net = qemu_malloc(sizeof *net);
+	unsigned long long missing;
+	int fd;
+	if (!backend) {
+		fprintf(stderr, "vhost-net requires backend to be setup\n");
+		goto fail;
+	}
+	fd = vhost_net_get_fd(backend);
+	if (fd < 0)
+		goto fail;
+	net->vc = backend;
+	net->dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
+		(1 << VHOST_NET_F_VIRTIO_NET_HDR);
+	net->backend = fd;
+	if (vhost_dev_init(&net->dev, devfd) < 0)
+		goto fail;
+	missing = ~net->dev.features & net->dev.backend_features;
+	if (missing) {
+		fprintf(stderr, "vhost lacks feature mask %llu for backend\n",
+			missing);
+		vhost_dev_cleanup(&net->dev);
+		goto fail;
+	}
+	/* Set sane init value. Override when guest acks. */
+	vhost_net_ack_features(net, 0);
+	return net;
+fail:
+	qemu_free(net);
+	return NULL;
+}
+
+/* Start vhost and attach the backend fd to each virtqueue; <0 on failure. */
+int vhost_net_start(struct vhost_net *net, VirtIODevice *dev)
+{
+	struct vhost_vring_file file = { };
+	int r;
+
+	net->dev.nvqs = 2;
+	net->dev.vqs = net->vqs;
+	r = vhost_dev_start(&net->dev, dev);
+	if (r < 0)
+		return r;
+	net->vc->info->poll(net->vc, false);
+	qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
+	file.fd = net->backend;
+	for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
+		r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
+		if (r < 0) {
+			r = -errno;
+			goto fail;
+		}
+	}
+	return 0;
+fail:
+	file.fd = -1;
+	/* file.index is unsigned: "--file.index >= 0" could never be false. */
+	while (file.index-- > 0) {
+		int rc = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
+		assert(rc >= 0);
+	}
+	net->vc->info->poll(net->vc, true);
+	vhost_dev_stop(&net->dev, dev);
+	return r;
+}
+
+/* Set fd -1 as backend of every virtqueue, then stop the vhost device. */
+void vhost_net_stop(struct vhost_net *net, VirtIODevice *dev)
+{
+	struct vhost_vring_file file = { .fd = -1 };	/* index starts at 0 */
+	while (file.index < net->dev.nvqs) {
+		int rc = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
+		assert(rc >= 0);
+		++file.index;
+	}
+	net->vc->info->poll(net->vc, true);
+	vhost_dev_stop(&net->dev, dev);
+}
+
+void vhost_net_cleanup(struct vhost_net *net)
+{
+	vhost_dev_cleanup(&net->dev);	/* tear down the embedded vhost device first */
+	qemu_free(net);	/* then release the container allocated by vhost_net_init */
+}
+/* TODO: log */
diff --git a/hw/vhost_net.h b/hw/vhost_net.h
new file mode 100644
index 0000000..21f0277
--- /dev/null
+++ b/hw/vhost_net.h
@@ -0,0 +1,20 @@
+#ifndef VHOST_NET_H
+#define VHOST_NET_H
+
+#include "net.h"
+
+struct vhost_net;
+
+struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd);
+
+int vhost_net_start(struct vhost_net *net,
+		    VirtIODevice *dev);
+void vhost_net_stop(struct vhost_net *net,
+		    VirtIODevice *dev);
+
+void vhost_net_cleanup(struct vhost_net *net);
+
+unsigned vhost_net_get_features(struct vhost_net *net, unsigned features);
+void vhost_net_ack_features(struct vhost_net *net, unsigned features);
+
+#endif
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 13/15] tap: add vhost/vhostfd options
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (11 preceding siblings ...)
  2010-02-04 12:47 ` [Qemu-devel] [PATCH 12/15] vhost: vhost net support Michael S. Tsirkin
@ 2010-02-04 12:47 ` Michael S. Tsirkin
  2010-02-04 12:47 ` [Qemu-devel] [PATCH 14/15] tap: add API to retrieve vhost net header Michael S. Tsirkin
  2010-02-04 12:47 ` [Qemu-devel] [PATCH 15/15] virtio-net: vhost net support Michael S. Tsirkin
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:47 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 net.c           |    8 ++++++++
 net/tap.c       |   29 +++++++++++++++++++++++++++++
 qemu-options.hx |    4 +++-
 3 files changed, 40 insertions(+), 1 deletions(-)

diff --git a/net.c b/net.c
index 6ef93e6..b942d03 100644
--- a/net.c
+++ b/net.c
@@ -976,6 +976,14 @@ static struct {
                 .name = "vnet_hdr",
                 .type = QEMU_OPT_BOOL,
                 .help = "enable the IFF_VNET_HDR flag on the tap interface"
+            }, {
+                .name = "vhost",
+                .type = QEMU_OPT_BOOL,
+                .help = "enable vhost-net network accelerator",
+            }, {
+                .name = "vhostfd",
+                .type = QEMU_OPT_STRING,
+                .help = "file descriptor of an already opened vhost net device",
             },
 #endif /* _WIN32 */
             { /* end of list */ }
diff --git a/net/tap.c b/net/tap.c
index 7e9ca79..d9f2e41 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -41,6 +41,8 @@
 
 #include "net/tap-linux.h"
 
+#include "hw/vhost_net.h"
+
 /* Maximum GSO packet size (64k) plus plenty of room for
  * the ethernet and virtio_net headers
  */
@@ -57,6 +59,7 @@ typedef struct TAPState {
     unsigned int has_vnet_hdr : 1;
     unsigned int using_vnet_hdr : 1;
     unsigned int has_ufo: 1;
+    struct vhost_net *vhost_net;
 } TAPState;
 
 static int launch_script(const char *setup_script, const char *ifname, int fd);
@@ -252,6 +255,10 @@ static void tap_cleanup(VLANClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
+    if (s->vhost_net) {
+        vhost_net_cleanup(s->vhost_net);
+    }
+
     qemu_purge_queued_packets(nc);
 
     if (s->down_script[0])
@@ -307,6 +314,7 @@ static TAPState *net_tap_fd_init(VLANState *vlan,
     s->has_ufo = tap_probe_has_ufo(s->fd);
     tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
     tap_read_poll(s, 1);
+    s->vhost_net = NULL;
     return s;
 }
 
@@ -456,6 +464,27 @@ int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan
         }
     }
 
+    if (qemu_opt_get_bool(opts, "vhost", 0)) {
+        int vhostfd, r;
+        if (qemu_opt_get(opts, "vhostfd")) {
+            r = net_handle_fd_param(mon, qemu_opt_get(opts, "vhostfd"));
+            if (r == -1) {
+                return -1;
+            }
+            vhostfd = r;
+        } else {
+            vhostfd = -1;
+        }
+        s->vhost_net = vhost_net_init(&s->nc, vhostfd);
+        if (!s->vhost_net) {
+            qemu_error("vhost-net requested but could not be initialized\n");
+            return -1;
+        }
+    } else if (qemu_opt_get(opts, "vhostfd")) {
+        qemu_error("vhostfd= is not valid without vhost\n");
+        return -1;
+    }
+
     if (vlan) {
         vlan->nb_host_devs++;
     }
diff --git a/qemu-options.hx b/qemu-options.hx
index 5c9f482..c2b25c5 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -831,7 +831,7 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "-net tap[,vlan=n][,name=str],ifname=name\n"
     "                connect the host TAP network interface to VLAN 'n'\n"
 #else
-    "-net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off]\n"
+    "-net tap[,vlan=n][,name=str][,fd=h][,ifname=name][,script=file][,downscript=dfile][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off][,vhostfd=h]\n"
     "                connect the host TAP network interface to VLAN 'n' and use the\n"
     "                network scripts 'file' (default=%s)\n"
     "                and 'dfile' (default=%s)\n"
@@ -841,6 +841,8 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
     "                default of 'sndbuf=1048576' can be disabled using 'sndbuf=0')\n"
     "                use vnet_hdr=off to avoid enabling the IFF_VNET_HDR tap flag\n"
     "                use vnet_hdr=on to make the lack of IFF_VNET_HDR support an error condition\n"
+    "                use vhost=on to enable experimental in kernel accelerator\n"
+    "                use 'vhostfd=h' to connect to an already opened vhost net device\n"
 #endif
     "-net socket[,vlan=n][,name=str][,fd=h][,listen=[host]:port][,connect=host:port]\n"
     "                connect the vlan 'n' to another VLAN using a socket connection\n"
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 14/15] tap: add API to retrieve vhost net header
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (12 preceding siblings ...)
  2010-02-04 12:47 ` [Qemu-devel] [PATCH 13/15] tap: add vhost/vhostfd options Michael S. Tsirkin
@ 2010-02-04 12:47 ` Michael S. Tsirkin
  2010-02-04 12:47 ` [Qemu-devel] [PATCH 15/15] virtio-net: vhost net support Michael S. Tsirkin
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:47 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

This API will be used by virtio-net for vhost net support.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 net/tap.c |    7 +++++++
 net/tap.h |    3 +++
 2 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/net/tap.c b/net/tap.c
index d9f2e41..166cf05 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -491,3 +491,10 @@ int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan
 
     return 0;
 }
+
+struct vhost_net *tap_get_vhost_net(VLANClientState *nc)
+{
+    /* Only tap clients may be passed in; anything else trips the assert. */
+    assert(nc->info->type == NET_CLIENT_TYPE_TAP);
+    return DO_UPCAST(TAPState, nc, nc)->vhost_net;
+}
diff --git a/net/tap.h b/net/tap.h
index a244b28..b8cec83 100644
--- a/net/tap.h
+++ b/net/tap.h
@@ -50,4 +50,7 @@ void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
 
 int tap_get_fd(VLANClientState *vc);
 
+struct vhost_net;
+struct vhost_net *tap_get_vhost_net(VLANClientState *vc);
+
 #endif /* QEMU_NET_TAP_H */
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 15/15] virtio-net: vhost net support
       [not found] <cover.1265287265.git.mst@redhat.com>
                   ` (13 preceding siblings ...)
  2010-02-04 12:47 ` [Qemu-devel] [PATCH 14/15] tap: add API to retrieve vhost net header Michael S. Tsirkin
@ 2010-02-04 12:47 ` Michael S. Tsirkin
  14 siblings, 0 replies; 15+ messages in thread
From: Michael S. Tsirkin @ 2010-02-04 12:47 UTC (permalink / raw)
  To: Anthony Liguori, qemu-devel

This connects virtio-net to vhost net backend.
The code is structured in a way analogous to what we have with vnet
header capability in tap.  We start/stop backend on driver start/stop as
well as on save and vm start (for migration).

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c |   67 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 6e48997..f32c6fa 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -17,6 +17,7 @@
 #include "net/tap.h"
 #include "qemu-timer.h"
 #include "virtio-net.h"
+#include "vhost_net.h"
 
 #define VIRTIO_NET_VM_VERSION    11
 
@@ -47,6 +48,8 @@ typedef struct VirtIONet
     uint8_t nomulti;
     uint8_t nouni;
     uint8_t nobcast;
+    uint8_t vhost_started;
+    VMChangeStateEntry *vmstate;
     struct {
         int in_use;
         int first_multi;
@@ -114,6 +117,10 @@ static void virtio_net_reset(VirtIODevice *vdev)
     n->nomulti = 0;
     n->nouni = 0;
     n->nobcast = 0;
+    if (n->vhost_started) {
+        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), vdev);
+        n->vhost_started = 0;
+    }
 
     /* Flush any MAC and VLAN filter table state */
     n->mac_table.in_use = 0;
@@ -172,7 +179,10 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
         features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
     }
 
-    return features;
+    if (!tap_get_vhost_net(n->nic->nc.peer)) {
+        return features;
+    }
+    return vhost_net_get_features(tap_get_vhost_net(n->nic->nc.peer), features);
 }
 
 static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
@@ -690,6 +700,12 @@ static void virtio_net_save(QEMUFile *f, void *opaque)
 {
     VirtIONet *n = opaque;
 
+    if (n->vhost_started) {
+	/* TODO: should we really stop the backend?
+	 * If we don't, it might keep writing to memory. */
+        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
+	n->vhost_started = 0;
+    }
     virtio_save(&n->vdev, f);
 
     qemu_put_buffer(f, n->mac, ETH_ALEN);
@@ -802,7 +818,6 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
         qemu_mod_timer(n->tx_timer,
                        qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
     }
-
     return 0;
 }
 
@@ -822,6 +837,47 @@ static NetClientInfo net_virtio_info = {
     .link_status_changed = virtio_net_set_link_status,
 };
 
+/* Start or stop the vhost backend so it follows the DRIVER_OK status bit. */
+static void virtio_net_set_status(struct VirtIODevice *vdev)
+{
+    VirtIONet *n = to_virtio_net(vdev);
+    VLANClientState *peer = n->nic->nc.peer;
+    struct vhost_net *vhost;
+    int driver_ok = !!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
+    int r;
+    /* Nothing to do unless the peer is a tap client with vhost set up. */
+    if (!peer || peer->info->type != NET_CLIENT_TYPE_TAP) {
+        return;
+    }
+    vhost = tap_get_vhost_net(peer);
+    if (!vhost || !!n->vhost_started == driver_ok) {
+        return;
+    }
+    if (!driver_ok) {
+        vhost_net_stop(vhost, vdev);
+        n->vhost_started = 0;
+        return;
+    }
+    r = vhost_net_start(vhost, vdev);
+    if (r < 0) {
+        fprintf(stderr, "unable to start vhost net: %d: "
+                "falling back on userspace virtio\n", -r);
+        return;
+    }
+    n->vhost_started = 1;
+}
+
+static void virtio_net_vmstate_change(void *opaque, int running, int reason)
+{
+    VirtIONet *n = opaque;
+    /* Only a transition to the running state matters here: re-evaluate
+     * the device status so the vhost backend is started if appropriate,
+     * e.g. after migration. */
+    if (running) {
+        virtio_net_set_status(&n->vdev);
+    }
+}
+
 VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
 {
     VirtIONet *n;
@@ -837,6 +893,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
     n->vdev.set_features = virtio_net_set_features;
     n->vdev.bad_features = virtio_net_bad_features;
     n->vdev.reset = virtio_net_reset;
+    n->vdev.set_status = virtio_net_set_status;
     n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
     n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx);
     n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
@@ -859,6 +916,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
 
     register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
+    n->vmstate = qemu_add_vm_change_state_handler(virtio_net_vmstate_change, n);
 
     return &n->vdev;
 }
@@ -866,6 +924,11 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
 void virtio_net_exit(VirtIODevice *vdev)
 {
     VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
+    qemu_del_vm_change_state_handler(n->vmstate);
+
+    if (n->vhost_started) {
+        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), vdev);
+    }
 
     qemu_purge_queued_packets(&n->nic->nc);
 
-- 
1.6.6.144.g5c3af

^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2010-02-04 12:50 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <cover.1265287265.git.mst@redhat.com>
2010-02-04 12:41 ` [Qemu-devel] [PATCH 01/15] exec: memory notifiers Michael S. Tsirkin
2010-02-04 12:42 ` [Qemu-devel] [PATCH 02/15] kvm: move kvm_set_phys_mem around Michael S. Tsirkin
2010-02-04 12:42 ` [Qemu-devel] [PATCH 03/15] kvm: move kvm to use memory notifiers Michael S. Tsirkin
2010-02-04 12:44 ` [Qemu-devel] [PATCH 04/15] kvm: add API to set ioeventfd Michael S. Tsirkin
2010-02-04 12:44 ` [Qemu-devel] [PATCH 05/15] notifier: event notifier implementation Michael S. Tsirkin
2010-02-04 12:45 ` [Qemu-devel] [PATCH 06/15] virtio: add notifier support Michael S. Tsirkin
2010-02-04 12:46 ` [Qemu-devel] [PATCH 07/15] virtio: add APIs for queue fields Michael S. Tsirkin
2010-02-04 12:46 ` [Qemu-devel] [PATCH 08/15] virtio: add status change callback Michael S. Tsirkin
2010-02-04 12:46 ` [Qemu-devel] [PATCH 09/15] virtio: move typedef to qemu-common Michael S. Tsirkin
2010-02-04 12:46 ` [Qemu-devel] [PATCH 10/15] virtio-pci: fill in notifier support Michael S. Tsirkin
2010-02-04 12:47 ` [Qemu-devel] [PATCH 11/15] tap: add interface to get device fd Michael S. Tsirkin
2010-02-04 12:47 ` [Qemu-devel] [PATCH 12/15] vhost: vhost net support Michael S. Tsirkin
2010-02-04 12:47 ` [Qemu-devel] [PATCH 13/15] tap: add vhost/vhostfd options Michael S. Tsirkin
2010-02-04 12:47 ` [Qemu-devel] [PATCH 14/15] tap: add API to retrieve vhost net header Michael S. Tsirkin
2010-02-04 12:47 ` [Qemu-devel] [PATCH 15/15] virtio-net: vhost net support Michael S. Tsirkin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).