All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/2] exec: Convert bounce buffer to a set
@ 2015-03-10  7:50 Fam Zheng
  2015-03-10  7:50 ` [Qemu-devel] [PATCH 1/2] " Fam Zheng
  2015-03-10  7:50 ` [Qemu-devel] [PATCH 2/2] exec: Remove map_client_list family Fam Zheng
  0 siblings, 2 replies; 5+ messages in thread
From: Fam Zheng @ 2015-03-10  7:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini

The first patch introduces a hash table based bounce buffer tracker, which
guarantees the availability whenever bounce buffer is needed for dma.

The second patch removes the unused map_client_list.

Fam Zheng (2):
  exec: Convert bounce buffer to a set
  exec: Remove map_client_list family

 dma-helpers.c             |  20 ---------
 exec.c                    | 112 ++++++++++++++++++++++------------------------
 include/exec/cpu-common.h |   1 -
 include/exec/memory.h     |   2 -
 tests/ide-test.c          |   7 ++-
 5 files changed, 59 insertions(+), 83 deletions(-)

-- 
1.9.3

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH 1/2] exec: Convert bounce buffer to a set
  2015-03-10  7:50 [Qemu-devel] [PATCH 0/2] exec: Convert bounce buffer to a set Fam Zheng
@ 2015-03-10  7:50 ` Fam Zheng
  2015-03-10 11:14   ` Paolo Bonzini
  2015-03-10  7:50 ` [Qemu-devel] [PATCH 2/2] exec: Remove map_client_list family Fam Zheng
  1 sibling, 1 reply; 5+ messages in thread
From: Fam Zheng @ 2015-03-10  7:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini

With the introduction of dataplane, the global bounce buffer is neither
thread-safe nor multi-thread friendly. The problems are:

 1) Access to "bounce" is not atomic, thus not safe from dataplane
 thread.

 2) Access to "map_client_list" is not atomic, thus not safe from
 dataplane thread.

 3) In dma_blk_cb, there is a race condition between:

        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir);
        /* ... */
        cpu_register_map_client(dbs, continue_after_map_failure);

    Bounce may become available after dma_memory_map failed but before
    cpu_register_map_client is called.

 4) The callback registered by cpu_register_map_client is not called in the
    right AioContext.

This patch changes the global bounce to a global set, which is protected
by a mutex. This makes the dma_memory_map never returns NULL in the
bounce buffer path.

The dma helper behavior is changed, so update the ide dma test case
too. The test used to trigger a big number of consecutive dma read (as a
result of disabling "busmaster"), which will put the IDE device to "intr
| active" status. After this patch, the dma read is merged to a very big
SG (niov=8192), which results in -EINVAL in the final preadv(2). In this
case, ide ERR bit is set.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 exec.c           | 77 +++++++++++++++++++++++++++++++++++++++-----------------
 tests/ide-test.c |  7 ++++--
 2 files changed, 59 insertions(+), 25 deletions(-)

diff --git a/exec.c b/exec.c
index 6a5adab..5943cc2 100644
--- a/exec.c
+++ b/exec.c
@@ -94,6 +94,42 @@ DEFINE_TLS(CPUState *, current_cpu);
    2 = Adaptive rate instruction counting.  */
 int use_icount;
 
+typedef struct BounceBuffer {
+    MemoryRegion *mr;
+    void *buffer;
+    hwaddr addr;
+    hwaddr len;
+    QLIST_ENTRY(BounceBuffer) next;
+} BounceBuffer;
+
+static QemuMutex bounce_lock;
+static GHashTable *bounce_buffers;
+
+static BounceBuffer *bounce_buffer_find_and_remove(void *buffer)
+{
+    BounceBuffer *bounce;
+
+    qemu_mutex_lock(&bounce_lock);
+    bounce = g_hash_table_lookup(bounce_buffers, buffer);
+    if (bounce) {
+        g_hash_table_remove(bounce_buffers, buffer);
+    }
+    qemu_mutex_unlock(&bounce_lock);
+    return bounce;
+}
+
+static inline void bounce_buffer_insert(BounceBuffer *bounce)
+{
+    qemu_mutex_lock(&bounce_lock);
+    g_hash_table_insert(bounce_buffers, bounce->buffer, bounce);
+    qemu_mutex_unlock(&bounce_lock);
+}
+
+static guint bounce_buffer_hash(gconstpointer key)
+{
+    return *(long *)key >> 4;
+}
+
 #if !defined(CONFIG_USER_ONLY)
 
 typedef struct PhysPageEntry PhysPageEntry;
@@ -433,6 +469,8 @@ void cpu_exec_init_all(void)
     memory_map_init();
     io_mem_init();
 #endif
+    qemu_mutex_init(&bounce_lock);
+    bounce_buffers = g_hash_table_new(bounce_buffer_hash, NULL);
 }
 
 #if !defined(CONFIG_USER_ONLY)
@@ -2475,15 +2513,6 @@ void cpu_flush_icache_range(hwaddr start, int len)
                                            start, NULL, len, FLUSH_CACHE);
 }
 
-typedef struct {
-    MemoryRegion *mr;
-    void *buffer;
-    hwaddr addr;
-    hwaddr len;
-} BounceBuffer;
-
-static BounceBuffer bounce;
-
 typedef struct MapClient {
     void *opaque;
     void (*callback)(void *opaque);
@@ -2568,23 +2597,22 @@ void *address_space_map(AddressSpace *as,
     l = len;
     mr = address_space_translate(as, addr, &xlat, &l, is_write);
     if (!memory_access_is_direct(mr, is_write)) {
-        if (bounce.buffer) {
-            return NULL;
-        }
+        BounceBuffer *bounce = g_new(BounceBuffer, 1);
         /* Avoid unbounded allocations */
         l = MIN(l, TARGET_PAGE_SIZE);
-        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
-        bounce.addr = addr;
-        bounce.len = l;
+        bounce->buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
+        bounce->addr = addr;
+        bounce->len = l;
 
         memory_region_ref(mr);
-        bounce.mr = mr;
+        bounce->mr = mr;
         if (!is_write) {
-            address_space_read(as, addr, bounce.buffer, l);
+            address_space_read(as, addr, bounce->buffer, l);
         }
 
         *plen = l;
-        return bounce.buffer;
+        bounce_buffer_insert(bounce);
+        return bounce->buffer;
     }
 
     base = xlat;
@@ -2617,7 +2645,10 @@ void *address_space_map(AddressSpace *as,
 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                          int is_write, hwaddr access_len)
 {
-    if (buffer != bounce.buffer) {
+    BounceBuffer *bounce;
+
+    bounce = bounce_buffer_find_and_remove(buffer);
+    if (!bounce) {
         MemoryRegion *mr;
         ram_addr_t addr1;
 
@@ -2633,11 +2664,11 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
         return;
     }
     if (is_write) {
-        address_space_write(as, bounce.addr, bounce.buffer, access_len);
+        address_space_write(as, bounce->addr, bounce->buffer, access_len);
     }
-    qemu_vfree(bounce.buffer);
-    bounce.buffer = NULL;
-    memory_region_unref(bounce.mr);
+    qemu_vfree(bounce->buffer);
+    memory_region_unref(bounce->mr);
+    g_free(bounce);
     cpu_notify_map_clients();
 }
 
diff --git a/tests/ide-test.c b/tests/ide-test.c
index 29f4039..5eb81ec 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -366,6 +366,7 @@ static void test_bmdma_long_prdt(void)
 static void test_bmdma_no_busmaster(void)
 {
     uint8_t status;
+    uint8_t rs;
 
     /* No PRDT_EOT, each entry addr 0/size 64k, and in theory qemu shouldn't be
      * able to access it anyway because the Bus Master bit in the PCI command
@@ -378,8 +379,10 @@ static void test_bmdma_no_busmaster(void)
 
     /* Not entirely clear what the expected result is, but this is what we get
      * in practice. At least we want to be aware of any changes. */
-    g_assert_cmphex(status, ==, BM_STS_ACTIVE | BM_STS_INTR);
-    assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR);
+    g_assert_cmphex(status, ==, BM_STS_INTR);
+    rs = inb(IDE_BASE + reg_status);
+    assert_bit_clear(rs, DF);
+    assert_bit_set(rs, ERR);
 }
 
 static void test_bmdma_setup(void)
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH 2/2] exec: Remove map_client_list family
  2015-03-10  7:50 [Qemu-devel] [PATCH 0/2] exec: Convert bounce buffer to a set Fam Zheng
  2015-03-10  7:50 ` [Qemu-devel] [PATCH 1/2] " Fam Zheng
@ 2015-03-10  7:50 ` Fam Zheng
  1 sibling, 0 replies; 5+ messages in thread
From: Fam Zheng @ 2015-03-10  7:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paolo Bonzini

Now that bounce buffer is no longer global and the callback and BH are
not necessary. Remove them altogether.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 dma-helpers.c             | 20 --------------------
 exec.c                    | 35 -----------------------------------
 include/exec/cpu-common.h |  1 -
 include/exec/memory.h     |  2 --
 4 files changed, 58 deletions(-)

diff --git a/dma-helpers.c b/dma-helpers.c
index 6918572..4de52dc 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -81,25 +81,6 @@ typedef struct {
     DMAIOFunc *io_func;
 } DMAAIOCB;
 
-static void dma_blk_cb(void *opaque, int ret);
-
-static void reschedule_dma(void *opaque)
-{
-    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
-
-    qemu_bh_delete(dbs->bh);
-    dbs->bh = NULL;
-    dma_blk_cb(dbs, 0);
-}
-
-static void continue_after_map_failure(void *opaque)
-{
-    DMAAIOCB *dbs = (DMAAIOCB *)opaque;
-
-    dbs->bh = qemu_bh_new(reschedule_dma, dbs);
-    qemu_bh_schedule(dbs->bh);
-}
-
 static void dma_blk_unmap(DMAAIOCB *dbs)
 {
     int i;
@@ -161,7 +142,6 @@ static void dma_blk_cb(void *opaque, int ret)
 
     if (dbs->iov.size == 0) {
         trace_dma_map_wait(dbs);
-        cpu_register_map_client(dbs, continue_after_map_failure);
         return;
     }
 
diff --git a/exec.c b/exec.c
index 5943cc2..45f324e 100644
--- a/exec.c
+++ b/exec.c
@@ -2519,38 +2519,6 @@ typedef struct MapClient {
     QLIST_ENTRY(MapClient) link;
 } MapClient;
 
-static QLIST_HEAD(map_client_list, MapClient) map_client_list
-    = QLIST_HEAD_INITIALIZER(map_client_list);
-
-void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
-{
-    MapClient *client = g_malloc(sizeof(*client));
-
-    client->opaque = opaque;
-    client->callback = callback;
-    QLIST_INSERT_HEAD(&map_client_list, client, link);
-    return client;
-}
-
-static void cpu_unregister_map_client(void *_client)
-{
-    MapClient *client = (MapClient *)_client;
-
-    QLIST_REMOVE(client, link);
-    g_free(client);
-}
-
-static void cpu_notify_map_clients(void)
-{
-    MapClient *client;
-
-    while (!QLIST_EMPTY(&map_client_list)) {
-        client = QLIST_FIRST(&map_client_list);
-        client->callback(client->opaque);
-        cpu_unregister_map_client(client);
-    }
-}
-
 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
 {
     MemoryRegion *mr;
@@ -2576,8 +2544,6 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_
  * May map a subset of the requested range, given by and returned in *plen.
  * May return NULL if resources needed to perform the mapping are exhausted.
  * Use only for reads OR writes - not for read-modify-write operations.
- * Use cpu_register_map_client() to know when retrying the map operation is
- * likely to succeed.
  */
 void *address_space_map(AddressSpace *as,
                         hwaddr addr,
@@ -2669,7 +2635,6 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
     qemu_vfree(bounce->buffer);
     memory_region_unref(bounce->mr);
     g_free(bounce);
-    cpu_notify_map_clients();
 }
 
 void *cpu_physical_memory_map(hwaddr addr,
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index fcc3162..c86c837 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -82,7 +82,6 @@ void *cpu_physical_memory_map(hwaddr addr,
                               int is_write);
 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                                int is_write, hwaddr access_len);
-void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
 
 bool cpu_physical_memory_is_io(hwaddr phys_addr);
 
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 06ffa1d..3ac93ab 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1127,8 +1127,6 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_
  * May map a subset of the requested range, given by and returned in @plen.
  * May return %NULL if resources needed to perform the mapping are exhausted.
  * Use only for reads OR writes - not for read-modify-write operations.
- * Use cpu_register_map_client() to know when retrying the map operation is
- * likely to succeed.
  *
  * @as: #AddressSpace to be accessed
  * @addr: address within that address space
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] exec: Convert bounce buffer to a set
  2015-03-10  7:50 ` [Qemu-devel] [PATCH 1/2] " Fam Zheng
@ 2015-03-10 11:14   ` Paolo Bonzini
  2015-03-11  4:57     ` Fam Zheng
  0 siblings, 1 reply; 5+ messages in thread
From: Paolo Bonzini @ 2015-03-10 11:14 UTC (permalink / raw)
  To: Fam Zheng, qemu-devel



On 10/03/2015 08:50, Fam Zheng wrote:
> +    QLIST_ENTRY(BounceBuffer) next;

Where is this used?

> -    if (buffer != bounce.buffer) {
> +    BounceBuffer *bounce;
> +
> +    bounce = bounce_buffer_find_and_remove(buffer);
> +    if (!bounce) {

I'm afraid that this adds a mutex lock/unlock pair and a hash table
lookup on a very hot path.  One possibility is to add a check that the
hash table is not empty in bounce_buffer_find_and_remove.  That can be
done outside the lock so it's fast.

I'm also wondering if it's okay to let the guest do arbitrarily large
memory allocations (e.g. DMA from a huge unassigned memory area above
guest RAM); effectively you're disabling the "/* Avoid unbounded
allocations */" safety guard.

Is it hard to do this while keeping map_clients?

Paolo

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] exec: Convert bounce buffer to a set
  2015-03-10 11:14   ` Paolo Bonzini
@ 2015-03-11  4:57     ` Fam Zheng
  0 siblings, 0 replies; 5+ messages in thread
From: Fam Zheng @ 2015-03-11  4:57 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

On Tue, 03/10 12:14, Paolo Bonzini wrote:
> 
> 
> On 10/03/2015 08:50, Fam Zheng wrote:
> > +    QLIST_ENTRY(BounceBuffer) next;
> 
> Where is this used?

Unused, I will remove this.

> 
> > -    if (buffer != bounce.buffer) {
> > +    BounceBuffer *bounce;
> > +
> > +    bounce = bounce_buffer_find_and_remove(buffer);
> > +    if (!bounce) {
> 
> I'm afraid that this adds a mutex lock/unlock pair and a hash table
> lookup on a very hot path.  One possibility is to add a check that the
> hash table is not empty in bounce_buffer_find_and_remove.  That can be
> done outside the lock so it's fast.
> 
> I'm also wondering if it's okay to let the guest do arbitrarily large
> memory allocations (e.g. DMA from a huge unassigned memory area above
> guest RAM); effectively you're disabling the "/* Avoid unbounded
> allocations */" safety guard.

This is a good point, it is dangerous to allow that.

> 
> Is it hard to do this while keeping map_clients?

We might need to keep map_client for above reason. I'll take another look into
it.

Fam

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-03-11  4:57 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-03-10  7:50 [Qemu-devel] [PATCH 0/2] exec: Convert bounce buffer to a set Fam Zheng
2015-03-10  7:50 ` [Qemu-devel] [PATCH 1/2] " Fam Zheng
2015-03-10 11:14   ` Paolo Bonzini
2015-03-11  4:57     ` Fam Zheng
2015-03-10  7:50 ` [Qemu-devel] [PATCH 2/2] exec: Remove map_client_list family Fam Zheng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.