qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code
@ 2014-12-11 13:52 Paolo Bonzini
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted Paolo Bonzini
                   ` (6 more replies)
  0 siblings, 7 replies; 15+ messages in thread
From: Paolo Bonzini @ 2014-12-11 13:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, ming.lei, pl, stefanha

This series rewrites the io_submit code to avoid having to
synchronize the iocbs in two places (qemu_laiocb and LaioQueue).
Instead, the queue of pending I/O requests is kept in a list.

This fixes the problems with io_submit doing partial submissions
or failing with EAGAIN, without complicating the code.  The bug is
shown by "qemu-img bench -n -t none -s 65536 -d 1024 -c 100000"
pretty much instantly.

Paolo

v1->v2: add patch 4, and fix QSIMPLEQ_SPLIT_AFTER

Paolo Bonzini (5):
  linux-aio: queue requests that cannot be submitted
  linux-aio: track whether the queue is blocked
  linux-aio: rename LaioQueue idx field to "n"
  linux-aio: drop return code from laio_io_unplug and ioq_submit
  linux-aio: simplify removal of completed iocbs from the list

 block/linux-aio.c    | 99 ++++++++++++++++++++++++----------------------------
 block/raw-aio.h      |  2 +-
 include/qemu/queue.h | 11 ++++++
 3 files changed, 58 insertions(+), 54 deletions(-)

-- 
2.1.0

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted
  2014-12-11 13:52 [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code Paolo Bonzini
@ 2014-12-11 13:52 ` Paolo Bonzini
  2014-12-16 11:07   ` Kevin Wolf
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 2/5] linux-aio: track whether the queue is blocked Paolo Bonzini
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2014-12-11 13:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, ming.lei, pl, stefanha

Keep a queue of requests that were not submitted; pass them to
the kernel when a completion is reported, unless the queue is
plugged.

The array of iocbs is rebuilt every time from scratch.  This
avoids keeping the iocbs array and list synchronized.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/linux-aio.c | 75 ++++++++++++++++++++++++-------------------------------
 1 file changed, 33 insertions(+), 42 deletions(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index d92513b..b6fbfd8 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -35,14 +35,13 @@ struct qemu_laiocb {
     size_t nbytes;
     QEMUIOVector *qiov;
     bool is_read;
-    QLIST_ENTRY(qemu_laiocb) node;
+    QSIMPLEQ_ENTRY(qemu_laiocb) next;
 };
 
 typedef struct {
-    struct iocb *iocbs[MAX_QUEUED_IO];
     int plugged;
-    unsigned int size;
     unsigned int idx;
+    QSIMPLEQ_HEAD(, qemu_laiocb) pending;
 } LaioQueue;
 
 struct qemu_laio_state {
@@ -59,6 +58,8 @@ struct qemu_laio_state {
     int event_max;
 };
 
+static int ioq_submit(struct qemu_laio_state *s);
+
 static inline ssize_t io_event_ret(struct io_event *ev)
 {
     return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
@@ -135,6 +136,10 @@ static void qemu_laio_completion_bh(void *opaque)
 
         qemu_laio_process_completion(s, laiocb);
     }
+
+    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
+        ioq_submit(s);
+    }
 }
 
 static void qemu_laio_completion_cb(EventNotifier *e)
@@ -172,52 +177,40 @@ static const AIOCBInfo laio_aiocb_info = {
 
 static void ioq_init(LaioQueue *io_q)
 {
-    io_q->size = MAX_QUEUED_IO;
-    io_q->idx = 0;
+    QSIMPLEQ_INIT(&io_q->pending);
     io_q->plugged = 0;
+    io_q->idx = 0;
 }
 
 static int ioq_submit(struct qemu_laio_state *s)
 {
-    int ret, i = 0;
-    int len = s->io_q.idx;
-
-    do {
-        ret = io_submit(s->ctx, len, s->io_q.iocbs);
-    } while (i++ < 3 && ret == -EAGAIN);
+    int ret, i;
+    int len = 0;
+    struct qemu_laiocb *aiocb;
+    struct iocb *iocbs[MAX_QUEUED_IO];
 
-    /* empty io queue */
-    s->io_q.idx = 0;
+    QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
+        iocbs[len++] = &aiocb->iocb;
+        if (len == MAX_QUEUED_IO) {
+            break;
+        }
+    }
 
+    ret = io_submit(s->ctx, len, iocbs);
+    if (ret == -EAGAIN) {
+        ret = 0;
+    }
     if (ret < 0) {
-        i = 0;
-    } else {
-        i = ret;
+        abort();
     }
 
-    for (; i < len; i++) {
-        struct qemu_laiocb *laiocb =
-            container_of(s->io_q.iocbs[i], struct qemu_laiocb, iocb);
-
-        laiocb->ret = (ret < 0) ? ret : -EIO;
-        qemu_laio_process_completion(s, laiocb);
+    for (i = 0; i < ret; i++) {
+        s->io_q.idx--;
+        QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next);
     }
     return ret;
 }
 
-static void ioq_enqueue(struct qemu_laio_state *s, struct iocb *iocb)
-{
-    unsigned int idx = s->io_q.idx;
-
-    s->io_q.iocbs[idx++] = iocb;
-    s->io_q.idx = idx;
-
-    /* submit immediately if queue is full */
-    if (idx == s->io_q.size) {
-        ioq_submit(s);
-    }
-}
-
 void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
 {
     struct qemu_laio_state *s = aio_ctx;
@@ -236,7 +229,7 @@ int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
         return 0;
     }
 
-    if (s->io_q.idx > 0) {
+    if (!QSIMPLEQ_EMPTY(&s->io_q.pending)) {
         ret = ioq_submit(s);
     }
 
@@ -276,12 +269,10 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
     }
     io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
 
-    if (!s->io_q.plugged) {
-        if (io_submit(s->ctx, 1, &iocbs) < 0) {
-            goto out_free_aiocb;
-        }
-    } else {
-        ioq_enqueue(s, iocbs);
+    QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
+    s->io_q.idx++;
+    if (s->io_q.idx == (s->io_q.plugged ? MAX_QUEUED_IO : 1)) {
+        ioq_submit(s);
     }
     return &laiocb->common;
 
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 2/5] linux-aio: track whether the queue is blocked
  2014-12-11 13:52 [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code Paolo Bonzini
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted Paolo Bonzini
@ 2014-12-11 13:52 ` Paolo Bonzini
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 3/5] linux-aio: rename LaioQueue idx field to "n" Paolo Bonzini
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2014-12-11 13:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, ming.lei, pl, stefanha

Avoid that unplug submits requests when io_submit reported that it
couldn't accept more; at the same time, try more io_submit calls if it
could handle the whole set of requests that were passed, so that the
"blocked" flag is reset as soon as possible.

After the previous patch, laio_submit already tried to avoid submitting
requests to a blocked queue, by comparing s->io_q.idx with "==" instead
of the more natural ">=".  Switch to the simpler expression now that we
have the "blocked" flag.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/linux-aio.c | 47 +++++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index b6fbfd8..b870942 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -41,6 +41,7 @@ struct qemu_laiocb {
 typedef struct {
     int plugged;
     unsigned int idx;
+    bool blocked;
     QSIMPLEQ_HEAD(, qemu_laiocb) pending;
 } LaioQueue;
 
@@ -180,34 +181,39 @@ static void ioq_init(LaioQueue *io_q)
     QSIMPLEQ_INIT(&io_q->pending);
     io_q->plugged = 0;
     io_q->idx = 0;
+    io_q->blocked = false;
 }
 
 static int ioq_submit(struct qemu_laio_state *s)
 {
-    int ret, i;
-    int len = 0;
+    int ret, i, len;
     struct qemu_laiocb *aiocb;
     struct iocb *iocbs[MAX_QUEUED_IO];
 
-    QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
-        iocbs[len++] = &aiocb->iocb;
-        if (len == MAX_QUEUED_IO) {
-            break;
+    do {
+        len = 0;
+        QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
+            iocbs[len++] = &aiocb->iocb;
+            if (len == MAX_QUEUED_IO) {
+                break;
+            }
         }
-    }
 
-    ret = io_submit(s->ctx, len, iocbs);
-    if (ret == -EAGAIN) {
-        ret = 0;
-    }
-    if (ret < 0) {
-        abort();
-    }
+        ret = io_submit(s->ctx, len, iocbs);
+        if (ret == -EAGAIN) {
+            ret = 0;
+        }
+        if (ret < 0) {
+            abort();
+        }
+
+        for (i = 0; i < ret; i++) {
+            s->io_q.idx--;
+            QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next);
+        }
+    } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
+    s->io_q.blocked = (s->io_q.idx > 0);
 
-    for (i = 0; i < ret; i++) {
-        s->io_q.idx--;
-        QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next);
-    }
     return ret;
 }
 
@@ -229,7 +235,7 @@ int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
         return 0;
     }
 
-    if (!QSIMPLEQ_EMPTY(&s->io_q.pending)) {
+    if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
         ret = ioq_submit(s);
     }
 
@@ -271,7 +277,8 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
 
     QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
     s->io_q.idx++;
-    if (s->io_q.idx == (s->io_q.plugged ? MAX_QUEUED_IO : 1)) {
+    if (!s->io_q.blocked &&
+        (!s->io_q.plugged || s->io_q.idx >= MAX_QUEUED_IO)) {
         ioq_submit(s);
     }
     return &laiocb->common;
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 3/5] linux-aio: rename LaioQueue idx field to "n"
  2014-12-11 13:52 [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code Paolo Bonzini
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted Paolo Bonzini
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 2/5] linux-aio: track whether the queue is blocked Paolo Bonzini
@ 2014-12-11 13:52 ` Paolo Bonzini
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 4/5] linux-aio: drop return code from laio_io_unplug and ioq_submit Paolo Bonzini
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2014-12-11 13:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, ming.lei, pl, stefanha

It does not identify an index in an array anymore.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/linux-aio.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index b870942..b223d9e 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -40,7 +40,7 @@ struct qemu_laiocb {
 
 typedef struct {
     int plugged;
-    unsigned int idx;
+    unsigned int n;
     bool blocked;
     QSIMPLEQ_HEAD(, qemu_laiocb) pending;
 } LaioQueue;
@@ -180,7 +180,7 @@ static void ioq_init(LaioQueue *io_q)
 {
     QSIMPLEQ_INIT(&io_q->pending);
     io_q->plugged = 0;
-    io_q->idx = 0;
+    io_q->n = 0;
     io_q->blocked = false;
 }
 
@@ -208,11 +208,11 @@ static int ioq_submit(struct qemu_laio_state *s)
         }
 
         for (i = 0; i < ret; i++) {
-            s->io_q.idx--;
+            s->io_q.n--;
             QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next);
         }
     } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
-    s->io_q.blocked = (s->io_q.idx > 0);
+    s->io_q.blocked = (s->io_q.n > 0);
 
     return ret;
 }
@@ -276,9 +276,9 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
     io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
 
     QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
-    s->io_q.idx++;
+    s->io_q.n++;
     if (!s->io_q.blocked &&
-        (!s->io_q.plugged || s->io_q.idx >= MAX_QUEUED_IO)) {
+        (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
         ioq_submit(s);
     }
     return &laiocb->common;
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 4/5] linux-aio: drop return code from laio_io_unplug and ioq_submit
  2014-12-11 13:52 [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code Paolo Bonzini
                   ` (2 preceding siblings ...)
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 3/5] linux-aio: rename LaioQueue idx field to "n" Paolo Bonzini
@ 2014-12-11 13:52 ` Paolo Bonzini
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 5/5] linux-aio: simplify removal of completed iocbs from the list Paolo Bonzini
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2014-12-11 13:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, ming.lei, pl, stefanha

These are unused.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/linux-aio.c | 15 +++++----------
 block/raw-aio.h   |  2 +-
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index b223d9e..8474378 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -59,7 +59,7 @@ struct qemu_laio_state {
     int event_max;
 };
 
-static int ioq_submit(struct qemu_laio_state *s);
+static void ioq_submit(struct qemu_laio_state *s);
 
 static inline ssize_t io_event_ret(struct io_event *ev)
 {
@@ -184,7 +184,7 @@ static void ioq_init(LaioQueue *io_q)
     io_q->blocked = false;
 }
 
-static int ioq_submit(struct qemu_laio_state *s)
+static void ioq_submit(struct qemu_laio_state *s)
 {
     int ret, i, len;
     struct qemu_laiocb *aiocb;
@@ -213,8 +213,6 @@ static int ioq_submit(struct qemu_laio_state *s)
         }
     } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
     s->io_q.blocked = (s->io_q.n > 0);
-
-    return ret;
 }
 
 void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
@@ -224,22 +222,19 @@ void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
     s->io_q.plugged++;
 }
 
-int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
+void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
 {
     struct qemu_laio_state *s = aio_ctx;
-    int ret = 0;
 
     assert(s->io_q.plugged > 0 || !unplug);
 
     if (unplug && --s->io_q.plugged > 0) {
-        return 0;
+        return;
     }
 
     if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
-        ret = ioq_submit(s);
+        ioq_submit(s);
     }
-
-    return ret;
 }
 
 BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 80681ce..31d791f 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -41,7 +41,7 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
 void laio_detach_aio_context(void *s, AioContext *old_context);
 void laio_attach_aio_context(void *s, AioContext *new_context);
 void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
-int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
+void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
 #endif
 
 #ifdef _WIN32
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v2 5/5] linux-aio: simplify removal of completed iocbs from the list
  2014-12-11 13:52 [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code Paolo Bonzini
                   ` (3 preceding siblings ...)
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 4/5] linux-aio: drop return code from laio_io_unplug and ioq_submit Paolo Bonzini
@ 2014-12-11 13:52 ` Paolo Bonzini
  2014-12-11 14:49 ` [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code Kevin Wolf
  2014-12-12 17:07 ` Stefan Hajnoczi
  6 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2014-12-11 13:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: kwolf, ming.lei, pl, stefanha

There is no need to do another O(n) pass on the list; the iocb to
split the list at is already available through the array we passed to
io_submit.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/linux-aio.c    | 12 ++++++------
 include/qemu/queue.h | 11 +++++++++++
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index 8474378..c991443 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -186,9 +186,10 @@ static void ioq_init(LaioQueue *io_q)
 
 static void ioq_submit(struct qemu_laio_state *s)
 {
-    int ret, i, len;
+    int ret, len;
     struct qemu_laiocb *aiocb;
     struct iocb *iocbs[MAX_QUEUED_IO];
+    QSIMPLEQ_HEAD(, qemu_laiocb) completed;
 
     do {
         len = 0;
@@ -201,16 +202,15 @@ static void ioq_submit(struct qemu_laio_state *s)
 
         ret = io_submit(s->ctx, len, iocbs);
         if (ret == -EAGAIN) {
-            ret = 0;
+            break;
         }
         if (ret < 0) {
             abort();
         }
 
-        for (i = 0; i < ret; i++) {
-            s->io_q.n--;
-            QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next);
-        }
+        s->io_q.n -= ret;
+        aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb);
+        QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed);
     } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
     s->io_q.blocked = (s->io_q.n > 0);
 }
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index 0dedd29..a98eb3a 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -279,6 +279,17 @@ struct {                                                                \
         (head)->sqh_last = &(head)->sqh_first;                          \
 } while (/*CONSTCOND*/0)
 
+#define QSIMPLEQ_SPLIT_AFTER(head, elm, field, removed) do {            \
+    QSIMPLEQ_INIT(removed);                                             \
+    if (((removed)->sqh_first = (head)->sqh_first) != NULL) {           \
+        if (((head)->sqh_first = (elm)->field.sqe_next) == NULL) {      \
+            (head)->sqh_last = &(head)->sqh_first;                      \
+        }                                                               \
+        (removed)->sqh_last = &(elm)->field.sqe_next;                   \
+        (elm)->field.sqe_next = NULL;                                   \
+    }                                                                   \
+} while (/*CONSTCOND*/0)
+
 #define QSIMPLEQ_REMOVE(head, elm, type, field) do {                    \
     if ((head)->sqh_first == (elm)) {                                   \
         QSIMPLEQ_REMOVE_HEAD((head), field);                            \
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code
  2014-12-11 13:52 [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code Paolo Bonzini
                   ` (4 preceding siblings ...)
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 5/5] linux-aio: simplify removal of completed iocbs from the list Paolo Bonzini
@ 2014-12-11 14:49 ` Kevin Wolf
  2014-12-12 17:07 ` Stefan Hajnoczi
  6 siblings, 0 replies; 15+ messages in thread
From: Kevin Wolf @ 2014-12-11 14:49 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: ming.lei, pl, qemu-devel, stefanha

Am 11.12.2014 um 14:52 hat Paolo Bonzini geschrieben:
> This series rewrites the io_submit code to avoid having to
> synchronize the iocbs in two places (qemu_laiocb and LaioQueue).
> Instead, the queue of pending I/O requests is kept in a list.
> 
> This fixes the problems with io_submit doing partial submissions
> or failing with EAGAIN, without complicating the code.  The bug is
> shown by "qemu-img bench -n -t none -s 65536 -d 1024 -c 100000"
> pretty much instantly.
> 
> Paolo
> 
> v1->v2: add patch 4, and fix QSIMPLEQ_SPLIT_AFTER

Reviewed-by: Kevin Wolf <kwolf@redhat.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code
  2014-12-11 13:52 [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code Paolo Bonzini
                   ` (5 preceding siblings ...)
  2014-12-11 14:49 ` [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code Kevin Wolf
@ 2014-12-12 17:07 ` Stefan Hajnoczi
  6 siblings, 0 replies; 15+ messages in thread
From: Stefan Hajnoczi @ 2014-12-12 17:07 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: kwolf, ming.lei, pl, qemu-devel, stefanha

[-- Attachment #1: Type: text/plain, Size: 1226 bytes --]

On Thu, Dec 11, 2014 at 02:52:25PM +0100, Paolo Bonzini wrote:
> This series rewrites the io_submit code to avoid having to
> synchronize the iocbs in two places (qemu_laiocb and LaioQueue).
> Instead, the queue of pending I/O requests is kept in a list.
> 
> This fixes the problems with io_submit doing partial submissions
> or failing with EAGAIN, without complicating the code.  The bug is
> shown by "qemu-img bench -n -t none -s 65536 -d 1024 -c 100000"
> pretty much instantly.
> 
> Paolo
> 
> v1->v2: add patch 4, and fix QSIMPLEQ_SPLIT_AFTER
> 
> Paolo Bonzini (5):
>   linux-aio: queue requests that cannot be submitted
>   linux-aio: track whether the queue is blocked
>   linux-aio: rename LaioQueue idx field to "n"
>   linux-aio: drop return code from laio_io_unplug and ioq_submit
>   linux-aio: simplify removal of completed iocbs from the list
> 
>  block/linux-aio.c    | 99 ++++++++++++++++++++++++----------------------------
>  block/raw-aio.h      |  2 +-
>  include/qemu/queue.h | 11 ++++++
>  3 files changed, 58 insertions(+), 54 deletions(-)
> 
> -- 
> 2.1.0
> 
> 

Thanks, applied to my block-next tree:
https://github.com/stefanha/qemu/commits/block-next

Stefan

[-- Attachment #2: Type: application/pgp-signature, Size: 473 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted
  2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted Paolo Bonzini
@ 2014-12-16 11:07   ` Kevin Wolf
  2014-12-16 11:28     ` Paolo Bonzini
  0 siblings, 1 reply; 15+ messages in thread
From: Kevin Wolf @ 2014-12-16 11:07 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: ming.lei, pl, qemu-devel, stefanha

Am 11.12.2014 um 14:52 hat Paolo Bonzini geschrieben:
> Keep a queue of requests that were not submitted; pass them to
> the kernel when a completion is reported, unless the queue is
> plugged.
> 
> The array of iocbs is rebuilt every time from scratch.  This
> avoids keeping the iocbs array and list synchronized.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Just found out that in qemu-img bench, this patch seems to cost about
5-8% for me.

An optimisation for the unplugged case would probably be easy, but that
would be cheating, as the devices that we're really interested in always
plug the queue (perhaps I should extend qemu-img bench to do that
optionally, too).

Anything clever that we can do about this? Or will we just have to live
with the fact that sending a single request is now slower than it used
to be before bdrv_plug?

Kevin

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted
  2014-12-16 11:07   ` Kevin Wolf
@ 2014-12-16 11:28     ` Paolo Bonzini
  2014-12-16 13:10       ` Kevin Wolf
  0 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2014-12-16 11:28 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: ming.lei, pl, qemu-devel, stefanha



On 16/12/2014 12:07, Kevin Wolf wrote:
> Am 11.12.2014 um 14:52 hat Paolo Bonzini geschrieben:
>> Keep a queue of requests that were not submitted; pass them to
>> the kernel when a completion is reported, unless the queue is
>> plugged.
>>
>> The array of iocbs is rebuilt every time from scratch.  This
>> avoids keeping the iocbs array and list synchronized.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> Just found out that in qemu-img bench, this patch seems to cost about
> 5-8% for me.

What execution?  Queue depth=1?

For me it was noisy but I couldn't see a pessimization, and this patch
should only add a handful of pointer accesses.  Also, does perf point at
a culprit, and does patch 5 restore some of the performance?

Weird guess: TLB misses from accessing iocbs[0] on the stack (using a
different coroutine stack every time)?  Perf would report that as a
large cost of this line:

        iocbs[len++] = &aiocb->iocb;

> An optimisation for the unplugged case would probably be easy, but that
> would be cheating, as the devices that we're really interested in always
> plug the queue (perhaps I should extend qemu-img bench to do that
> optionally, too).

If you want to do that, you also have to move the "refilling" of the
queue to a bottom half.  If you refill from the completion routine, you
always have a single empty slot and plugging doesn't do anything.

Paolo

> Anything clever that we can do about this? Or will we just have to live
> with the fact that sending a single request is now slower than it used
> to be before bdrv_plug?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted
  2014-12-16 11:28     ` Paolo Bonzini
@ 2014-12-16 13:10       ` Kevin Wolf
  2014-12-16 18:28         ` Paolo Bonzini
  0 siblings, 1 reply; 15+ messages in thread
From: Kevin Wolf @ 2014-12-16 13:10 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: ming.lei, pl, qemu-devel, stefanha

Am 16.12.2014 um 12:28 hat Paolo Bonzini geschrieben:
> 
> 
> On 16/12/2014 12:07, Kevin Wolf wrote:
> > Am 11.12.2014 um 14:52 hat Paolo Bonzini geschrieben:
> >> Keep a queue of requests that were not submitted; pass them to
> >> the kernel when a completion is reported, unless the queue is
> >> plugged.
> >>
> >> The array of iocbs is rebuilt every time from scratch.  This
> >> avoids keeping the iocbs array and list synchronized.
> >>
> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > Just found out that in qemu-img bench, this patch seems to cost about
> > 5-8% for me.
> 
> What execution?  Queue depth=1?

My usual one:

$ ./qemu-img bench -t none -c 10000000 -n /dev/loop0
Sending 10000000 requests, 4096 bytes each, 64 in parallel

> For me it was noisy but I couldn't see a pessimization, and this patch
> should only add a handful of pointer accesses.  Also, does perf point at
> a culprit, and does patch 5 restore some of the performance?
> 
> Weird guess: TLB misses from accessing iocbs[0] on the stack (using a
> different coroutine stack every time)?  Perf would report that as a
> large cost of this line:
> 
>         iocbs[len++] = &aiocb->iocb;

No, I can't seem to read much from the perf results. The cost seems to
be spread fairly evenly across ioq_submit(), with the exception of the
instruction after the call to io_submit(). Not sure why the next
instruction always takes so much time (independent of what it is), but
it has been this way before.

I was surprised to see a "rep stos" scoring at 10% in laio_submit(),
apparently io_prep_*() do a memset on the iocb. Not sure if that is
necessary, but again, it has always been this way.

Patch 5 doesn't restore the performance, which makes sense, as qemu-img
only sends single requests.

Kevin

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted
  2014-12-16 13:10       ` Kevin Wolf
@ 2014-12-16 18:28         ` Paolo Bonzini
  2014-12-16 20:26           ` Paolo Bonzini
  0 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2014-12-16 18:28 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: ming.lei, pl, qemu-devel, stefanha



On 16/12/2014 14:10, Kevin Wolf wrote:
> Am 16.12.2014 um 12:28 hat Paolo Bonzini geschrieben:
>>
>>
>> On 16/12/2014 12:07, Kevin Wolf wrote:
>>> Am 11.12.2014 um 14:52 hat Paolo Bonzini geschrieben:
>>>> Keep a queue of requests that were not submitted; pass them to
>>>> the kernel when a completion is reported, unless the queue is
>>>> plugged.
>>>>
>>>> The array of iocbs is rebuilt every time from scratch.  This
>>>> avoids keeping the iocbs array and list synchronized.
>>>>
>>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>>>
>>> Just found out that in qemu-img bench, this patch seems to cost about
>>> 5-8% for me.
>>
>> What execution?  Queue depth=1?
> 
> My usual one:
> 
> $ ./qemu-img bench -t none -c 10000000 -n /dev/loop0
> Sending 10000000 requests, 4096 bytes each, 64 in parallel

I could reproduce this very well on a random OS image that I had around.
 This is raw over XFS over dm-crypt, and the image is about 75% sparse
(8.2G used over 35G).  I only get 1-2%, but still it's visible.

However I can hardly reproduce it when using a partition directly:

         old    new
mean     9.9565 9.9636      (+0.07%)
stddev   0.0405 0.0537
min      9.871  9.867
median   9.973  9.971
max      10.01  10.053
count    20     20

I haven't tried removing layers (e.g. fully-allocated XFS image without
dm-crypt).

Paolo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted
  2014-12-16 18:28         ` Paolo Bonzini
@ 2014-12-16 20:26           ` Paolo Bonzini
  2014-12-17 12:34             ` Paolo Bonzini
  0 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2014-12-16 20:26 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: ming.lei, pl, qemu-devel, stefanha


> I could reproduce this very well on a random OS image that I had around.
>  This is raw over XFS over dm-crypt, and the image is about 75% sparse
> (8.2G used over 35G).  I only get 1-2%, but still it's visible.
> 
> However I can hardly reproduce it when using a partition directly:
> 
>          old    new
> mean     9.9565 9.9636      (+0.07%)
> stddev   0.0405 0.0537
> min      9.871  9.867
> median   9.973  9.971
> max      10.01  10.053
> count    20     20
> 
> I haven't tried removing layers (e.g. fully-allocated XFS image without
> dm-crypt).

Could not reproduce it with a fully-allocated XFS image, on the contrary
the patched QEMU is a bit faster:

        old      new
mean    14.83325 14.82660
stddev  0.016930 0.010328
min     14.819   14.818
max     14.854   14.883
median  14.8225  14.8255
count   20       20

Paolo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted
  2014-12-16 20:26           ` Paolo Bonzini
@ 2014-12-17 12:34             ` Paolo Bonzini
  2014-12-17 15:03               ` Paolo Bonzini
  0 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2014-12-17 12:34 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: ming.lei, pl, qemu-devel, stefanha



On 16/12/2014 21:26, Paolo Bonzini wrote:
> 
>> I could reproduce this very well on a random OS image that I had around.
>>  This is raw over XFS over dm-crypt, and the image is about 75% sparse
>> (8.2G used over 35G).  I only get 1-2%, but still it's visible.
>>
>> However I can hardly reproduce it when using a partition directly:
>>
>>          old    new
>> mean     9.9565 9.9636      (+0.07%)
>> stddev   0.0405 0.0537
>> min      9.871  9.867
>> median   9.973  9.971
>> max      10.01  10.053
>> count    20     20
>>
>> I haven't tried removing layers (e.g. fully-allocated XFS image without
>> dm-crypt).
> 
> Could not reproduce it with a fully-allocated XFS image, on the contrary
> the patched QEMU is a bit faster:
> 
>         old      new
> mean    14.83325 14.82660
> stddev  0.016930 0.010328
> min     14.819   14.818
> max     14.854   14.883
> median  14.8225  14.8255
> count   20       20

Same for 50% sparse XFS image, patched QEMU a bit faster:

        old     new
Mean	8.31285 8.30625
stddev	0.03690 0.03333
min     8.277   8.276
max     8.378   8.382
median	8.292   8.2905
count   20      20

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted
  2014-12-17 12:34             ` Paolo Bonzini
@ 2014-12-17 15:03               ` Paolo Bonzini
  0 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2014-12-17 15:03 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: ming.lei, pl, qemu-devel, stefanha



On 17/12/2014 13:34, Paolo Bonzini wrote:
> 
> 
> On 16/12/2014 21:26, Paolo Bonzini wrote:
>>
>>> I could reproduce this very well on a random OS image that I had around.
>>>  This is raw over XFS over dm-crypt, and the image is about 75% sparse
>>> (8.2G used over 35G).  I only get 1-2%, but still it's visible.
>>>
>>> However I can hardly reproduce it when using a partition directly:
>>>
>>>          old    new
>>> mean     9.9565 9.9636      (+0.07%)
>>> stddev   0.0405 0.0537
>>> min      9.871  9.867
>>> median   9.973  9.971
>>> max      10.01  10.053
>>> count    20     20
>>>
>>> I haven't tried removing layers (e.g. fully-allocated XFS image without
>>> dm-crypt).
>>
>> Could not reproduce it with a fully-allocated XFS image, on the contrary
>> the patched QEMU is a bit faster:
>>
>>         old      new
>> mean    14.83325 14.82660
>> stddev  0.016930 0.010328
>> min     14.819   14.818
>> max     14.854   14.883
>> median  14.8225  14.8255
>> count   20       20
> 
> Same for 50% sparse XFS image, patched QEMU a bit faster:
> 
>         old     new
> Mean	8.31285 8.30625
> stddev	0.03690 0.03333
> min     8.277   8.276
> max     8.378   8.382
> median	8.292   8.2905
> count   20      20

Today I cannot reproduce it even on the original testcase:

       old     new
mean   9.64175 9.60935
stddev 0.15211 0.11117
min    9.445   9.454
max    10.102  9.835
median 9.64    9.6205
count  20      20

Some notes:

1) do you have Fam's io_get_events patch?  I don't, and the tests
probably should be done with it.

2) "qemu-img bench" probably could also be changed to use AioContext
directly instead of going through GMainLoop.  That said, there is some
low-hanging fruit for GMainLoop that I'll shortly send a patch for.

3) bench_cb does this:

        b->sector += b->bufsize;

Should it shift right by >> BDRV_SECTOR_BITS?  Or is it intended to do
the equivalent of a random read benchmark?

Paolo

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2014-12-17 15:03 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-12-11 13:52 [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code Paolo Bonzini
2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 1/5] linux-aio: queue requests that cannot be submitted Paolo Bonzini
2014-12-16 11:07   ` Kevin Wolf
2014-12-16 11:28     ` Paolo Bonzini
2014-12-16 13:10       ` Kevin Wolf
2014-12-16 18:28         ` Paolo Bonzini
2014-12-16 20:26           ` Paolo Bonzini
2014-12-17 12:34             ` Paolo Bonzini
2014-12-17 15:03               ` Paolo Bonzini
2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 2/5] linux-aio: track whether the queue is blocked Paolo Bonzini
2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 3/5] linux-aio: rename LaioQueue idx field to "n" Paolo Bonzini
2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 4/5] linux-aio: drop return code from laio_io_unplug and ioq_submit Paolo Bonzini
2014-12-11 13:52 ` [Qemu-devel] [PATCH v2 5/5] linux-aio: simplify removal of completed iocbs from the list Paolo Bonzini
2014-12-11 14:49 ` [Qemu-devel] [PATCH v2 0/5] linux-aio: rewrite and simplify queuing code Kevin Wolf
2014-12-12 17:07 ` Stefan Hajnoczi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).