From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, Bart Van Assche <bvanassche@acm.org>,
Stefan Hajnoczi <stefanha@redhat.com>,
Roman Penyaev <rpenyaev@suse.de>
Subject: [PATCH liburing 2/2] Fix the use of memory barriers
Date: Mon, 1 Jul 2019 14:42:32 -0700 [thread overview]
Message-ID: <20190701214232.29338-3-bvanassche@acm.org> (raw)
In-Reply-To: <20190701214232.29338-1-bvanassche@acm.org>
Introduce the smp_load_acquire() and smp_store_release() macros. Fix
synchronization in io_uring_cq_advance() and __io_uring_get_cqe().
Remove a superfluous local variable, if-test and write barrier from
__io_uring_submit(). Remove a superfluous barrier from
test/io_uring_enter.c.
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Roman Penyaev <rpenyaev@suse.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
man/io_uring_setup.2 | 6 ++-
src/barrier.h | 87 +++++++++++++++++++++++++++++++++++++++++--
src/liburing.h | 15 +++-----
src/queue.c | 30 ++++-----------
test/io_uring_enter.c | 8 ++--
5 files changed, 107 insertions(+), 39 deletions(-)
diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
index ebaee2d43f35..9ab01434c18d 100644
--- a/man/io_uring_setup.2
+++ b/man/io_uring_setup.2
@@ -97,7 +97,11 @@ call with the following code sequence:
.in +4n
.EX
-read_barrier();
+/*
+ * Ensure that the wakeup flag is read after the tail pointer has been
+ * written.
+ */
+smp_mb();
if (*sq_ring->flags & IORING_SQ_NEED_WAKEUP)
io_uring_enter(fd, 0, 0, IORING_ENTER_SQ_WAKEUP);
.EE
diff --git a/src/barrier.h b/src/barrier.h
index ef00f6722ba9..e1a407fccde2 100644
--- a/src/barrier.h
+++ b/src/barrier.h
@@ -1,16 +1,95 @@
#ifndef LIBURING_BARRIER_H
#define LIBURING_BARRIER_H
+/*
+From the kernel documentation file refcount-vs-atomic.rst:
+
+A RELEASE memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before the operation. It also guarantees that all po-earlier
+stores on the same CPU and all propagated stores from other CPUs
+must propagate to all other CPUs before the release operation
+(A-cumulative property). This is implemented using
+:c:func:`smp_store_release`.
+
+An ACQUIRE memory ordering guarantees that all post loads and
+stores (all po-later instructions) on the same CPU are
+completed after the acquire operation. It also guarantees that all
+po-later stores on the same CPU must propagate to all other CPUs
+after the acquire operation executes. This is implemented using
+:c:func:`smp_acquire__after_ctrl_dep`.
+*/
+
+/* From tools/include/linux/compiler.h */
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+/* From tools/virtio/linux/compiler.h */
+#define WRITE_ONCE(var, val) \
+ (*((volatile typeof(val) *)(&(var))) = (val))
+#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
+
+
#if defined(__x86_64) || defined(__i386__)
-#define read_barrier() __asm__ __volatile__("":::"memory")
-#define write_barrier() __asm__ __volatile__("":::"memory")
+/* From tools/arch/x86/include/asm/barrier.h */
+#if defined(__i386__)
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#elif defined(__x86_64__)
+#define mb() asm volatile("mfence" ::: "memory")
+#define rmb() asm volatile("lfence" ::: "memory")
+#define wmb() asm volatile("sfence" ::: "memory")
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#endif
+
+#if defined(__x86_64__)
+#define smp_store_release(p, v) \
+do { \
+ barrier(); \
+ WRITE_ONCE(*(p), (v)); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = READ_ONCE(*(p)); \
+ barrier(); \
+ ___p1; \
+})
+#endif /* defined(__x86_64__) */
#else
/*
* Add arch appropriate definitions. Be safe and use full barriers for
* archs we don't have support for.
*/
-#define read_barrier() __sync_synchronize()
-#define write_barrier() __sync_synchronize()
+#define smp_rmb() __sync_synchronize()
+#define smp_wmb() __sync_synchronize()
+#endif
+
+/* From tools/include/asm/barrier.h */
+
+#ifndef smp_store_release
+# define smp_store_release(p, v) \
+do { \
+ smp_mb(); \
+ WRITE_ONCE(*p, v); \
+} while (0)
+#endif
+
+#ifndef smp_load_acquire
+# define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
+ smp_mb(); \
+ ___p1; \
+})
#endif
#endif
diff --git a/src/liburing.h b/src/liburing.h
index d3fcd1524540..a350a013ef8a 100644
--- a/src/liburing.h
+++ b/src/liburing.h
@@ -88,11 +88,10 @@ extern int io_uring_register_eventfd(struct io_uring *ring, int fd);
extern int io_uring_unregister_eventfd(struct io_uring *ring);
#define io_uring_for_each_cqe(ring, head, cqe) \
+ /* smp_load_acquire() enforces the order of tail and CQE reads. */ \
for (head = *(ring)->cq.khead; \
- /* See read_barrier() explanation in __io_uring_get_cqe() */ \
- ({read_barrier(); \
- cqe = (head != *(ring)->cq.ktail ? \
- &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL);}); \
+ (cqe = (head != smp_load_acquire((ring)->cq.ktail) ? \
+ &(ring)->cq.cqes[head & (*(ring)->cq.kring_mask)] : NULL)); \
head++) \
@@ -105,13 +104,11 @@ static inline void io_uring_cq_advance(struct io_uring *ring,
if (nr) {
struct io_uring_cq *cq = &ring->cq;
- (*cq->khead) += nr;
-
/*
- * Ensure that the kernel sees our new head, the kernel has
- * the matching read barrier.
+ * Ensure that the kernel only sees the new value of the head
+ * index after the CQEs have been read.
*/
- write_barrier();
+ smp_store_release(cq->khead, *cq->khead + nr);
}
}
diff --git a/src/queue.c b/src/queue.c
index bec363fc0ebf..72b22935c2ef 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -77,7 +77,7 @@ static int __io_uring_submit(struct io_uring *ring, unsigned wait_nr)
{
struct io_uring_sq *sq = &ring->sq;
const unsigned mask = *sq->kring_mask;
- unsigned ktail, ktail_next, submitted, to_submit;
+ unsigned ktail, submitted, to_submit;
unsigned flags;
int ret;
@@ -88,15 +88,11 @@ static int __io_uring_submit(struct io_uring *ring, unsigned wait_nr)
* Fill in sqes that we have queued up, adding them to the kernel ring
*/
submitted = 0;
- ktail = ktail_next = *sq->ktail;
+ ktail = *sq->ktail;
to_submit = sq->sqe_tail - sq->sqe_head;
while (to_submit--) {
- ktail_next++;
- read_barrier();
-
sq->array[ktail & mask] = sq->sqe_head & mask;
- ktail = ktail_next;
-
+ ktail++;
sq->sqe_head++;
submitted++;
}
@@ -104,21 +100,11 @@ static int __io_uring_submit(struct io_uring *ring, unsigned wait_nr)
if (!submitted)
return 0;
- if (*sq->ktail != ktail) {
- /*
- * First write barrier ensures that the SQE stores are updated
- * with the tail update. This is needed so that the kernel
- * will never see a tail update without the preceeding sQE
- * stores being done.
- */
- write_barrier();
- *sq->ktail = ktail;
- /*
- * The kernel has the matching read barrier for reading the
- * SQ tail.
- */
- write_barrier();
- }
+ /*
+ * Ensure that the kernel sees the SQE updates before it sees the tail
+ * update.
+ */
+ smp_store_release(sq->ktail, ktail);
flags = 0;
if (wait_nr || sq_ring_needs_enter(ring, &flags)) {
diff --git a/test/io_uring_enter.c b/test/io_uring_enter.c
index d6e407e621ff..b25afd5790f3 100644
--- a/test/io_uring_enter.c
+++ b/test/io_uring_enter.c
@@ -262,9 +262,11 @@ main(int argc, char **argv)
ktail = *sq->ktail;
sq->array[ktail & mask] = index;
++ktail;
- write_barrier();
- *sq->ktail = ktail;
- write_barrier();
+ /*
+ * Ensure that the kernel sees the SQE update before it sees the tail
+ * update.
+ */
+ smp_store_release(sq->ktail, ktail);
ret = io_uring_enter(ring.ring_fd, 1, 0, 0, NULL);
/* now check to see if our sqe was dropped */
--
2.22.0.410.gd8fdbe21b5-goog
next prev parent reply other threads:[~2019-07-01 21:42 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-07-01 21:42 [PATCH liburing 0/2] Memory synchronization improvements Bart Van Assche
2019-07-01 21:42 ` [PATCH liburing 1/2] __io_uring_get_cqe(): Use io_uring_for_each_cqe() Bart Van Assche
2019-07-01 21:42 ` Bart Van Assche [this message]
2019-07-02 9:07 ` [PATCH liburing 2/2] Fix the use of memory barriers Roman Penyaev
2019-07-02 16:17 ` Bart Van Assche
2019-07-02 18:40 ` Roman Penyaev
2019-07-02 20:31 ` Bart Van Assche
2019-07-03 9:49 ` Roman Penyaev
2019-07-03 20:49 ` Bart Van Assche
2019-07-02 16:18 ` [PATCH liburing 0/2] Memory synchronization improvements Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190701214232.29338-3-bvanassche@acm.org \
--to=bvanassche@acm.org \
--cc=axboe@kernel.dk \
--cc=linux-block@vger.kernel.org \
--cc=rpenyaev@suse.de \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).