From: Peter Zijlstra <peterz@infradead.org>
To: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>, John Stultz <john.stultz@linaro.org>,
Ingo Molnar <mingo@redhat.com>,
lkml <linux-kernel@vger.kernel.org>,
Dmitry Shmidt <dimitrysh@google.com>,
Rom Lemarchand <romlem@google.com>,
Colin Cross <ccross@google.com>, Todd Kjos <tkjos@google.com>,
"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Subject: Re: Severe performance regression w/ 4.4+ on Android due to cgroup locking changes
Date: Thu, 14 Jul 2016 21:35:37 +0200 [thread overview]
Message-ID: <20160714193537.GQ30927@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <20160714183609.GJ30154@twins.programming.kicks-ass.net>
On Thu, Jul 14, 2016 at 08:36:09PM +0200, Peter Zijlstra wrote:
> On Thu, Jul 14, 2016 at 08:09:52PM +0200, Oleg Nesterov wrote:
> > On 07/14, Peter Zijlstra wrote:
> > >
> > > The below is a compile tested only first draft so far. I'll go give it
> > > some runtime next.
> >
> > So I will wait for the new version, but at first glance this matches the
> > code I already reviewed in the past (at least, tried hard to review ;)
> > and it looks correct.
> >
> > Just a couple of almost cosmetic nits, feel free to ignore.
>
> Drat, I just mailed out the patches.. I can do a second version later.
How about so on top of what I sent?
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -13,11 +13,10 @@ struct percpu_rw_semaphore {
unsigned int __percpu *read_count;
struct rw_semaphore rw_sem;
wait_queue_head_t writer;
- int state;
+ int readers_block;
};
-extern void __percpu_down_read(struct percpu_rw_semaphore *);
-extern int __percpu_down_read_trylock(struct percpu_rw_semaphore *);
+extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
extern void __percpu_up_read(struct percpu_rw_semaphore *);
static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
@@ -37,7 +36,7 @@ static inline void percpu_down_read(stru
*/
__this_cpu_inc(*sem->read_count);
if (unlikely(!rcu_sync_is_idle(&sem->rss)))
- __percpu_down_read(sem); /* Unconditional memory barrier */
+ __percpu_down_read(sem, false); /* Unconditional memory barrier */
preempt_enable();
/*
* The barrier() from preempt_enable() prevents the compiler from
@@ -55,7 +54,7 @@ static inline int percpu_down_read_trylo
*/
__this_cpu_inc(*sem->read_count);
if (unlikely(!rcu_sync_is_idle(&sem->rss)))
- ret = __percpu_down_read_trylock(sem);
+ ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
preempt_enable();
/*
* The barrier() from preempt_enable() prevents the compiler from
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -8,8 +8,6 @@
#include <linux/sched.h>
#include <linux/errno.h>
-enum { readers_slow, readers_block };
-
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
const char *name, struct lock_class_key *rwsem_key)
{
@@ -21,7 +19,7 @@ int __percpu_init_rwsem(struct percpu_rw
rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
__init_rwsem(&sem->rw_sem, name, rwsem_key);
init_waitqueue_head(&sem->writer);
- sem->state = readers_slow;
+ sem->readers_block = 0;
return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
@@ -41,21 +39,21 @@ void percpu_free_rwsem(struct percpu_rw_
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
-void __percpu_down_read(struct percpu_rw_semaphore *sem)
+int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
{
/*
* Due to having preemption disabled the decrement happens on
* the same CPU as the increment, avoiding the
* increment-on-one-CPU-and-decrement-on-another problem.
*
- * And yes, if the reader misses the writer's assignment of
- * readers_block to sem->state, then the writer is
- * guaranteed to see the reader's increment. Conversely, any
- * readers that increment their sem->read_count after the
- * writer looks are guaranteed to see the readers_block value,
+ * If the reader misses the writer's assignment of readers_block, then
+ * the writer is guaranteed to see the reader's increment.
+ *
+ * Conversely, any readers that increment their sem->read_count after
+ * the writer looks are guaranteed to see the readers_block value,
* which in turn means that they are guaranteed to immediately
- * decrement their sem->read_count, so that it doesn't matter
- * that the writer missed them.
+ * decrement their sem->read_count, so that it doesn't matter that the
+ * writer missed them.
*/
smp_mb(); /* A matches D */
@@ -64,8 +62,8 @@ void __percpu_down_read(struct percpu_rw
* If !readers_block the critical section starts here, matched by the
* release in percpu_up_write().
*/
- if (likely(smp_load_acquire(&sem->state) != readers_block))
- return;
+ if (likely(!smp_load_acquire(&sem->readers_block)))
+ return 1;
/*
* Per the above comment; we still have preemption disabled and
@@ -73,6 +71,9 @@ void __percpu_down_read(struct percpu_rw
*/
__percpu_up_read(sem);
+ if (try)
+ return 0;
+
/*
* We either call schedule() in the wait, or we'll fall through
* and reschedule on the preempt_enable() in percpu_down_read().
@@ -87,21 +88,10 @@ void __percpu_down_read(struct percpu_rw
__up_read(&sem->rw_sem);
preempt_disable();
+ return 1;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);
-int __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
-{
- smp_mb(); /* A matches D */
-
- if (likely(smp_load_acquire(&sem->state) != readers_block))
- return 1;
-
- __percpu_up_read(sem);
- return 0;
-}
-EXPORT_SYMBOL_GPL(__percpu_down_read_trylock);
-
void __percpu_up_read(struct percpu_rw_semaphore *sem)
{
smp_mb(); /* B matches C */
@@ -150,23 +140,23 @@ static bool readers_active_check(struct
void percpu_down_write(struct percpu_rw_semaphore *sem)
{
- down_write(&sem->rw_sem);
-
/* Notify readers to take the slow path. */
rcu_sync_enter(&sem->rss);
+ down_write(&sem->rw_sem);
+
/*
* Notify new readers to block; up until now, and thus throughout the
* longish rcu_sync_enter() above, new readers could still come in.
*/
- WRITE_ONCE(sem->state, readers_block);
+ WRITE_ONCE(sem->readers_block, 1);
smp_mb(); /* D matches A */
/*
- * If they don't see our writer of readers_block to sem->state,
- * then we are guaranteed to see their sem->read_count increment, and
- * therefore will wait for them.
+ * If they don't see our write of readers_block, then we are
+ * guaranteed to see their sem->read_count increment, and therefore
+ * will wait for them.
*/
/* Wait for all now active readers to complete. */
@@ -186,7 +176,7 @@ void percpu_up_write(struct percpu_rw_se
* Therefore we force it through the slow path which guarantees an
* acquire and thereby guarantees the critical section's consistency.
*/
- smp_store_release(&sem->state, readers_slow);
+ smp_store_release(&sem->readers_block, 0);
/*
* Release the write lock, this will allow readers back in the game.
@@ -194,9 +184,9 @@ void percpu_up_write(struct percpu_rw_se
up_write(&sem->rw_sem);
/*
- * Once this completes (at least one RCU grace period hence) the reader
- * fast path will be available again. Safe to use outside the exclusive
- * write lock because its counting.
+ * Once this completes (at least one RCU-sched grace period hence) the
+ * reader fast path will be available again. Safe to use outside the
+ * exclusive write lock because it's counting.
*/
rcu_sync_exit(&sem->rss);
}
next prev parent reply other threads:[~2016-07-14 19:35 UTC|newest]
Thread overview: 67+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-07-13 0:00 Severe performance regression w/ 4.4+ on Android due to cgroup locking changes John Stultz
2016-07-13 8:21 ` Peter Zijlstra
2016-07-13 14:42 ` Paul E. McKenney
2016-07-13 18:13 ` Dmitry Shmidt
2016-07-13 18:32 ` Paul E. McKenney
2016-07-13 18:21 ` Tejun Heo
2016-07-13 18:33 ` Tejun Heo
2016-07-13 20:13 ` John Stultz
2016-07-13 20:18 ` Tejun Heo
2016-07-13 20:26 ` Peter Zijlstra
2016-07-13 20:39 ` Tejun Heo
2016-07-13 20:51 ` Peter Zijlstra
2016-07-13 21:01 ` Tejun Heo
2016-07-13 21:03 ` Paul E. McKenney
2016-07-13 21:05 ` Tejun Heo
2016-07-13 21:18 ` Paul E. McKenney
2016-07-13 21:42 ` Paul E. McKenney
2016-07-13 21:46 ` John Stultz
2016-07-13 22:17 ` Paul E. McKenney
2016-07-13 22:39 ` John Stultz
2016-07-13 23:02 ` Paul E. McKenney
2016-07-13 23:04 ` Paul E. McKenney
2016-07-14 11:35 ` Tejun Heo
2016-07-14 12:04 ` Peter Zijlstra
2016-07-14 12:08 ` Tejun Heo
2016-07-14 12:20 ` Peter Zijlstra
2016-07-14 15:07 ` Tejun Heo
2016-07-14 15:24 ` Tejun Heo
2016-07-14 16:32 ` Peter Zijlstra
2016-07-14 17:34 ` Oleg Nesterov
2016-07-14 16:54 ` John Stultz
2016-07-13 22:25 ` John Stultz
2016-07-13 22:01 ` Tejun Heo
2016-07-13 22:33 ` Paul E. McKenney
2016-07-14 6:49 ` Peter Zijlstra
2016-07-14 11:20 ` Tejun Heo
2016-07-14 12:11 ` Peter Zijlstra
2016-07-14 15:14 ` Tejun Heo
2016-07-14 13:18 ` Peter Zijlstra
2016-07-14 14:14 ` Peter Zijlstra
2016-07-14 14:58 ` Oleg Nesterov
2016-07-14 16:14 ` Peter Zijlstra
2016-07-14 16:37 ` Peter Zijlstra
2016-07-14 17:05 ` Oleg Nesterov
2016-07-14 16:23 ` Paul E. McKenney
2016-07-14 16:45 ` Peter Zijlstra
2016-07-14 17:15 ` Paul E. McKenney
2016-07-14 16:43 ` John Stultz
2016-07-14 16:49 ` Peter Zijlstra
2016-07-14 17:02 ` John Stultz
2016-07-14 17:13 ` Oleg Nesterov
2016-07-14 17:30 ` John Stultz
2016-07-14 17:41 ` Oleg Nesterov
2016-07-14 17:51 ` John Stultz
2016-07-14 18:09 ` Oleg Nesterov
2016-07-14 18:36 ` Peter Zijlstra
2016-07-14 19:35 ` Peter Zijlstra [this message]
2016-07-13 20:57 ` John Stultz
2016-07-13 20:52 ` Paul E. McKenney
2016-07-13 20:57 ` Peter Zijlstra
2016-07-13 21:08 ` Paul E. McKenney
2016-07-13 21:01 ` Dmitry Shmidt
2016-07-13 21:03 ` John Stultz
2016-07-13 21:05 ` Paul E. McKenney
2016-07-13 20:31 ` Dmitry Shmidt
2016-07-13 20:44 ` Colin Cross
2016-07-13 20:54 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160714193537.GQ30927@twins.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=ccross@google.com \
--cc=dimitrysh@google.com \
--cc=john.stultz@linaro.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=oleg@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=romlem@google.com \
--cc=tj@kernel.org \
--cc=tkjos@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox