LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v6 02/46] percpu_rwlock: Introduce per-CPU variables for the reader and the writer
From: Srivatsa S. Bhat @ 2013-02-18 12:38 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Per-CPU rwlocks ought to give better performance than global rwlocks.
That is where the "per-CPU" component comes in. So introduce the per-CPU
variables that are needed at the reader and the writer sides, and add
support for dynamically initializing per-CPU rwlocks.
These per-CPU variables will be used subsequently to implement the core
algorithm behind per-CPU rwlocks.

Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 include/linux/percpu-rwlock.h |    8 ++++++++
 lib/percpu-rwlock.c           |   12 ++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/include/linux/percpu-rwlock.h b/include/linux/percpu-rwlock.h
index 0caf81f..74eaf4d 100644
--- a/include/linux/percpu-rwlock.h
+++ b/include/linux/percpu-rwlock.h
@@ -28,7 +28,13 @@
 #include <linux/lockdep.h>
 #include <linux/spinlock.h>
 
+struct rw_state {
+	unsigned long	reader_refcnt;
+	bool		writer_signal;
+};
+
 struct percpu_rwlock {
+	struct rw_state __percpu	*rw_state;
 	rwlock_t			global_rwlock;
 };
 
@@ -41,6 +47,8 @@ extern void percpu_write_unlock(struct percpu_rwlock *);
 extern int __percpu_init_rwlock(struct percpu_rwlock *,
 				const char *, struct lock_class_key *);
 
+extern void percpu_free_rwlock(struct percpu_rwlock *);
+
 #define percpu_init_rwlock(pcpu_rwlock)					\
 ({	static struct lock_class_key rwlock_key;			\
 	__percpu_init_rwlock(pcpu_rwlock, #pcpu_rwlock, &rwlock_key);	\
diff --git a/lib/percpu-rwlock.c b/lib/percpu-rwlock.c
index 111a238..f938096 100644
--- a/lib/percpu-rwlock.c
+++ b/lib/percpu-rwlock.c
@@ -31,6 +31,10 @@
 int __percpu_init_rwlock(struct percpu_rwlock *pcpu_rwlock,
 			 const char *name, struct lock_class_key *rwlock_key)
 {
+	pcpu_rwlock->rw_state = alloc_percpu(struct rw_state);
+	if (unlikely(!pcpu_rwlock->rw_state))
+		return -ENOMEM;
+
 	/* ->global_rwlock represents the whole percpu_rwlock for lockdep */
 #ifdef CONFIG_DEBUG_SPINLOCK
 	__rwlock_init(&pcpu_rwlock->global_rwlock, name, rwlock_key);
@@ -41,6 +45,14 @@ int __percpu_init_rwlock(struct percpu_rwlock *pcpu_rwlock,
 	return 0;
 }
 
+void percpu_free_rwlock(struct percpu_rwlock *pcpu_rwlock)
+{
+	free_percpu(pcpu_rwlock->rw_state);
+
+	/* Catch use-after-free bugs */
+	pcpu_rwlock->rw_state = NULL;
+}
+
 void percpu_read_lock(struct percpu_rwlock *pcpu_rwlock)
 {
 	read_lock(&pcpu_rwlock->global_rwlock);

^ permalink raw reply related

* [PATCH v6 03/46] percpu_rwlock: Provide a way to define and init percpu-rwlocks at compile time
From: Srivatsa S. Bhat @ 2013-02-18 12:38 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Add the support for defining and initializing percpu-rwlocks at compile time
for those users who would like to use percpu-rwlocks really early in the boot
process (even before dynamic per-CPU allocations can begin).

Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 include/linux/percpu-rwlock.h |   15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/linux/percpu-rwlock.h b/include/linux/percpu-rwlock.h
index 74eaf4d..5590b1e 100644
--- a/include/linux/percpu-rwlock.h
+++ b/include/linux/percpu-rwlock.h
@@ -49,6 +49,21 @@ extern int __percpu_init_rwlock(struct percpu_rwlock *,
 
 extern void percpu_free_rwlock(struct percpu_rwlock *);
 
+
+#define __PERCPU_RWLOCK_INIT(name)					\
+	{								\
+		.rw_state = &name##_rw_state,				\
+		.global_rwlock = __RW_LOCK_UNLOCKED(name.global_rwlock) \
+	}
+
+#define DEFINE_PERCPU_RWLOCK(name)					\
+	static DEFINE_PER_CPU(struct rw_state, name##_rw_state);	\
+	struct percpu_rwlock (name) = __PERCPU_RWLOCK_INIT(name);
+
+#define DEFINE_STATIC_PERCPU_RWLOCK(name)				\
+	static DEFINE_PER_CPU(struct rw_state, name##_rw_state);	\
+	static struct percpu_rwlock(name) = __PERCPU_RWLOCK_INIT(name);
+
 #define percpu_init_rwlock(pcpu_rwlock)					\
 ({	static struct lock_class_key rwlock_key;			\
 	__percpu_init_rwlock(pcpu_rwlock, #pcpu_rwlock, &rwlock_key);	\

^ permalink raw reply related

* [PATCH v6 04/46] percpu_rwlock: Implement the core design of Per-CPU Reader-Writer Locks
From: Srivatsa S. Bhat @ 2013-02-18 12:38 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Using global rwlocks as the backend for per-CPU rwlocks helps us avoid many
lock-ordering related problems (unlike per-cpu locks). However, global
rwlocks lead to unnecessary cache-line bouncing even when there are no
writers present, which can slow down the system needlessly.

Per-cpu counters can help solve the cache-line bouncing problem. So we
actually use the best of both: per-cpu counters (no-waiting) at the reader
side in the fast-path, and global rwlocks in the slowpath.

[ Fastpath = no writer is active; Slowpath = a writer is active ]

IOW, the readers just increment/decrement their per-cpu refcounts (disabling
interrupts during the updates, if necessary) when no writer is active.
When a writer becomes active, he signals all readers to switch to global
rwlocks for the duration of his activity. The readers switch over when it
is safe for them (i.e., when they are about to start a fresh, non-nested
read-side critical section) and start using (holding) the global rwlock for
read in their subsequent critical sections.

The writer waits for every existing reader to switch, and then acquires the
global rwlock for write and enters his critical section. Later, the writer
signals all readers that he is done, and that they can go back to using their
per-cpu refcounts again.

Note that the lock-safety (despite the per-cpu scheme) comes from the fact
that the readers can *choose* _when_ to switch to rwlocks upon the writer's
signal. And the readers don't wait on anybody based on the per-cpu counters.
The only true synchronization that involves waiting at the reader-side in this
scheme, is the one arising from the global rwlock, which is safe from circular
locking dependency issues.

Reader-writer locks and per-cpu counters are recursive, so they can be
used in a nested fashion in the reader-path, which makes per-CPU rwlocks also
recursive. Also, this design of switching the synchronization scheme ensures
that you can safely nest and use these locks in a very flexible manner.

I'm indebted to Michael Wang and Xiao Guangrong for their numerous thoughtful
suggestions and ideas, which inspired and influenced many of the decisions in
this as well as previous designs. Thanks a lot Michael and Xiao!

Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 lib/percpu-rwlock.c |  139 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 137 insertions(+), 2 deletions(-)

diff --git a/lib/percpu-rwlock.c b/lib/percpu-rwlock.c
index f938096..edefdea 100644
--- a/lib/percpu-rwlock.c
+++ b/lib/percpu-rwlock.c
@@ -27,6 +27,24 @@
 #include <linux/percpu-rwlock.h>
 #include <linux/errno.h>
 
+#include <asm/processor.h>
+
+
+#define reader_yet_to_switch(pcpu_rwlock, cpu)				    \
+	(ACCESS_ONCE(per_cpu_ptr((pcpu_rwlock)->rw_state, cpu)->reader_refcnt))
+
+#define reader_percpu_nesting_depth(pcpu_rwlock)		  \
+	(__this_cpu_read((pcpu_rwlock)->rw_state->reader_refcnt))
+
+#define reader_uses_percpu_refcnt(pcpu_rwlock)				\
+				reader_percpu_nesting_depth(pcpu_rwlock)
+
+#define reader_nested_percpu(pcpu_rwlock)				\
+			(reader_percpu_nesting_depth(pcpu_rwlock) > 1)
+
+#define writer_active(pcpu_rwlock)					\
+	(__this_cpu_read((pcpu_rwlock)->rw_state->writer_signal))
+
 
 int __percpu_init_rwlock(struct percpu_rwlock *pcpu_rwlock,
 			 const char *name, struct lock_class_key *rwlock_key)
@@ -55,21 +73,138 @@ void percpu_free_rwlock(struct percpu_rwlock *pcpu_rwlock)
 
 void percpu_read_lock(struct percpu_rwlock *pcpu_rwlock)
 {
-	read_lock(&pcpu_rwlock->global_rwlock);
+	preempt_disable();
+
+	/*
+	 * Let the writer know that a reader is active, even before we choose
+	 * our reader-side synchronization scheme.
+	 */
+	this_cpu_inc(pcpu_rwlock->rw_state->reader_refcnt);
+
+	/*
+	 * If we are already using per-cpu refcounts, it is not safe to switch
+	 * the synchronization scheme. So continue using the refcounts.
+	 */
+	if (reader_nested_percpu(pcpu_rwlock))
+		return;
+
+	/*
+	 * The write to 'reader_refcnt' must be visible before we read
+	 * 'writer_signal'.
+	 */
+	smp_mb();
+
+	if (likely(!writer_active(pcpu_rwlock))) {
+		goto out;
+	} else {
+		/* Writer is active, so switch to global rwlock. */
+		read_lock(&pcpu_rwlock->global_rwlock);
+
+		/*
+		 * We might have raced with a writer going inactive before we
+		 * took the read-lock. So re-evaluate whether we still need to
+		 * hold the rwlock or if we can switch back to per-cpu
+		 * refcounts. (This also helps avoid heterogeneous nesting of
+		 * readers).
+		 */
+		if (writer_active(pcpu_rwlock)) {
+			/*
+			 * The above writer_active() check can get reordered
+			 * with this_cpu_dec() below, but this is OK, because
+			 * holding the rwlock is conservative.
+			 */
+			this_cpu_dec(pcpu_rwlock->rw_state->reader_refcnt);
+		} else {
+			read_unlock(&pcpu_rwlock->global_rwlock);
+		}
+	}
+
+out:
+	/* Prevent reordering of any subsequent reads/writes */
+	smp_mb();
 }
 
 void percpu_read_unlock(struct percpu_rwlock *pcpu_rwlock)
 {
-	read_unlock(&pcpu_rwlock->global_rwlock);
+	/*
+	 * We never allow heterogeneous nesting of readers. So it is trivial
+	 * to find out the kind of reader we are, and undo the operation
+	 * done by our corresponding percpu_read_lock().
+	 */
+
+	/* Try to fast-path: a nested percpu reader is the simplest case */
+	if (reader_nested_percpu(pcpu_rwlock)) {
+		this_cpu_dec(pcpu_rwlock->rw_state->reader_refcnt);
+		preempt_enable();
+		return;
+	}
+
+	/*
+	 * Now we are left with only 2 options: a non-nested percpu reader,
+	 * or a reader holding rwlock
+	 */
+	if (reader_uses_percpu_refcnt(pcpu_rwlock)) {
+		/*
+		 * Complete the critical section before decrementing the
+		 * refcnt. We can optimize this away if we are a nested
+		 * reader (the case above).
+		 */
+		smp_mb();
+		this_cpu_dec(pcpu_rwlock->rw_state->reader_refcnt);
+	} else {
+		read_unlock(&pcpu_rwlock->global_rwlock);
+	}
+
+	preempt_enable();
 }
 
 void percpu_write_lock(struct percpu_rwlock *pcpu_rwlock)
 {
+	unsigned int cpu;
+
+	/*
+	 * Tell all readers that a writer is becoming active, so that they
+	 * start switching over to the global rwlock.
+	 */
+	for_each_possible_cpu(cpu)
+		per_cpu_ptr(pcpu_rwlock->rw_state, cpu)->writer_signal = true;
+
+	smp_mb();
+
+	/*
+	 * Wait for every reader to see the writer's signal and switch from
+	 * percpu refcounts to global rwlock.
+	 *
+	 * If a reader is still using percpu refcounts, wait for him to switch.
+	 * Else, we can safely go ahead, because either the reader has already
+	 * switched over, or the next reader that comes along on that CPU will
+	 * notice the writer's signal and will switch over to the rwlock.
+	 */
+
+	for_each_possible_cpu(cpu) {
+		while (reader_yet_to_switch(pcpu_rwlock, cpu))
+			cpu_relax();
+	}
+
+	smp_mb(); /* Complete the wait-for-readers, before taking the lock */
 	write_lock(&pcpu_rwlock->global_rwlock);
 }
 
 void percpu_write_unlock(struct percpu_rwlock *pcpu_rwlock)
 {
+	unsigned int cpu;
+
+	/* Complete the critical section before clearing ->writer_signal */
+	smp_mb();
+
+	/*
+	 * Inform all readers that we are done, so that they can switch back
+	 * to their per-cpu refcounts. (We don't need to wait for them to
+	 * see it).
+	 */
+	for_each_possible_cpu(cpu)
+		per_cpu_ptr(pcpu_rwlock->rw_state, cpu)->writer_signal = false;
+
 	write_unlock(&pcpu_rwlock->global_rwlock);
 }
 

^ permalink raw reply related

* [PATCH v6 05/46] percpu_rwlock: Make percpu-rwlocks IRQ-safe, optimally
From: Srivatsa S. Bhat @ 2013-02-18 12:39 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

If interrupt handlers can also be readers, then one of the ways to make
per-CPU rwlocks safe, is to disable interrupts at the reader side before
trying to acquire the per-CPU rwlock and keep it disabled throughout the
duration of the read-side critical section.

The goal is to avoid cases such as:

  1. writer is active and it holds the global rwlock for write

  2. a regular reader comes in and marks itself as present (by incrementing
     its per-CPU refcount) before checking whether writer is active.

  3. an interrupt hits the reader;
     [If it had not hit, the reader would have noticed that the writer is
      active and would have decremented its refcount and would have tried
      to acquire the global rwlock for read].
     Since the interrupt handler also happens to be a reader, it notices
     the non-zero refcount (which was due to the reader who got interrupted)
     and thinks that this is a nested read-side critical section and
     proceeds to take the fastpath, which is wrong. The interrupt handler
     should have noticed that the writer is active and taken the rwlock
     for read.

So, disabling interrupts can help avoid this problem (at the cost of keeping
the interrupts disabled for quite long).

But Oleg had a brilliant idea by which we can do much better than that:
we can manage with disabling interrupts _just_ during the updates (writes to
per-CPU refcounts) to safe-guard against races with interrupt handlers.
Beyond that, we can keep the interrupts enabled and still be safe w.r.t
interrupt handlers that can act as readers.

Basically the idea is that we differentiate between the *part* of the
per-CPU refcount that we use for reference counting vs the part that we use
merely to make the writer wait for us to switch over to the right
synchronization scheme.

The scheme involves splitting the per-CPU refcounts into 2 parts:
eg: the lower 16 bits are used to track the nesting depth of the reader
(a "nested-counter"), and the remaining (upper) bits are used to merely mark
the presence of the reader.

As long as the overall reader_refcnt is non-zero, the writer waits for the
reader (assuming that the reader is still actively using per-CPU refcounts for
synchronization).

The reader first sets one of the higher bits to mark its presence, and then
uses the lower 16 bits to manage the nesting depth. So, an interrupt handler
coming in as illustrated above will be able to distinguish between "this is
a nested read-side critical section" vs "we have merely marked our presence
to make the writer wait for us to switch" by looking at the same refcount.
Thus, it makes it unnecessary to keep interrupts disabled throughout the
read-side critical section, despite having the possibility of interrupt
handlers being readers themselves.


Implement this logic and rename the locking functions appropriately, to
reflect what they do.

Based-on-idea-by: Oleg Nesterov <oleg@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 include/linux/percpu-rwlock.h |   10 ++++---
 lib/percpu-rwlock.c           |   57 ++++++++++++++++++++++++++---------------
 2 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/include/linux/percpu-rwlock.h b/include/linux/percpu-rwlock.h
index 5590b1e..8c9e145 100644
--- a/include/linux/percpu-rwlock.h
+++ b/include/linux/percpu-rwlock.h
@@ -38,11 +38,13 @@ struct percpu_rwlock {
 	rwlock_t			global_rwlock;
 };
 
-extern void percpu_read_lock(struct percpu_rwlock *);
-extern void percpu_read_unlock(struct percpu_rwlock *);
+extern void percpu_read_lock_irqsafe(struct percpu_rwlock *);
+extern void percpu_read_unlock_irqsafe(struct percpu_rwlock *);
 
-extern void percpu_write_lock(struct percpu_rwlock *);
-extern void percpu_write_unlock(struct percpu_rwlock *);
+extern void percpu_write_lock_irqsave(struct percpu_rwlock *,
+				      unsigned long *flags);
+extern void percpu_write_unlock_irqrestore(struct percpu_rwlock *,
+					   unsigned long *flags);
 
 extern int __percpu_init_rwlock(struct percpu_rwlock *,
 				const char *, struct lock_class_key *);
diff --git a/lib/percpu-rwlock.c b/lib/percpu-rwlock.c
index edefdea..ce7e440 100644
--- a/lib/percpu-rwlock.c
+++ b/lib/percpu-rwlock.c
@@ -30,11 +30,15 @@
 #include <asm/processor.h>
 
 
+#define READER_PRESENT		(1UL << 16)
+#define READER_REFCNT_MASK	(READER_PRESENT - 1)
+
 #define reader_yet_to_switch(pcpu_rwlock, cpu)				    \
 	(ACCESS_ONCE(per_cpu_ptr((pcpu_rwlock)->rw_state, cpu)->reader_refcnt))
 
-#define reader_percpu_nesting_depth(pcpu_rwlock)		  \
-	(__this_cpu_read((pcpu_rwlock)->rw_state->reader_refcnt))
+#define reader_percpu_nesting_depth(pcpu_rwlock)			\
+	(__this_cpu_read((pcpu_rwlock)->rw_state->reader_refcnt) &	\
+	 READER_REFCNT_MASK)
 
 #define reader_uses_percpu_refcnt(pcpu_rwlock)				\
 				reader_percpu_nesting_depth(pcpu_rwlock)
@@ -71,7 +75,7 @@ void percpu_free_rwlock(struct percpu_rwlock *pcpu_rwlock)
 	pcpu_rwlock->rw_state = NULL;
 }
 
-void percpu_read_lock(struct percpu_rwlock *pcpu_rwlock)
+void percpu_read_lock_irqsafe(struct percpu_rwlock *pcpu_rwlock)
 {
 	preempt_disable();
 
@@ -79,14 +83,18 @@ void percpu_read_lock(struct percpu_rwlock *pcpu_rwlock)
 	 * Let the writer know that a reader is active, even before we choose
 	 * our reader-side synchronization scheme.
 	 */
-	this_cpu_inc(pcpu_rwlock->rw_state->reader_refcnt);
+	this_cpu_add(pcpu_rwlock->rw_state->reader_refcnt, READER_PRESENT);
 
 	/*
 	 * If we are already using per-cpu refcounts, it is not safe to switch
 	 * the synchronization scheme. So continue using the refcounts.
 	 */
-	if (reader_nested_percpu(pcpu_rwlock))
+	if (reader_uses_percpu_refcnt(pcpu_rwlock)) {
+		this_cpu_inc(pcpu_rwlock->rw_state->reader_refcnt);
+		this_cpu_sub(pcpu_rwlock->rw_state->reader_refcnt,
+			     READER_PRESENT);
 		return;
+	}
 
 	/*
 	 * The write to 'reader_refcnt' must be visible before we read
@@ -95,9 +103,19 @@ void percpu_read_lock(struct percpu_rwlock *pcpu_rwlock)
 	smp_mb();
 
 	if (likely(!writer_active(pcpu_rwlock))) {
-		goto out;
+		this_cpu_inc(pcpu_rwlock->rw_state->reader_refcnt);
 	} else {
 		/* Writer is active, so switch to global rwlock. */
+
+		/*
+		 * While we are spinning on ->global_rwlock, an
+		 * interrupt can hit us, and the interrupt handler
+		 * might call this function. The distinction between
+		 * READER_PRESENT and the refcnt helps ensure that the
+		 * interrupt handler also takes this branch and spins
+		 * on the ->global_rwlock, as long as the writer is
+		 * active.
+		 */
 		read_lock(&pcpu_rwlock->global_rwlock);
 
 		/*
@@ -107,29 +125,24 @@ void percpu_read_lock(struct percpu_rwlock *pcpu_rwlock)
 		 * refcounts. (This also helps avoid heterogeneous nesting of
 		 * readers).
 		 */
-		if (writer_active(pcpu_rwlock)) {
-			/*
-			 * The above writer_active() check can get reordered
-			 * with this_cpu_dec() below, but this is OK, because
-			 * holding the rwlock is conservative.
-			 */
-			this_cpu_dec(pcpu_rwlock->rw_state->reader_refcnt);
-		} else {
+		if (!writer_active(pcpu_rwlock)) {
+			this_cpu_inc(pcpu_rwlock->rw_state->reader_refcnt);
 			read_unlock(&pcpu_rwlock->global_rwlock);
 		}
 	}
 
-out:
+	this_cpu_sub(pcpu_rwlock->rw_state->reader_refcnt, READER_PRESENT);
+
 	/* Prevent reordering of any subsequent reads/writes */
 	smp_mb();
 }
 
-void percpu_read_unlock(struct percpu_rwlock *pcpu_rwlock)
+void percpu_read_unlock_irqsafe(struct percpu_rwlock *pcpu_rwlock)
 {
 	/*
 	 * We never allow heterogeneous nesting of readers. So it is trivial
 	 * to find out the kind of reader we are, and undo the operation
-	 * done by our corresponding percpu_read_lock().
+	 * done by our corresponding percpu_read_lock_irqsafe().
 	 */
 
 	/* Try to fast-path: a nested percpu reader is the simplest case */
@@ -158,7 +171,8 @@ void percpu_read_unlock(struct percpu_rwlock *pcpu_rwlock)
 	preempt_enable();
 }
 
-void percpu_write_lock(struct percpu_rwlock *pcpu_rwlock)
+void percpu_write_lock_irqsave(struct percpu_rwlock *pcpu_rwlock,
+			       unsigned long *flags)
 {
 	unsigned int cpu;
 
@@ -187,10 +201,11 @@ void percpu_write_lock(struct percpu_rwlock *pcpu_rwlock)
 	}
 
 	smp_mb(); /* Complete the wait-for-readers, before taking the lock */
-	write_lock(&pcpu_rwlock->global_rwlock);
+	write_lock_irqsave(&pcpu_rwlock->global_rwlock, *flags);
 }
 
-void percpu_write_unlock(struct percpu_rwlock *pcpu_rwlock)
+void percpu_write_unlock_irqrestore(struct percpu_rwlock *pcpu_rwlock,
+				    unsigned long *flags)
 {
 	unsigned int cpu;
 
@@ -205,6 +220,6 @@ void percpu_write_unlock(struct percpu_rwlock *pcpu_rwlock)
 	for_each_possible_cpu(cpu)
 		per_cpu_ptr(pcpu_rwlock->rw_state, cpu)->writer_signal = false;
 
-	write_unlock(&pcpu_rwlock->global_rwlock);
+	write_unlock_irqrestore(&pcpu_rwlock->global_rwlock, *flags);
 }
 

^ permalink raw reply related

* [PATCH v6 06/46] percpu_rwlock: Rearrange the read-lock code to fastpath nested percpu readers
From: Srivatsa S. Bhat @ 2013-02-18 12:39 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

If we are dealing with a nested percpu reader, we can optimize away quite
a few costly operations. Improve that fastpath further, by avoiding the
unnecessary addition and subtraction of 'READER_PRESENT' to reader_refcnt,
by rearranging the code a bit.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 lib/percpu-rwlock.c |   14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/lib/percpu-rwlock.c b/lib/percpu-rwlock.c
index ce7e440..ed36531 100644
--- a/lib/percpu-rwlock.c
+++ b/lib/percpu-rwlock.c
@@ -80,23 +80,21 @@ void percpu_read_lock_irqsafe(struct percpu_rwlock *pcpu_rwlock)
 	preempt_disable();
 
 	/*
-	 * Let the writer know that a reader is active, even before we choose
-	 * our reader-side synchronization scheme.
-	 */
-	this_cpu_add(pcpu_rwlock->rw_state->reader_refcnt, READER_PRESENT);
-
-	/*
 	 * If we are already using per-cpu refcounts, it is not safe to switch
 	 * the synchronization scheme. So continue using the refcounts.
 	 */
 	if (reader_uses_percpu_refcnt(pcpu_rwlock)) {
 		this_cpu_inc(pcpu_rwlock->rw_state->reader_refcnt);
-		this_cpu_sub(pcpu_rwlock->rw_state->reader_refcnt,
-			     READER_PRESENT);
 		return;
 	}
 
 	/*
+	 * Let the writer know that a reader is active, even before we choose
+	 * our reader-side synchronization scheme.
+	 */
+	this_cpu_add(pcpu_rwlock->rw_state->reader_refcnt, READER_PRESENT);
+
+	/*
 	 * The write to 'reader_refcnt' must be visible before we read
 	 * 'writer_signal'.
 	 */

^ permalink raw reply related

* [PATCH v6 07/46] percpu_rwlock: Allow writers to be readers, and add lockdep annotations
From: Srivatsa S. Bhat @ 2013-02-18 12:39 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

CPU hotplug (which will be the first user of per-CPU rwlocks) has a special
requirement with respect to locking: the writer, after acquiring the per-CPU
rwlock for write, must be allowed to take the same lock for read, without
deadlocking and without getting complaints from lockdep. In comparison, this
is similar to what get_online_cpus()/put_online_cpus() does today: it allows
a hotplug writer (who holds the cpu_hotplug.lock mutex) to invoke it without
locking issues, because it silently returns if the caller is the hotplug
writer itself.

This can be easily achieved with per-CPU rwlocks as well (even without a
"is this a writer?" check) by incrementing the per-CPU refcount of the writer
immediately after taking the global rwlock for write, and then decrementing
the per-CPU refcount before releasing the global rwlock.
This ensures that any reader that comes along on that CPU while the writer is
active (on that same CPU), notices the non-zero value of the nested counter
and assumes that it is a nested read-side critical section and proceeds by
just incrementing the refcount. Thus we prevent the reader from taking the
global rwlock for read, which prevents the writer from deadlocking itself.

Add that support and teach lockdep about this special locking scheme so
that it knows that this sort of usage is valid. Also add the required lockdep
annotations to enable it to detect common locking problems with per-CPU
rwlocks.

Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 lib/percpu-rwlock.c |   33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/lib/percpu-rwlock.c b/lib/percpu-rwlock.c
index ed36531..bf95e40 100644
--- a/lib/percpu-rwlock.c
+++ b/lib/percpu-rwlock.c
@@ -102,6 +102,10 @@ void percpu_read_lock_irqsafe(struct percpu_rwlock *pcpu_rwlock)
 
 	if (likely(!writer_active(pcpu_rwlock))) {
 		this_cpu_inc(pcpu_rwlock->rw_state->reader_refcnt);
+
+		/* Pretend that we take global_rwlock for lockdep */
+		rwlock_acquire_read(&pcpu_rwlock->global_rwlock.dep_map,
+				    0, 0, _RET_IP_);
 	} else {
 		/* Writer is active, so switch to global rwlock. */
 
@@ -126,6 +130,12 @@ void percpu_read_lock_irqsafe(struct percpu_rwlock *pcpu_rwlock)
 		if (!writer_active(pcpu_rwlock)) {
 			this_cpu_inc(pcpu_rwlock->rw_state->reader_refcnt);
 			read_unlock(&pcpu_rwlock->global_rwlock);
+
+			/*
+			 * Pretend that we take global_rwlock for lockdep
+			 */
+			rwlock_acquire_read(&pcpu_rwlock->global_rwlock.dep_map,
+					    0, 0, _RET_IP_);
 		}
 	}
 
@@ -162,6 +172,13 @@ void percpu_read_unlock_irqsafe(struct percpu_rwlock *pcpu_rwlock)
 		 */
 		smp_mb();
 		this_cpu_dec(pcpu_rwlock->rw_state->reader_refcnt);
+
+		/*
+		 * Since this is the last decrement, it is time to pretend
+		 * to lockdep that we are releasing the read lock.
+		 */
+		rwlock_release(&pcpu_rwlock->global_rwlock.dep_map,
+			       1, _RET_IP_);
 	} else {
 		read_unlock(&pcpu_rwlock->global_rwlock);
 	}
@@ -200,6 +217,16 @@ void percpu_write_lock_irqsave(struct percpu_rwlock *pcpu_rwlock,
 
 	smp_mb(); /* Complete the wait-for-readers, before taking the lock */
 	write_lock_irqsave(&pcpu_rwlock->global_rwlock, *flags);
+
+	/*
+	 * It is desirable to allow the writer to acquire the percpu-rwlock
+	 * for read (if necessary), without deadlocking or getting complaints
+	 * from lockdep. To achieve that, just increment the reader_refcnt of
+	 * this CPU - that way, any attempt by the writer to acquire the
+	 * percpu-rwlock for read, will get treated as a case of nested percpu
+	 * reader, which is safe, from a locking perspective.
+	 */
+	this_cpu_inc(pcpu_rwlock->rw_state->reader_refcnt);
 }
 
 void percpu_write_unlock_irqrestore(struct percpu_rwlock *pcpu_rwlock,
@@ -207,6 +234,12 @@ void percpu_write_unlock_irqrestore(struct percpu_rwlock *pcpu_rwlock,
 {
 	unsigned int cpu;
 
+	/*
+	 * Undo the special increment that we had done in the write-lock path
+	 * in order to allow writers to be readers.
+	 */
+	this_cpu_dec(pcpu_rwlock->rw_state->reader_refcnt);
+
 	/* Complete the critical section before clearing ->writer_signal */
 	smp_mb();
 

^ permalink raw reply related

* [PATCH v6 08/46] CPU hotplug: Provide APIs to prevent CPU offline from atomic context
From: Srivatsa S. Bhat @ 2013-02-18 12:39 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

There are places where preempt_disable() or local_irq_disable() are used
to prevent any CPU from going offline during the critical section. Let us
call them "atomic hotplug readers" ("atomic" because they run in atomic,
non-preemptible contexts).

Today, preempt_disable() or its equivalent works because the hotplug writer
uses stop_machine() to take CPUs offline. But once stop_machine() is gone
from the CPU hotplug offline path, the readers won't be able to prevent
CPUs from going offline using preempt_disable().

So the intent here is to provide synchronization APIs for such atomic hotplug
readers, to prevent (any) CPUs from going offline, without depending on
stop_machine() at the writer-side. The new APIs will look something like
this:  get_online_cpus_atomic() and put_online_cpus_atomic()

Some important design requirements and considerations:
-----------------------------------------------------

1. Scalable synchronization at the reader-side, especially in the fast-path

   Any synchronization on the atomic hotplug reader side must be highly
   scalable - avoid global single-holder locks/counters etc. These paths
   currently use the extremely fast preempt_disable(), so our replacement
   for preempt_disable() should not become ridiculously costly, nor should
   it serialize the readers among themselves needlessly.

   At a minimum, the new APIs must be extremely fast at the reader side,
   at least in the fast-path, when no CPU offline writers are active.

2. preempt_disable() was recursive. The replacement should also be recursive.

3. No (new) lock-ordering restrictions

   preempt_disable() was super-flexible. It didn't impose any ordering
   restrictions or rules for nesting. Our replacement should also be equally
   flexible and usable.

4. No deadlock possibilities

   Regular per-cpu locking is not the way to go if we want to have relaxed
   rules for lock-ordering, because we can end up with circular-locking
   dependencies, as explained in https://lkml.org/lkml/2012/12/6/290

   So, avoid the usual per-cpu locking schemes (per-cpu locks/per-cpu atomic
   counters with spin-on-contention etc) as much as possible, to avoid
   numerous deadlock possibilities from creeping in.


Implementation of the design:
----------------------------

We use per-CPU reader-writer locks for synchronization because:

  a. They are quite fast and scalable in the fast-path (when no writers are
     active), since they use fast per-cpu counters in those paths.

  b. They are recursive at the reader side.

  c. They provide a good amount of safety against deadlocks; they don't
     spring new deadlock possibilities on us from out of nowhere. As a
     result, they have relaxed locking rules and are quite flexible, and
     thus are best suited for replacing usages of preempt_disable() or
     local_irq_disable() at the reader side.

Together, these satisfy all the requirements mentioned above.

I'm indebted to Michael Wang and Xiao Guangrong for their numerous thoughtful
suggestions and ideas, which inspired and influenced many of the decisions in
this as well as previous designs. Thanks a lot Michael and Xiao!

Cc: Russell King <linux@arm.linux.org.uk>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Howells <dhowells@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: uclinux-dist-devel@blackfin.uclinux.org
Cc: linux-ia64@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux-am33-list@redhat.com
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-s390@vger.kernel.org
Cc: linux-sh@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

 arch/arm/Kconfig      |    1 +
 arch/blackfin/Kconfig |    1 +
 arch/ia64/Kconfig     |    1 +
 arch/mips/Kconfig     |    1 +
 arch/mn10300/Kconfig  |    1 +
 arch/parisc/Kconfig   |    1 +
 arch/powerpc/Kconfig  |    1 +
 arch/s390/Kconfig     |    1 +
 arch/sh/Kconfig       |    1 +
 arch/sparc/Kconfig    |    1 +
 arch/x86/Kconfig      |    1 +
 include/linux/cpu.h   |    4 ++++
 kernel/cpu.c          |   55 +++++++++++++++++++++++++++++++++++++++++++++++--
 13 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 67874b8..cb6b94b 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1616,6 +1616,7 @@ config NR_CPUS
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs"
 	depends on SMP && HOTPLUG
+	select PERCPU_RWLOCK
 	help
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu.
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index b6f3ad5..83d9882 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -261,6 +261,7 @@ config NR_CPUS
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs"
 	depends on SMP && HOTPLUG
+	select PERCPU_RWLOCK
 	default y
 
 config BF_REV_MIN
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3279646..c246772 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -378,6 +378,7 @@ config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
 	depends on SMP && EXPERIMENTAL
 	select HOTPLUG
+	select PERCPU_RWLOCK
 	default n
 	---help---
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 2ac626a..f97c479 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -956,6 +956,7 @@ config SYS_HAS_EARLY_PRINTK
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs"
 	depends on SMP && HOTPLUG && SYS_SUPPORTS_HOTPLUG_CPU
+	select PERCPU_RWLOCK
 	help
 	  Say Y here to allow turning CPUs off and on. CPUs can be
 	  controlled through /sys/devices/system/cpu.
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index e70001c..a64e488 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -60,6 +60,7 @@ config ARCH_HAS_ILOG2_U32
 
 config HOTPLUG_CPU
 	def_bool n
+	select PERCPU_RWLOCK
 
 source "init/Kconfig"
 
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index b77feff..6f55cd4 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -226,6 +226,7 @@ config HOTPLUG_CPU
 	bool
 	default y if SMP
 	select HOTPLUG
+	select PERCPU_RWLOCK
 
 config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 17903f1..56b1f15 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -336,6 +336,7 @@ config HOTPLUG_CPU
 	bool "Support for enabling/disabling CPUs"
 	depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || \
 	PPC_PMAC || PPC_POWERNV || (PPC_85xx && !PPC_E500MC))
+	select PERCPU_RWLOCK
 	---help---
 	  Say Y here to be able to disable and re-enable individual
 	  CPUs at runtime on SMP machines.
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b5ea38c..a9aafb4 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -299,6 +299,7 @@ config HOTPLUG_CPU
 	prompt "Support for hot-pluggable CPUs"
 	depends on SMP
 	select HOTPLUG
+	select PERCPU_RWLOCK
 	help
 	  Say Y here to be able to turn CPUs off and on. CPUs
 	  can be controlled through /sys/devices/system/cpu/cpu#.
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index babc2b8..8c92eef 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -765,6 +765,7 @@ config NR_CPUS
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
 	depends on SMP && HOTPLUG && EXPERIMENTAL
+	select PERCPU_RWLOCK
 	help
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu.
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index cb9c333..b22f29d 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -254,6 +254,7 @@ config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs"
 	depends on SPARC64 && SMP
 	select HOTPLUG
+	select PERCPU_RWLOCK
 	help
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu/cpu#.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 225543b..1a6d50d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1689,6 +1689,7 @@ config PHYSICAL_ALIGN
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs"
 	depends on SMP && HOTPLUG
+	select PERCPU_RWLOCK
 	---help---
 	  Say Y here to allow turning CPUs off and on. CPUs can be
 	  controlled through /sys/devices/system/cpu.
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index ce7a074..cf24da1 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -175,6 +175,8 @@ extern struct bus_type cpu_subsys;
 
 extern void get_online_cpus(void);
 extern void put_online_cpus(void);
+extern void get_online_cpus_atomic(void);
+extern void put_online_cpus_atomic(void);
 #define hotcpu_notifier(fn, pri)	cpu_notifier(fn, pri)
 #define register_hotcpu_notifier(nb)	register_cpu_notifier(nb)
 #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
@@ -198,6 +200,8 @@ static inline void cpu_hotplug_driver_unlock(void)
 
 #define get_online_cpus()	do { } while (0)
 #define put_online_cpus()	do { } while (0)
+#define get_online_cpus_atomic()	do { } while (0)
+#define put_online_cpus_atomic()	do { } while (0)
 #define hotcpu_notifier(fn, pri)	do { (void)(fn); } while (0)
 /* These aren't inline functions due to a GCC bug. */
 #define register_hotcpu_notifier(nb)	({ (void)(nb); 0; })
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3046a50..58dd1df 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1,6 +1,18 @@
 /* CPU control.
  * (C) 2001, 2002, 2003, 2004 Rusty Russell
  *
+ * Rework of the CPU hotplug offline mechanism to remove its dependence on
+ * the heavy-weight stop_machine() primitive, by Srivatsa S. Bhat and
+ * Paul E. McKenney.
+ *
+ * Copyright (C) IBM Corporation, 2012-2013
+ * Authors: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
+ *          Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ *
+ * With lots of invaluable suggestions from:
+ *	    Oleg Nesterov <oleg@redhat.com>
+ *	    Tejun Heo <tj@kernel.org>
+ *
  * This code is licenced under the GPL.
  */
 #include <linux/proc_fs.h>
@@ -19,6 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/gfp.h>
 #include <linux/suspend.h>
+#include <linux/percpu-rwlock.h>
 
 #include "smpboot.h"
 
@@ -133,6 +146,38 @@ static void cpu_hotplug_done(void)
 	mutex_unlock(&cpu_hotplug.lock);
 }
 
+/*
+ * Per-CPU Reader-Writer lock to synchronize between atomic hotplug
+ * readers and the CPU offline hotplug writer.
+ */
+DEFINE_STATIC_PERCPU_RWLOCK(hotplug_pcpu_rwlock);
+
+/*
+ * Invoked by atomic hotplug reader (a task which wants to prevent
+ * CPU offline, but which can't afford to sleep), to prevent CPUs from
+ * going offline. So, you can call this function from atomic contexts
+ * (including interrupt handlers).
+ *
+ * Note: This does NOT prevent CPUs from coming online! It only prevents
+ * CPUs from going offline.
+ *
+ * You can call this function recursively.
+ *
+ * Returns with preemption disabled (but interrupts remain as they are;
+ * they are not disabled).
+ */
+void get_online_cpus_atomic(void)
+{
+	percpu_read_lock_irqsafe(&hotplug_pcpu_rwlock);
+}
+EXPORT_SYMBOL_GPL(get_online_cpus_atomic);
+
+void put_online_cpus_atomic(void)
+{
+	percpu_read_unlock_irqsafe(&hotplug_pcpu_rwlock);
+}
+EXPORT_SYMBOL_GPL(put_online_cpus_atomic);
+
 #else /* #if CONFIG_HOTPLUG_CPU */
 static void cpu_hotplug_begin(void) {}
 static void cpu_hotplug_done(void) {}
@@ -246,15 +291,21 @@ struct take_cpu_down_param {
 static int __ref take_cpu_down(void *_param)
 {
 	struct take_cpu_down_param *param = _param;
+	unsigned long flags;
 	int err;
 
+	percpu_write_lock_irqsave(&hotplug_pcpu_rwlock, &flags);
+
 	/* Ensure this CPU doesn't handle any more interrupts. */
 	err = __cpu_disable();
 	if (err < 0)
-		return err;
+		goto out;
 
 	cpu_notify(CPU_DYING | param->mod, param->hcpu);
-	return 0;
+
+out:
+	percpu_write_unlock_irqrestore(&hotplug_pcpu_rwlock, &flags);
+	return err;
 }
 
 /* Requires cpu_add_remove_lock to be held */

^ permalink raw reply related

* [PATCH v6 09/46] CPU hotplug: Convert preprocessor macros to static inline functions
From: Srivatsa S. Bhat @ 2013-02-18 12:39 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

On 12/05/2012 06:10 AM, Andrew Morton wrote:
"static inline C functions would be preferred if possible.  Feel free to
fix up the wrong crufty surrounding code as well ;-)"

Convert the macros in the CPU hotplug code to static inline C functions.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

 include/linux/cpu.h |    8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index cf24da1..eb79f47 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -198,10 +198,10 @@ static inline void cpu_hotplug_driver_unlock(void)
 
 #else		/* CONFIG_HOTPLUG_CPU */
 
-#define get_online_cpus()	do { } while (0)
-#define put_online_cpus()	do { } while (0)
-#define get_online_cpus_atomic()	do { } while (0)
-#define put_online_cpus_atomic()	do { } while (0)
+static inline void get_online_cpus(void) {}
+static inline void put_online_cpus(void) {}
+static inline void get_online_cpus_atomic(void) {}
+static inline void put_online_cpus_atomic(void) {}
 #define hotcpu_notifier(fn, pri)	do { (void)(fn); } while (0)
 /* These aren't inline functions due to a GCC bug. */
 #define register_hotcpu_notifier(nb)	({ (void)(nb); 0; })

^ permalink raw reply related

* [PATCH v6 10/46] smp, cpu hotplug: Fix smp_call_function_*() to prevent CPU offline properly
From: Srivatsa S. Bhat @ 2013-02-18 12:39 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() to prevent CPUs from going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline
when running in atomic context.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 kernel/smp.c |   40 ++++++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index 69f38bd..0f40d36 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -315,7 +315,8 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 	 * prevent preemption and reschedule on another processor,
 	 * as well as CPU removal
 	 */
-	this_cpu = get_cpu();
+	get_online_cpus_atomic();
+	this_cpu = smp_processor_id();
 
 	/*
 	 * Can deadlock when called with interrupts disabled.
@@ -347,7 +348,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 		}
 	}
 
-	put_cpu();
+	put_online_cpus_atomic();
 
 	return err;
 }
@@ -376,8 +377,10 @@ int smp_call_function_any(const struct cpumask *mask,
 	const struct cpumask *nodemask;
 	int ret;
 
+	get_online_cpus_atomic();
 	/* Try for same CPU (cheapest) */
-	cpu = get_cpu();
+	cpu = smp_processor_id();
+
 	if (cpumask_test_cpu(cpu, mask))
 		goto call;
 
@@ -393,7 +396,7 @@ int smp_call_function_any(const struct cpumask *mask,
 	cpu = cpumask_any_and(mask, cpu_online_mask);
 call:
 	ret = smp_call_function_single(cpu, func, info, wait);
-	put_cpu();
+	put_online_cpus_atomic();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(smp_call_function_any);
@@ -414,25 +417,28 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
 	unsigned int this_cpu;
 	unsigned long flags;
 
-	this_cpu = get_cpu();
+	get_online_cpus_atomic();
+
+	this_cpu = smp_processor_id();
+
 	/*
 	 * Can deadlock when called with interrupts disabled.
 	 * We allow cpu's that are not yet online though, as no one else can
 	 * send smp call function interrupt to this cpu and as such deadlocks
 	 * can't happen.
 	 */
-	WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled()
+	WARN_ON_ONCE(cpu_online(this_cpu) && wait && irqs_disabled()
 		     && !oops_in_progress);
 
 	if (cpu == this_cpu) {
 		local_irq_save(flags);
 		data->func(data->info);
 		local_irq_restore(flags);
-	} else {
+	} else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
 		csd_lock(data);
 		generic_exec_single(cpu, data, wait);
 	}
-	put_cpu();
+	put_online_cpus_atomic();
 }
 
 /**
@@ -456,6 +462,8 @@ void smp_call_function_many(const struct cpumask *mask,
 	unsigned long flags;
 	int refs, cpu, next_cpu, this_cpu = smp_processor_id();
 
+	get_online_cpus_atomic();
+
 	/*
 	 * Can deadlock when called with interrupts disabled.
 	 * We allow cpu's that are not yet online though, as no one else can
@@ -472,17 +480,18 @@ void smp_call_function_many(const struct cpumask *mask,
 
 	/* No online cpus?  We're done. */
 	if (cpu >= nr_cpu_ids)
-		return;
+		goto out_unlock;
 
 	/* Do we have another CPU which isn't us? */
 	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
 	if (next_cpu == this_cpu)
-		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
+		next_cpu = cpumask_next_and(next_cpu, mask,
+						cpu_online_mask);
 
 	/* Fastpath: do that cpu by itself. */
 	if (next_cpu >= nr_cpu_ids) {
 		smp_call_function_single(cpu, func, info, wait);
-		return;
+		goto out_unlock;
 	}
 
 	data = &__get_cpu_var(cfd_data);
@@ -528,7 +537,7 @@ void smp_call_function_many(const struct cpumask *mask,
 	/* Some callers race with other cpus changing the passed mask */
 	if (unlikely(!refs)) {
 		csd_unlock(&data->csd);
-		return;
+		goto out_unlock;
 	}
 
 	/*
@@ -565,6 +574,9 @@ void smp_call_function_many(const struct cpumask *mask,
 	/* Optionally wait for the CPUs to complete */
 	if (wait)
 		csd_lock_wait(&data->csd);
+
+out_unlock:
+	put_online_cpus_atomic();
 }
 EXPORT_SYMBOL(smp_call_function_many);
 
@@ -585,9 +597,9 @@ EXPORT_SYMBOL(smp_call_function_many);
  */
 int smp_call_function(smp_call_func_t func, void *info, int wait)
 {
-	preempt_disable();
+	get_online_cpus_atomic();
 	smp_call_function_many(cpu_online_mask, func, info, wait);
-	preempt_enable();
+	put_online_cpus_atomic();
 
 	return 0;
 }

^ permalink raw reply related

* [PATCH v6 11/46] smp, cpu hotplug: Fix on_each_cpu_*() to prevent CPU offline properly
From: Srivatsa S. Bhat @ 2013-02-18 12:39 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() to prevent CPUs from going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline
when running in atomic context.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 kernel/smp.c |   25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index 0f40d36..a8fd381 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -699,12 +699,12 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
 	unsigned long flags;
 	int ret = 0;
 
-	preempt_disable();
+	get_online_cpus_atomic();
 	ret = smp_call_function(func, info, wait);
 	local_irq_save(flags);
 	func(info);
 	local_irq_restore(flags);
-	preempt_enable();
+	put_online_cpus_atomic();
 	return ret;
 }
 EXPORT_SYMBOL(on_each_cpu);
@@ -726,7 +726,11 @@ EXPORT_SYMBOL(on_each_cpu);
 void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
 			void *info, bool wait)
 {
-	int cpu = get_cpu();
+	int cpu;
+
+	get_online_cpus_atomic();
+
+	cpu = smp_processor_id();
 
 	smp_call_function_many(mask, func, info, wait);
 	if (cpumask_test_cpu(cpu, mask)) {
@@ -734,7 +738,7 @@ void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
 		func(info);
 		local_irq_enable();
 	}
-	put_cpu();
+	put_online_cpus_atomic();
 }
 EXPORT_SYMBOL(on_each_cpu_mask);
 
@@ -759,8 +763,9 @@ EXPORT_SYMBOL(on_each_cpu_mask);
  * The function might sleep if the GFP flags indicates a non
  * atomic allocation is allowed.
  *
- * Preemption is disabled to protect against CPUs going offline but not online.
- * CPUs going online during the call will not be seen or sent an IPI.
+ * We use get/put_online_cpus_atomic() to prevent CPUs from going
+ * offline in-between our operation. CPUs coming online during the
+ * call will not be seen or sent an IPI.
  *
  * You must not call this function with disabled interrupts or
  * from a hardware interrupt handler or from a bottom half handler.
@@ -775,26 +780,26 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 	might_sleep_if(gfp_flags & __GFP_WAIT);
 
 	if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
-		preempt_disable();
+		get_online_cpus_atomic();
 		for_each_online_cpu(cpu)
 			if (cond_func(cpu, info))
 				cpumask_set_cpu(cpu, cpus);
 		on_each_cpu_mask(cpus, func, info, wait);
-		preempt_enable();
+		put_online_cpus_atomic();
 		free_cpumask_var(cpus);
 	} else {
 		/*
 		 * No free cpumask, bother. No matter, we'll
 		 * just have to IPI them one by one.
 		 */
-		preempt_disable();
+		get_online_cpus_atomic();
 		for_each_online_cpu(cpu)
 			if (cond_func(cpu, info)) {
 				ret = smp_call_function_single(cpu, func,
 								info, wait);
 				WARN_ON_ONCE(!ret);
 			}
-		preempt_enable();
+		put_online_cpus_atomic();
 	}
 }
 EXPORT_SYMBOL(on_each_cpu_cond);

^ permalink raw reply related

* [PATCH v6 12/46] sched/timer: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:40 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from going
offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline
when running in atomic context.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 kernel/sched/core.c |   24 +++++++++++++++++++++---
 kernel/sched/fair.c |    5 ++++-
 kernel/timer.c      |    2 ++
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 26058d0..ead8af6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1117,11 +1117,11 @@ void kick_process(struct task_struct *p)
 {
 	int cpu;
 
-	preempt_disable();
+	get_online_cpus_atomic();
 	cpu = task_cpu(p);
 	if ((cpu != smp_processor_id()) && task_curr(p))
 		smp_send_reschedule(cpu);
-	preempt_enable();
+	put_online_cpus_atomic();
 }
 EXPORT_SYMBOL_GPL(kick_process);
 #endif /* CONFIG_SMP */
@@ -1129,6 +1129,10 @@ EXPORT_SYMBOL_GPL(kick_process);
 #ifdef CONFIG_SMP
 /*
  * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ *
+ *  Must be called under get/put_online_cpus_atomic() or
+ *  equivalent, to avoid CPUs from going offline from underneath
+ *  us.
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
@@ -1192,6 +1196,9 @@ out:
 
 /*
  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
+ *
+ * Must be called under get/put_online_cpus_atomic(), to prevent
+ * CPUs from going offline from underneath us.
  */
 static inline
 int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
@@ -1432,6 +1439,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	int cpu, success = 0;
 
 	smp_wmb();
+	get_online_cpus_atomic();
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	if (!(p->state & state))
 		goto out;
@@ -1472,6 +1480,7 @@ stat:
 	ttwu_stat(p, cpu, wake_flags);
 out:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+	put_online_cpus_atomic();
 
 	return success;
 }
@@ -1693,6 +1702,7 @@ void wake_up_new_task(struct task_struct *p)
 	unsigned long flags;
 	struct rq *rq;
 
+	get_online_cpus_atomic();
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 #ifdef CONFIG_SMP
 	/*
@@ -1713,6 +1723,7 @@ void wake_up_new_task(struct task_struct *p)
 		p->sched_class->task_woken(rq, p);
 #endif
 	task_rq_unlock(rq, p, &flags);
+	put_online_cpus_atomic();
 }
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -2610,6 +2621,7 @@ void sched_exec(void)
 	unsigned long flags;
 	int dest_cpu;
 
+	get_online_cpus_atomic();
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
 	if (dest_cpu == smp_processor_id())
@@ -2619,11 +2631,13 @@ void sched_exec(void)
 		struct migration_arg arg = { p, dest_cpu };
 
 		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		put_online_cpus_atomic();
 		stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
 		return;
 	}
 unlock:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+	put_online_cpus_atomic();
 }
 
 #endif
@@ -4373,6 +4387,7 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
 	unsigned long flags;
 	bool yielded = 0;
 
+	get_online_cpus_atomic();
 	local_irq_save(flags);
 	rq = this_rq();
 
@@ -4400,13 +4415,14 @@ again:
 		 * Make p's CPU reschedule; pick_next_entity takes care of
 		 * fairness.
 		 */
-		if (preempt && rq != p_rq)
+		if (preempt && rq != p_rq && cpu_online(task_cpu(p)))
 			resched_task(p_rq->curr);
 	}
 
 out:
 	double_rq_unlock(rq, p_rq);
 	local_irq_restore(flags);
+	put_online_cpus_atomic();
 
 	if (yielded)
 		schedule();
@@ -4811,9 +4827,11 @@ static int migration_cpu_stop(void *data)
 	 * The original target cpu might have gone down and we might
 	 * be on another cpu but it doesn't matter.
 	 */
+	get_online_cpus_atomic();
 	local_irq_disable();
 	__migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
 	local_irq_enable();
+	put_online_cpus_atomic();
 	return 0;
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 81fa536..c602d5c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5695,8 +5695,11 @@ void trigger_load_balance(struct rq *rq, int cpu)
 	    likely(!on_null_domain(cpu)))
 		raise_softirq(SCHED_SOFTIRQ);
 #ifdef CONFIG_NO_HZ
-	if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
+	if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu))) {
+		get_online_cpus_atomic();
 		nohz_balancer_kick(cpu);
+		put_online_cpus_atomic();
+	}
 #endif
 }
 
diff --git a/kernel/timer.c b/kernel/timer.c
index 367d008..b1820e3 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -924,6 +924,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(timer_pending(timer) || !timer->function);
+	get_online_cpus_atomic();
 	spin_lock_irqsave(&base->lock, flags);
 	timer_set_base(timer, base);
 	debug_activate(timer, timer->expires);
@@ -938,6 +939,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	 */
 	wake_up_idle_cpu(cpu);
 	spin_unlock_irqrestore(&base->lock, flags);
+	put_online_cpus_atomic();
 }
 EXPORT_SYMBOL_GPL(add_timer_on);
 

^ permalink raw reply related

* [PATCH v6 13/46] sched/migration: Use raw_spin_lock/unlock since interrupts are already disabled
From: Srivatsa S. Bhat @ 2013-02-18 12:40 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

We need not use the raw_spin_lock_irqsave/restore primitives because
all CPU_DYING notifiers run with interrupts disabled. So just use
raw_spin_lock/unlock.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 kernel/sched/core.c |   12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ead8af6..71741d0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4870,9 +4870,7 @@ static void calc_load_migrate(struct rq *rq)
  * Migrate all tasks from the rq, sleeping tasks will be migrated by
  * try_to_wake_up()->select_task_rq().
  *
- * Called with rq->lock held even though we'er in stop_machine() and
- * there's no concurrency possible, we hold the required locks anyway
- * because of lock validation efforts.
+ * Called with rq->lock held.
  */
 static void migrate_tasks(unsigned int dead_cpu)
 {
@@ -4884,8 +4882,8 @@ static void migrate_tasks(unsigned int dead_cpu)
 	 * Fudge the rq selection such that the below task selection loop
 	 * doesn't get stuck on the currently eligible stop task.
 	 *
-	 * We're currently inside stop_machine() and the rq is either stuck
-	 * in the stop_machine_cpu_stop() loop, or we're executing this code,
+	 * We're currently inside stop_one_cpu() and the rq is either stuck
+	 * in the cpu_stopper_thread(), or we're executing this code,
 	 * either way we should never end up calling schedule() until we're
 	 * done here.
 	 */
@@ -5154,14 +5152,14 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DYING:
 		sched_ttwu_pending();
 		/* Update our root-domain */
-		raw_spin_lock_irqsave(&rq->lock, flags);
+		raw_spin_lock(&rq->lock); /* Interrupts already disabled */
 		if (rq->rd) {
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 			set_rq_offline(rq);
 		}
 		migrate_tasks(cpu);
 		BUG_ON(rq->nr_running != 1); /* the migration thread */
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
+		raw_spin_unlock(&rq->lock);
 		break;
 
 	case CPU_DEAD:

^ permalink raw reply related

* [PATCH v6 14/46] sched/rt: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:40 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from
going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline
when running in atomic context.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 kernel/sched/rt.c |    3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4f02b28..e546c98 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -6,6 +6,7 @@
 #include "sched.h"
 
 #include <linux/slab.h>
+#include <linux/cpu.h>
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
@@ -26,7 +27,9 @@ static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
 		if (!overrun)
 			break;
 
+		get_online_cpus_atomic();
 		idle = do_sched_rt_period_timer(rt_b, overrun);
+		put_online_cpus_atomic();
 	}
 
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;

^ permalink raw reply related

* [PATCH v6 15/46] tick: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:40 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from
going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
while invoking from atomic context.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 kernel/time/tick-broadcast.c |    2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f113755..d123a2c 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -160,6 +160,7 @@ static void tick_do_broadcast(struct cpumask *mask)
  */
 static void tick_do_periodic_broadcast(void)
 {
+	get_online_cpus_atomic();
 	raw_spin_lock(&tick_broadcast_lock);
 
 	cpumask_and(to_cpumask(tmpmask),
@@ -167,6 +168,7 @@ static void tick_do_periodic_broadcast(void)
 	tick_do_broadcast(to_cpumask(tmpmask));
 
 	raw_spin_unlock(&tick_broadcast_lock);
+	put_online_cpus_atomic();
 }
 
 /*

^ permalink raw reply related

* [PATCH v6 16/46] time/clocksource: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:40 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from going
offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
while invoking from atomic context.

Cc: John Stultz <john.stultz@linaro.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 kernel/time/clocksource.c |    5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c958338..1c8d735 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -30,6 +30,7 @@
 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
 #include <linux/tick.h>
 #include <linux/kthread.h>
+#include <linux/cpu.h>
 
 void timecounter_init(struct timecounter *tc,
 		      const struct cyclecounter *cc,
@@ -320,11 +321,13 @@ static void clocksource_watchdog(unsigned long data)
 	 * Cycle through CPUs to check if the CPUs stay synchronized
 	 * to each other.
 	 */
+	get_online_cpus_atomic();
 	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
 	if (next_cpu >= nr_cpu_ids)
 		next_cpu = cpumask_first(cpu_online_mask);
 	watchdog_timer.expires += WATCHDOG_INTERVAL;
 	add_timer_on(&watchdog_timer, next_cpu);
+	put_online_cpus_atomic();
 out:
 	spin_unlock(&watchdog_lock);
 }
@@ -336,7 +339,9 @@ static inline void clocksource_start_watchdog(void)
 	init_timer(&watchdog_timer);
 	watchdog_timer.function = clocksource_watchdog;
 	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
+	get_online_cpus_atomic();
 	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
+	put_online_cpus_atomic();
 	watchdog_running = 1;
 }
 

^ permalink raw reply related

* [PATCH v6 17/46] clockevents: Use get/put_online_cpus_atomic() in clockevents_notify()
From: Srivatsa S. Bhat @ 2013-02-18 12:40 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

The cpu idle code invokes clockevents_notify() during idle state transitions
and the cpu offline code invokes it during the CPU_DYING phase. There
seems to be a race condition between the two, where the clockevents_lock
never gets released, ending in a lockup. This can be fixed by synchronizing
clockevents_notify() with CPU offline, by wrapping its contents within
get/put_online_cpus_atomic().

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 kernel/time/clockevents.c |    3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 30b6de0..ca340fd 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
+#include <linux/cpu.h>
 
 #include "tick-internal.h"
 
@@ -431,6 +432,7 @@ void clockevents_notify(unsigned long reason, void *arg)
 	unsigned long flags;
 	int cpu;
 
+	get_online_cpus_atomic();
 	raw_spin_lock_irqsave(&clockevents_lock, flags);
 	clockevents_do_notify(reason, arg);
 
@@ -459,6 +461,7 @@ void clockevents_notify(unsigned long reason, void *arg)
 		break;
 	}
 	raw_spin_unlock_irqrestore(&clockevents_lock, flags);
+	put_online_cpus_atomic();
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
 #endif

^ permalink raw reply related

* [PATCH v6 18/46] softirq: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:40 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from
going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
while invoking from atomic context.

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 kernel/softirq.c |    3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index ed567ba..98c3e27 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -631,6 +631,7 @@ static void remote_softirq_receive(void *data)
 
 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
 {
+	get_online_cpus_atomic();
 	if (cpu_online(cpu)) {
 		cp->func = remote_softirq_receive;
 		cp->info = cp;
@@ -638,8 +639,10 @@ static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softir
 		cp->priv = softirq;
 
 		__smp_call_function_single(cpu, cp, 0);
+		put_online_cpus_atomic();
 		return 0;
 	}
+	put_online_cpus_atomic();
 	return 1;
 }
 #else /* CONFIG_USE_GENERIC_SMP_HELPERS */

^ permalink raw reply related

* [PATCH v6 19/46] irq: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:40 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from
going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
while invoking from atomic context.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 kernel/irq/manage.c |    7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index e49a288..b4240b9 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/cpu.h>
 #include <linux/task_work.h>
 
 #include "internals.h"
@@ -202,7 +203,9 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
 		return -EINVAL;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
+	get_online_cpus_atomic();
 	ret =  __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask);
+	put_online_cpus_atomic();
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
@@ -343,7 +346,9 @@ int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask)
 	int ret;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
+	get_online_cpus_atomic();
 	ret = setup_affinity(irq, desc, mask);
+	put_online_cpus_atomic();
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
@@ -1126,7 +1131,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		}
 
 		/* Set default affinity mask once everything is setup */
+		get_online_cpus_atomic();
 		setup_affinity(irq, desc, mask);
+		put_online_cpus_atomic();
 
 	} else if (new->flags & IRQF_TRIGGER_MASK) {
 		unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK;

^ permalink raw reply related

* [PATCH v6 20/46] net: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:41 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from
going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
while invoking from atomic context.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: netdev@vger.kernel.org
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 net/core/dev.c |    9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index f64e439..5421f96 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3089,7 +3089,7 @@ int netif_rx(struct sk_buff *skb)
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
-		preempt_disable();
+		get_online_cpus_atomic();
 		rcu_read_lock();
 
 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -3099,7 +3099,7 @@ int netif_rx(struct sk_buff *skb)
 		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 
 		rcu_read_unlock();
-		preempt_enable();
+		put_online_cpus_atomic();
 	} else
 #endif
 	{
@@ -3498,6 +3498,7 @@ int netif_receive_skb(struct sk_buff *skb)
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu, ret;
 
+		get_online_cpus_atomic();
 		rcu_read_lock();
 
 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -3505,9 +3506,11 @@ int netif_receive_skb(struct sk_buff *skb)
 		if (cpu >= 0) {
 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 			rcu_read_unlock();
+			put_online_cpus_atomic();
 			return ret;
 		}
 		rcu_read_unlock();
+		put_online_cpus_atomic();
 	}
 #endif
 	return __netif_receive_skb(skb);
@@ -3887,6 +3890,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
 		local_irq_enable();
 
 		/* Send pending IPI's to kick RPS processing on remote cpus. */
+		get_online_cpus_atomic();
 		while (remsd) {
 			struct softnet_data *next = remsd->rps_ipi_next;
 
@@ -3895,6 +3899,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
 							   &remsd->csd, 0);
 			remsd = next;
 		}
+		put_online_cpus_atomic();
 	} else
 #endif
 		local_irq_enable();

^ permalink raw reply related

* [PATCH v6 21/46] block: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:41 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from
going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
while invoking from atomic context.

Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 block/blk-softirq.c |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 467c8de..448f9a9 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -58,6 +58,8 @@ static void trigger_softirq(void *data)
  */
 static int raise_blk_irq(int cpu, struct request *rq)
 {
+	get_online_cpus_atomic();
+
 	if (cpu_online(cpu)) {
 		struct call_single_data *data = &rq->csd;
 
@@ -66,9 +68,11 @@ static int raise_blk_irq(int cpu, struct request *rq)
 		data->flags = 0;
 
 		__smp_call_function_single(cpu, data, 0);
+		put_online_cpus_atomic();
 		return 0;
 	}
 
+	put_online_cpus_atomic();
 	return 1;
 }
 #else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */

^ permalink raw reply related

* [PATCH v6 22/46] crypto: pcrypt - Protect access to cpu_online_mask with get/put_online_cpus()
From: Srivatsa S. Bhat @ 2013-02-18 12:41 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

The pcrypt_aead_init_tfm() function accesses the cpu_online_mask without
disabling CPU hotplug. And it looks like it can afford to sleep, so use
the get/put_online_cpus() APIs to protect against CPU hotplug.

Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-crypto@vger.kernel.org
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 crypto/pcrypt.c |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index b2c99dc..10f64e2 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -280,12 +280,16 @@ static int pcrypt_aead_init_tfm(struct crypto_tfm *tfm)
 
 	ictx->tfm_count++;
 
+	get_online_cpus();
+
 	cpu_index = ictx->tfm_count % cpumask_weight(cpu_online_mask);
 
 	ctx->cb_cpu = cpumask_first(cpu_online_mask);
 	for (cpu = 0; cpu < cpu_index; cpu++)
 		ctx->cb_cpu = cpumask_next(ctx->cb_cpu, cpu_online_mask);
 
+	put_online_cpus();
+
 	cipher = crypto_spawn_aead(crypto_instance_ctx(inst));
 
 	if (IS_ERR(cipher))

^ permalink raw reply related

* [PATCH v6 23/46] infiniband: ehca: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:41 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from
going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
while invoking from atomic context.

Cc: Roland Dreier <roland@kernel.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 drivers/infiniband/hw/ehca/ehca_irq.c |    8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 8615d7c..d61936c 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -43,6 +43,7 @@
 
 #include <linux/slab.h>
 #include <linux/smpboot.h>
+#include <linux/cpu.h>
 
 #include "ehca_classes.h"
 #include "ehca_irq.h"
@@ -653,6 +654,9 @@ void ehca_tasklet_eq(unsigned long data)
 	ehca_process_eq((struct ehca_shca*)data, 1);
 }
 
+/*
+ * Must be called within a get/put_online_cpus_atomic() section.
+ */
 static int find_next_online_cpu(struct ehca_comp_pool *pool)
 {
 	int cpu;
@@ -703,6 +707,7 @@ static void queue_comp_task(struct ehca_cq *__cq)
 	int cq_jobs;
 	unsigned long flags;
 
+	get_online_cpus_atomic();
 	cpu_id = find_next_online_cpu(pool);
 	BUG_ON(!cpu_online(cpu_id));
 
@@ -720,6 +725,7 @@ static void queue_comp_task(struct ehca_cq *__cq)
 		BUG_ON(!cct || !thread);
 	}
 	__queue_comp_task(__cq, cct, thread);
+	put_online_cpus_atomic();
 }
 
 static void run_comp_task(struct ehca_cpu_comp_task *cct)
@@ -759,6 +765,7 @@ static void comp_task_park(unsigned int cpu)
 	list_splice_init(&cct->cq_list, &list);
 	spin_unlock_irq(&cct->task_lock);
 
+	get_online_cpus_atomic();
 	cpu = find_next_online_cpu(pool);
 	target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
 	thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
@@ -768,6 +775,7 @@ static void comp_task_park(unsigned int cpu)
 		__queue_comp_task(cq, target, thread);
 	}
 	spin_unlock_irq(&target->task_lock);
+	put_online_cpus_atomic();
 }
 
 static void comp_task_stop(unsigned int cpu, bool online)

^ permalink raw reply related

* [PATCH v6 24/46] [SCSI] fcoe: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:41 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from
going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
while invoking from atomic context.

Cc: Robert Love <robert.w.love@intel.com>
Cc: "James E.J. Bottomley" <JBottomley@parallels.com>
Cc: devel@open-fcoe.org
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 drivers/scsi/fcoe/fcoe.c |    7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 666b7ac..c971a17 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -1475,6 +1475,7 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
 	 * was originated, otherwise select cpu using rx exchange id
 	 * or fcoe_select_cpu().
 	 */
+	get_online_cpus_atomic();
 	if (ntoh24(fh->fh_f_ctl) & FC_FC_EX_CTX)
 		cpu = ntohs(fh->fh_ox_id) & fc_cpu_mask;
 	else {
@@ -1484,8 +1485,10 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
 			cpu = ntohs(fh->fh_rx_id) & fc_cpu_mask;
 	}
 
-	if (cpu >= nr_cpu_ids)
+	if (cpu >= nr_cpu_ids) {
+		put_online_cpus_atomic();
 		goto err;
+	}
 
 	fps = &per_cpu(fcoe_percpu, cpu);
 	spin_lock(&fps->fcoe_rx_list.lock);
@@ -1505,6 +1508,7 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
 		spin_lock(&fps->fcoe_rx_list.lock);
 		if (!fps->thread) {
 			spin_unlock(&fps->fcoe_rx_list.lock);
+			put_online_cpus_atomic();
 			goto err;
 		}
 	}
@@ -1526,6 +1530,7 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
 	if (fps->thread->state == TASK_INTERRUPTIBLE)
 		wake_up_process(fps->thread);
 	spin_unlock(&fps->fcoe_rx_list.lock);
+	put_online_cpus_atomic();
 
 	return 0;
 err:

^ permalink raw reply related

* [PATCH v6 25/46] staging: octeon: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:41 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from
going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
while invoking from atomic context.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: David Daney <david.daney@cavium.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 drivers/staging/octeon/ethernet-rx.c |    3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c
index 34afc16..8588b4d 100644
--- a/drivers/staging/octeon/ethernet-rx.c
+++ b/drivers/staging/octeon/ethernet-rx.c
@@ -36,6 +36,7 @@
 #include <linux/prefetch.h>
 #include <linux/ratelimit.h>
 #include <linux/smp.h>
+#include <linux/cpu.h>
 #include <linux/interrupt.h>
 #include <net/dst.h>
 #ifdef CONFIG_XFRM
@@ -97,6 +98,7 @@ static void cvm_oct_enable_one_cpu(void)
 		return;
 
 	/* ... if a CPU is available, Turn on NAPI polling for that CPU.  */
+	get_online_cpus_atomic();
 	for_each_online_cpu(cpu) {
 		if (!cpu_test_and_set(cpu, core_state.cpu_state)) {
 			v = smp_call_function_single(cpu, cvm_oct_enable_napi,
@@ -106,6 +108,7 @@ static void cvm_oct_enable_one_cpu(void)
 			break;
 		}
 	}
+	put_online_cpus_atomic();
 }
 
 static void cvm_oct_no_more_work(void)

^ permalink raw reply related

* [PATCH v6 26/46] x86: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Srivatsa S. Bhat @ 2013-02-18 12:41 UTC (permalink / raw)
  To: tglx, peterz, tj, oleg, paulmck, rusty, mingo, akpm, namhyung
  Cc: linux-arch, linux, nikunj, linux-pm, fweisbec, linux-doc,
	linux-kernel, rostedt, xiaoguangrong, rjw, sbw, wangyun,
	srivatsa.bhat, netdev, vincent.guittot, walken, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20130218123714.26245.61816.stgit@srivatsabhat.in.ibm.com>

Once stop_machine() is gone from the CPU offline path, we won't be able to
depend on preempt_disable() or local_irq_disable() to prevent CPUs from
going offline from under us.

Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
while invoking from atomic context.

Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: Tony Luck <tony.luck@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Daniel J Blueman <daniel@numascale-asia.com>
Cc: Steffen Persvold <sp@numascale.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Cc: linux-edac@vger.kernel.org
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 arch/x86/include/asm/ipi.h               |    5 +++++
 arch/x86/kernel/apic/apic_flat_64.c      |   10 ++++++++++
 arch/x86/kernel/apic/apic_numachip.c     |    5 +++++
 arch/x86/kernel/apic/es7000_32.c         |    5 +++++
 arch/x86/kernel/apic/io_apic.c           |    7 +++++--
 arch/x86/kernel/apic/ipi.c               |   10 ++++++++++
 arch/x86/kernel/apic/x2apic_cluster.c    |    4 ++++
 arch/x86/kernel/apic/x2apic_uv_x.c       |    4 ++++
 arch/x86/kernel/cpu/mcheck/therm_throt.c |    4 ++--
 arch/x86/mm/tlb.c                        |   14 +++++++-------
 10 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 615fa90..112249c 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -20,6 +20,7 @@
  * Subject to the GNU Public License, v.2
  */
 
+#include <linux/cpu.h>
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
 #include <asm/smp.h>
@@ -131,18 +132,22 @@ extern int no_broadcast;
 
 static inline void __default_local_send_IPI_allbutself(int vector)
 {
+	get_online_cpus_atomic();
 	if (no_broadcast || vector == NMI_VECTOR)
 		apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
 	else
 		__default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical);
+	put_online_cpus_atomic();
 }
 
 static inline void __default_local_send_IPI_all(int vector)
 {
+	get_online_cpus_atomic();
 	if (no_broadcast || vector == NMI_VECTOR)
 		apic->send_IPI_mask(cpu_online_mask, vector);
 	else
 		__default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical);
+	put_online_cpus_atomic();
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 00c77cf..8207ade 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -11,6 +11,7 @@
 #include <linux/errno.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
+#include <linux/cpu.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/ctype.h>
@@ -92,6 +93,8 @@ static void flat_send_IPI_allbutself(int vector)
 #else
 	int hotplug = 0;
 #endif
+
+	get_online_cpus_atomic();
 	if (hotplug || vector == NMI_VECTOR) {
 		if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
 			unsigned long mask = cpumask_bits(cpu_online_mask)[0];
@@ -105,16 +108,19 @@ static void flat_send_IPI_allbutself(int vector)
 		__default_send_IPI_shortcut(APIC_DEST_ALLBUT,
 					    vector, apic->dest_logical);
 	}
+	put_online_cpus_atomic();
 }
 
 static void flat_send_IPI_all(int vector)
 {
+	get_online_cpus_atomic();
 	if (vector == NMI_VECTOR) {
 		flat_send_IPI_mask(cpu_online_mask, vector);
 	} else {
 		__default_send_IPI_shortcut(APIC_DEST_ALLINC,
 					    vector, apic->dest_logical);
 	}
+	put_online_cpus_atomic();
 }
 
 static unsigned int flat_get_apic_id(unsigned long x)
@@ -255,12 +261,16 @@ static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
 
 static void physflat_send_IPI_allbutself(int vector)
 {
+	get_online_cpus_atomic();
 	default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
+	put_online_cpus_atomic();
 }
 
 static void physflat_send_IPI_all(int vector)
 {
+	get_online_cpus_atomic();
 	physflat_send_IPI_mask(cpu_online_mask, vector);
+	put_online_cpus_atomic();
 }
 
 static int physflat_probe(void)
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 9c2aa89..7d19c1d 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -14,6 +14,7 @@
 #include <linux/errno.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
+#include <linux/cpu.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -131,15 +132,19 @@ static void numachip_send_IPI_allbutself(int vector)
 	unsigned int this_cpu = smp_processor_id();
 	unsigned int cpu;
 
+	get_online_cpus_atomic();
 	for_each_online_cpu(cpu) {
 		if (cpu != this_cpu)
 			numachip_send_IPI_one(cpu, vector);
 	}
+	put_online_cpus_atomic();
 }
 
 static void numachip_send_IPI_all(int vector)
 {
+	get_online_cpus_atomic();
 	numachip_send_IPI_mask(cpu_online_mask, vector);
+	put_online_cpus_atomic();
 }
 
 static void numachip_send_IPI_self(int vector)
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 0874799..ddf2995 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -45,6 +45,7 @@
 #include <linux/gfp.h>
 #include <linux/nmi.h>
 #include <linux/smp.h>
+#include <linux/cpu.h>
 #include <linux/io.h>
 
 #include <asm/apicdef.h>
@@ -412,12 +413,16 @@ static void es7000_send_IPI_mask(const struct cpumask *mask, int vector)
 
 static void es7000_send_IPI_allbutself(int vector)
 {
+	get_online_cpus_atomic();
 	default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
+	put_online_cpus_atomic();
 }
 
 static void es7000_send_IPI_all(int vector)
 {
+	get_online_cpus_atomic();
 	es7000_send_IPI_mask(cpu_online_mask, vector);
+	put_online_cpus_atomic();
 }
 
 static int es7000_apic_id_registered(void)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b739d39..ca1c2a5 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -25,6 +25,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
+#include <linux/cpu.h>
 #include <linux/pci.h>
 #include <linux/mc146818rtc.h>
 #include <linux/compiler.h>
@@ -1788,13 +1789,13 @@ __apicdebuginit(void) print_local_APICs(int maxcpu)
 	if (!maxcpu)
 		return;
 
-	preempt_disable();
+	get_online_cpus_atomic();
 	for_each_online_cpu(cpu) {
 		if (cpu >= maxcpu)
 			break;
 		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
 	}
-	preempt_enable();
+	put_online_cpus_atomic();
 }
 
 __apicdebuginit(void) print_PIC(void)
@@ -2209,6 +2210,7 @@ void send_cleanup_vector(struct irq_cfg *cfg)
 {
 	cpumask_var_t cleanup_mask;
 
+	get_online_cpus_atomic();
 	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
 		unsigned int i;
 		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
@@ -2219,6 +2221,7 @@ void send_cleanup_vector(struct irq_cfg *cfg)
 		free_cpumask_var(cleanup_mask);
 	}
 	cfg->move_in_progress = 0;
+	put_online_cpus_atomic();
 }
 
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index cce91bf..c65aa77 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -29,12 +29,14 @@ void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
 	 * to an arbitrary mask, so I do a unicast to each CPU instead.
 	 * - mbligh
 	 */
+	get_online_cpus_atomic();
 	local_irq_save(flags);
 	for_each_cpu(query_cpu, mask) {
 		__default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
 				query_cpu), vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
+	put_online_cpus_atomic();
 }
 
 void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
@@ -46,6 +48,7 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
 
 	/* See Hack comment above */
 
+	get_online_cpus_atomic();
 	local_irq_save(flags);
 	for_each_cpu(query_cpu, mask) {
 		if (query_cpu == this_cpu)
@@ -54,6 +57,7 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
 				 query_cpu), vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
+	put_online_cpus_atomic();
 }
 
 #ifdef CONFIG_X86_32
@@ -70,12 +74,14 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
 	 * should be modified to do 1 message per cluster ID - mbligh
 	 */
 
+	get_online_cpus_atomic();
 	local_irq_save(flags);
 	for_each_cpu(query_cpu, mask)
 		__default_send_IPI_dest_field(
 			early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
 			vector, apic->dest_logical);
 	local_irq_restore(flags);
+	put_online_cpus_atomic();
 }
 
 void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
@@ -87,6 +93,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
 
 	/* See Hack comment above */
 
+	get_online_cpus_atomic();
 	local_irq_save(flags);
 	for_each_cpu(query_cpu, mask) {
 		if (query_cpu == this_cpu)
@@ -96,6 +103,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
 			vector, apic->dest_logical);
 		}
 	local_irq_restore(flags);
+	put_online_cpus_atomic();
 }
 
 /*
@@ -109,10 +117,12 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
 	if (WARN_ONCE(!mask, "empty IPI mask"))
 		return;
 
+	get_online_cpus_atomic();
 	local_irq_save(flags);
 	WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
 	__default_send_IPI_dest_field(mask, vector, apic->dest_logical);
 	local_irq_restore(flags);
+	put_online_cpus_atomic();
 }
 
 void default_send_IPI_allbutself(int vector)
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index c88baa4..cb08e6b 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -88,12 +88,16 @@ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
 
 static void x2apic_send_IPI_allbutself(int vector)
 {
+	get_online_cpus_atomic();
 	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT);
+	put_online_cpus_atomic();
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
+	get_online_cpus_atomic();
 	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
+	put_online_cpus_atomic();
 }
 
 static int
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8cfade9..cc469a3 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -244,15 +244,19 @@ static void uv_send_IPI_allbutself(int vector)
 	unsigned int this_cpu = smp_processor_id();
 	unsigned int cpu;
 
+	get_online_cpus_atomic();
 	for_each_online_cpu(cpu) {
 		if (cpu != this_cpu)
 			uv_send_IPI_one(cpu, vector);
 	}
+	put_online_cpus_atomic();
 }
 
 static void uv_send_IPI_all(int vector)
 {
+	get_online_cpus_atomic();
 	uv_send_IPI_mask(cpu_online_mask, vector);
+	put_online_cpus_atomic();
 }
 
 static int uv_apic_id_valid(int apicid)
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 47a1870..d128ba4 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -82,13 +82,13 @@ static ssize_t therm_throt_device_show_##event##_##name(		\
 	unsigned int cpu = dev->id;					\
 	ssize_t ret;							\
 									\
-	preempt_disable();	/* CPU hotplug */			\
+	get_online_cpus_atomic();	/* CPU hotplug */		\
 	if (cpu_online(cpu)) {						\
 		ret = sprintf(buf, "%lu\n",				\
 			      per_cpu(thermal_state, cpu).event.name);	\
 	} else								\
 		ret = 0;						\
-	preempt_enable();						\
+	put_online_cpus_atomic();					\
 									\
 	return ret;							\
 }
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 13a6b29..2c3ec76 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -147,12 +147,12 @@ void flush_tlb_current_task(void)
 {
 	struct mm_struct *mm = current->mm;
 
-	preempt_disable();
+	get_online_cpus_atomic();
 
 	local_flush_tlb();
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
-	preempt_enable();
+	put_online_cpus_atomic();
 }
 
 /*
@@ -187,7 +187,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 	unsigned long addr;
 	unsigned act_entries, tlb_entries = 0;
 
-	preempt_disable();
+	get_online_cpus_atomic();
 	if (current->active_mm != mm)
 		goto flush_all;
 
@@ -225,21 +225,21 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 		if (cpumask_any_but(mm_cpumask(mm),
 				smp_processor_id()) < nr_cpu_ids)
 			flush_tlb_others(mm_cpumask(mm), mm, start, end);
-		preempt_enable();
+		put_online_cpus_atomic();
 		return;
 	}
 
 flush_all:
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
-	preempt_enable();
+	put_online_cpus_atomic();
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 {
 	struct mm_struct *mm = vma->vm_mm;
 
-	preempt_disable();
+	get_online_cpus_atomic();
 
 	if (current->active_mm == mm) {
 		if (current->mm)
@@ -251,7 +251,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
 
-	preempt_enable();
+	put_online_cpus_atomic();
 }
 
 static void do_flush_tlb_all(void *info)

^ permalink raw reply related


This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.