Re: [PATCH 1/11] Add generic helpers for arch IPI function calls

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Paul E. McKenney" <paulmck-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
To: Andrew Morton <akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
Cc: Jens Axboe <jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>,
	linux-arch-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	npiggin-l3A5Bk7waGM@public.gmane.org,
	torvalds-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org,
	peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org,
	sam-uyr5N9Q2VtJg9hUCZPvPmw@public.gmane.org
Subject: Re: [PATCH 1/11] Add generic helpers for arch IPI function calls
Date: Sat, 26 Apr 2008 17:58:16 -0700	[thread overview]
Message-ID: <20080427005816.GB21687@linux.vnet.ibm.com> (raw)
In-Reply-To: <20080425231100.b93a1601.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>

On Fri, Apr 25, 2008 at 11:11:00PM -0700, Andrew Morton wrote:
> > On Tue, 22 Apr 2008 20:50:17 +0200 Jens Axboe <jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> wrote:
> > This adds kernel/smp.c which contains helpers for IPI function calls. In
> > addition to supporting the existing smp_call_function() in a more efficient
> > manner, it also adds a more scalable variant called smp_call_function_single()
> > for calling a given function on a single CPU only.

Where is the new smp_call_function() defined?  I see only the declaration.

Probably just blind...

See comments interspersed below.

> > The core of this is based on the x86-64 patch from Nick Piggin, lots of
> > changes since then. "Alan D. Brunelle" <Alan.Brunelle-VXdhtT5mjnY@public.gmane.org> has
> > contributed lots of fixes and suggestions as well.
> > 
> > Acked-by: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
> > Signed-off-by: Jens Axboe <jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
> > ---
> >  arch/Kconfig        |    3 +
> >  include/linux/smp.h |   27 ++++-
> >  init/main.c         |    3 +
> >  kernel/Makefile     |    1 +
> >  kernel/smp.c        |  366 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  5 files changed, 398 insertions(+), 2 deletions(-)
> >  create mode 100644 kernel/smp.c
> > 
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index 694c9af..a5a0184 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -36,3 +36,6 @@ config HAVE_KPROBES
> >  
> >  config HAVE_KRETPROBES
> >  	def_bool n
> > +
> > +config USE_GENERIC_SMP_HELPERS
> > +	def_bool n
> > diff --git a/include/linux/smp.h b/include/linux/smp.h
> > index 55232cc..4a5418b 100644
> > --- a/include/linux/smp.h
> > +++ b/include/linux/smp.h
> > @@ -7,9 +7,19 @@
> >   */
> >  
> >  #include <linux/errno.h>
> > +#include <linux/list.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/cpumask.h>
> >  
> >  extern void cpu_idle(void);
> >  
> > +struct call_single_data {
> > +	struct list_head list;
> > +	void (*func) (void *info);
> > +	void *info;
> > +	unsigned int flags;
> > +};
> > +
> >  #ifdef CONFIG_SMP
> >  
> >  #include <linux/preempt.h>
> > @@ -53,9 +63,23 @@ extern void smp_cpus_done(unsigned int max_cpus);
> >   * Call a function on all other processors
> >   */
> >  int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
> > -
> > +int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
> > +				int wait);
> >  int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
> >  				int retry, int wait);
> > +void __smp_call_function_single(int cpuid, struct call_single_data *data);
> > +
> > +/*
> > + * Generic and arch helpers
> > + */
> > +#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
> > +void generic_smp_call_function_single_interrupt(void);
> > +void generic_smp_call_function_interrupt(void);
> > +void init_call_single_data(void);
> > +void arch_send_call_function_single_ipi(int cpu);
> > +void arch_send_call_function_ipi(cpumask_t mask);
> > +extern spinlock_t call_function_lock;
> > +#endif
> 
> Add a #else here ...
> 
> > diff --git a/init/main.c b/init/main.c
> > index 833a67d..0b7578c 100644
> > --- a/init/main.c
> > +++ b/init/main.c
> > @@ -773,6 +773,9 @@ static void __init do_pre_smp_initcalls(void)
> >  {
> >  	extern int spawn_ksoftirqd(void);
> >  
> > +#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
> > +	init_call_single_data();
> > +#endif
> 
> and we can lose this ifdef here.
> 
> Also, init_call_single_data() is __cpuinit but isn't declared that way. 
> There have been rare occasions (FRV was one) where this matters - iirc the
> compiler was emitting a short-relative-addressing-form instruction
> which wasn't able to branch far enough once things were linked.  We have
> this problem in eight zillion other places, of course.  And it's a pita to
> go adding __cpuinit etc to the declaration because the compiler usually
> won't tell us when it gets out of sync with reality.  So we could leave the
> code as-is and wait for stuff to break :(
> 
> 
> >  	migration_init();
> >  	spawn_ksoftirqd();
> >  	if (!nosoftlockup)
> > diff --git a/kernel/Makefile b/kernel/Makefile
> > index 6c5f081..7e275d4 100644
> > --- a/kernel/Makefile
> > +++ b/kernel/Makefile
> > @@ -28,6 +28,7 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
> >  obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
> >  obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
> >  obj-$(CONFIG_SMP) += cpu.o spinlock.o
> > +obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
> >  obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
> >  obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
> >  obj-$(CONFIG_UID16) += uid16.o
> > diff --git a/kernel/smp.c b/kernel/smp.c
> > new file mode 100644
> > index 0000000..a177a0d
> > --- /dev/null
> > +++ b/kernel/smp.c
> > @@ -0,0 +1,366 @@
> > +/*
> > + * Generic helpers for smp ipi calls
> > + *
> > + * (C) Jens Axboe <jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org> 2008
> > + *
> > + */
> > +#include <linux/init.h>
> > +#include <linux/module.h>
> > +#include <linux/percpu.h>
> > +#include <linux/rcupdate.h>
> > +#include <linux/smp.h>
> > +
> > +static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
> > +static LIST_HEAD(call_function_queue);
> > +__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
> > +
> > +enum {
> > +	CSD_FLAG_WAIT		= 0x01,
> > +	CSD_FLAG_ALLOC		= 0x02,
> > +	CSD_FLAG_FALLBACK	= 0x04,
> > +};
> > +
> > +struct call_function_data {
> > +	struct call_single_data csd;
> > +	spinlock_t lock;
> > +	unsigned int refs;
> > +	cpumask_t cpumask;
> > +	struct rcu_head rcu_head;
> > +};
> > +
> > +struct call_single_queue {
> > +	struct list_head list;
> > +	spinlock_t lock;
> > +};
> > +
> > +static struct call_function_data cfd_fallback;
> > +static unsigned long cfd_fallback_used;
> > +
> > +void __cpuinit init_call_single_data(void)
> > +{
> > +	int i;
> > +
> > +	for_each_cpu_mask(i, cpu_possible_map) {
> 
> for_each_possible_cpu()?
> 
> Do we _have_ to consider all possible CPUs here?  That can be much larger
> than num_online_cpus.
> 
> > +		struct call_single_queue *q = &per_cpu(call_single_queue, i);
> > +
> > +		spin_lock_init(&q->lock);
> > +		INIT_LIST_HEAD(&q->list);
> > +	}
> > +}
> > +
> > +static inline void csd_flag_wait(struct call_single_data *data)
> > +{
> > +	/* Wait for response */
> > +	do {
> > +		/*
> > +		 * We need to see the flags store in the IPI handler
> > +		 */
> > +		smp_mb();

But don't we need the smp_mb() -after- the CSD_FLAG_WAIT check?

We want the caller to see the effects of the on-other-CPU-called
function upon return, correct?

> > +		if (!(data->flags & CSD_FLAG_WAIT))
> > +			break;
> > +		cpu_relax();
> > +	} while (1);
> > +}
> 
> -ETOOLARGETOINLINE?
> 
> > +/*
> > + * Insert a previously allocated call_single_data element for execution
> > + * on the given CPU. data must already have ->func, ->info, and ->flags set.
> > + */
> > +static void generic_exec_single(int cpu, struct call_single_data *data)
> > +{
> > +	struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
> > +	int wait = data->flags & CSD_FLAG_WAIT, ipi;
> > +	unsigned long flags;
> > +
> > +	spin_lock_irqsave(&dst->lock, flags);
> > +	ipi = list_empty(&dst->list);
> > +	list_add_tail(&data->list, &dst->list);
> > +	spin_unlock_irqrestore(&dst->lock, flags);
> > +
> > +	if (ipi)
> > +		arch_send_call_function_single_ipi(cpu);
> > +
> > +	if (wait)
> > +		csd_flag_wait(data);
> > +}
> 
> Is this a must-be-called-with-local-interrupts-enable function?  (It
> doesn't say so in the covering comment) If so, a runtime check for that
> would be needed - we screw that up regularly.  Ditto any other places where
> this applies.
> 
> > +static void rcu_free_call_data(struct rcu_head *head)
> > +{
> > +	struct call_function_data *cfd;
> > +
> > +	cfd = container_of(head, struct call_function_data, rcu_head);
> > +	kfree(cfd);
> > +}
> > +
> > +static void call_func_data_free(struct call_function_data *data)
> > +{
> > +	if (data->csd.flags & CSD_FLAG_ALLOC)
> > +		call_rcu(&data->rcu_head, rcu_free_call_data);
> > +	else
> > +		clear_bit_unlock(0, &cfd_fallback_used);
> > +}
> 
> Let's cc Mr RCU - he reviews well and often finds problems.

Well, one question...  Is it really OK to clear this bit immediately?
Why don't you need to wait for a grace period to elapse before freeing it?

(OK, I eventually saw why you can't wait for a grace period, but you
still seem to have a bug...  See interspersed below...)

> > +/*
> > + * Invoked by arch to handle an IPI for call function. Must be called with
> > + * interrupts disabled.
> 
> A runtime check would be nice.  The get_cpu() will give us partial coverage
> but won't detect irqs-on-inside-spinlock state.
> 
> > + */
> > +void generic_smp_call_function_interrupt(void)
> > +{
> > +	struct list_head *pos;
> > +	int cpu = get_cpu();
> > +
> > +	/*
> > +	 * It's ok to use list_for_each_rcu() here even though we may delete
> > +	 * 'pos', since list_del_rcu() doesn't clear ->next
> > +	 */
> > +	rcu_read_lock();
> 
> Sigh.  irqs are disabled, so this is a waste of CPU cycles.  With some of
> our many RCU flavours, at least.

To safely omit the rcu_read_lock(), Jens would need to use the in-flight
call_rcu_sched() instead of call_rcu().  In addition, if we ever get
threaded interrupts, this interrupt handler would need IRQ_NODELAY.

So keeping the rcu_read_lock() would be more future-proof...

> > +	list_for_each_rcu(pos, &call_function_queue) {
> > +		struct call_function_data *data;
> > +		int refs;
> > +
> > +		data = list_entry(pos, struct call_function_data, csd.list);

How about using list_for_each_entry_rcu() instead?

	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
		int refs;

And then move the definition of "data" up to replace that of "pos".

A couple fewer lines and one fewer local variable.  ;-)

> > +		if (!cpu_isset(cpu, data->cpumask))
> > +			continue;

Need an smp_mb() here, at least in the case where you are using the
fallback queue element.  See my comments in smp_call_function_mask()
for the justification -- the key point is that if the CPU sees itself
in the mask, it also needs to see the data element all filled out.

See below for a scenario requiring this.

Such a memory barrier is -not- needed for the normally allocated
queue elements.

But memory barriers are a bit ugly...  How about instead acquiring the
data->lock around the cpu_isset() check above?

> > +
> > +		data->csd.func(data->csd.info);
> > +
> > +		spin_lock(&data->lock);
> > +		cpu_clear(cpu, data->cpumask);
> > +		WARN_ON(data->refs == 0);
> > +		data->refs--;
> > +		refs = data->refs;
> > +		spin_unlock(&data->lock);
> > +
> > +		if (refs)
> > +			continue;
> > +
> > +		WARN_ON(cpus_weight(data->cpumask));
> 
> !cpus_empty()
> 
> > +		spin_lock(&call_function_lock);
> > +		list_del_rcu(&data->csd.list);
> > +		spin_unlock(&call_function_lock);
> > +
> > +		if (data->csd.flags & CSD_FLAG_WAIT) {
> > +			smp_wmb();
> 
> It's nice to comment open-coded barriers.
> 
> > +			data->csd.flags &= ~CSD_FLAG_WAIT;
> > +		} else
> > +			call_func_data_free(data);

And the above statements are one reason that I believe you need to wait
for a grace period even when using the static fallback element.  Here
is the sequence of events that I am worried about:

o	CPU 0 is the last CPU to invoke a given queue element, and is just
	about to list_del_rcu() it.

o	CPU 1 gets the interrupt (but already did this block last time).
	It nevertheless finds the queue element, because CPU 0 has not
	yet list_del_rcu() it.

	CPU 1 is just about to execute the: 
	
		"if (!cpu_isset(cpu, data->cpumask))"

o	CPU 0 now does the list_del_rcu() on the element that CPU 1
	already has a reference to, and eventually invokes
	call_func_data_free().

o	call_func_data_free() sees that the CSD_FLAG_ALLOC flag is not
	set (this is the fallback element), and therefore simply clears
	the cfd_fallback_used bit, without waiting for a grace period.

o	The queue element is eligible to be reused.  CPU 0 therefore
	starts filling in data.  Note that both the compiler and the
	CPU are free to fully scramble the order of the queue element's
	initialization.

o	CPU 1 resumes execution, sees the half-filled-in queue element,
	which might have CPU 1's bit set but garbage otherwise.

	Life gets hard.  For example, CPU 1 might end up calling the
	wrong function, or the right function with the wrong argument,
	and so on.

Or am I missing something here?

Well, one thing that I was missing is that you are spinning waiting for
the element to become free, so any such grace period would never terminate.

Blech.  Some alternatives:

o	Use memory barriers.  See my comments in smp_call_function_mask()
	and the comment above.

o	Move the check of the bitmask under the per-element lock.
	Then the initialization sequence must also acquire the lock,
	which means that the fallback element must have its lock
	initialized at boot-up.

Other thoughts?

> > +	}
> > +	rcu_read_unlock();
> > +
> > +	put_cpu();
> > +}
> > +
> > +/*
> > + * Invoked by arch to handle an IPI for call function single. Must be called
> > + * from the arch with interrupts disabled.
> 
> runtime check?
> 
> > + */
> > +void generic_smp_call_function_single_interrupt(void)
> > +{
> > +	struct call_single_queue *q = &__get_cpu_var(call_single_queue);
> > +	LIST_HEAD(list);
> > +
> > +	smp_mb();
> 
> Unclear why this is here - comment?

Got me on that one...  Forcing ordering between the initialization of
"q" and the list_empty() check?  Don't immediately see why that is
needed.

> > +	while (!list_empty(&q->list)) {
> > +		unsigned int data_flags;
> > +
> > +		spin_lock(&q->lock);
> > +		list_replace_init(&q->list, &list);
> > +		spin_unlock(&q->lock);
> > +
> > +		while (!list_empty(&list)) {
> > +			struct call_single_data *data;
> > +
> > +			data = list_entry(list.next, struct call_single_data,
> > +						list);
> > +			list_del(&data->list);
> > +
> > +			/*
> > +			 * 'data' can be invalid after this call if
> > +			 * flags == 0 (when called through
> > +			 * generic_exec_single(), so save them away before
> > +			 * making the call.
> > +			 */
> > +			data_flags = data->flags;
> > +
> > +			data->func(data->info);
> > +
> > +			if (data_flags & CSD_FLAG_WAIT) {
> > +				smp_wmb();

This one, as well.  Looks like you want to make sure that the above
function call's writes are seen before the following flag-clearing.

> > +				data->flags &= ~CSD_FLAG_WAIT;
> > +			} else if (data_flags & CSD_FLAG_ALLOC)
> > +				kfree(data);
> > +			else if (data_flags & CSD_FLAG_FALLBACK)
> > +				clear_bit_unlock(0, &cfd_fallback_used);
> > +		}
> > +		smp_mb();

Can't say that I understand what the above memory barrier is doing...

> > +	}
> > +}
> > +
> > +/*
> > + * smp_call_function_single - Run a function on a specific CPU
> > + * @func: The function to run. This must be fast and non-blocking.
> > + * @info: An arbitrary pointer to pass to the function.
> > + * @retry: Unused
> > + * @wait: If true, wait until function has completed on other CPUs.
> > + *
> > + * Returns 0 on success, else a negative status code.
> > + *
> 
> stray line here.
> 
> > + */
> 
> Some of the exported functions have kerneldoc, others do not.
> 
> > +int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
> > +			     int retry, int wait)
> > +{
> > +	unsigned long flags;
> > +	/* prevent preemption and reschedule on another processor */
> > +	int me = get_cpu();
> > +
> > +	/* Can deadlock when called with interrupts disabled */
> > +	WARN_ON(wait && irqs_disabled());
> 
> Perhaps that addresses above comments?
> 
> > +	if (cpu == me) {
> > +		local_irq_save(flags);
> > +		func(info);
> > +		local_irq_restore(flags);
> > +	} else {
> > +		struct call_single_data *data;
> > +
> > +		if (wait) {
> > +			struct call_single_data d;
> > +
> > +			data = &d;
> > +			data->flags = CSD_FLAG_WAIT;
> > +		} else {
> > +			data = kmalloc(sizeof(*data), GFP_ATOMIC);
> > +			if (data)
> > +				data->flags = CSD_FLAG_ALLOC;
> > +			else {
> > +				while (test_and_set_bit_lock(0,
> > +							&cfd_fallback_used))
> 
> cfd_fallback_used is rather mysterious - it could do with a comment at its
> definition site.  I'm wondering if we should/could be doing bit_spin_lock
> on it.  But if we did that, it could just be a spinlock.
> 
> > +					cpu_relax();
> > +
> > +				data = &cfd_fallback.csd;
> > +				data->flags = CSD_FLAG_FALLBACK;
> > +			}
> > +		}
> > +
> > +		data->func = func;
> > +		data->info = info;
> > +		generic_exec_single(cpu, data);
> > +	}
> > +
> > +	put_cpu();
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL(smp_call_function_single);
> > +
> > +/**
> > + * __smp_call_function_single(): Run a function on another CPU
> > + * @cpu: The CPU to run on.
> > + * @data: Pre-allocated and setup data structure
> > + *
> > + * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
> > + * data structure. Useful for embedding @data inside other structures, for
> > + * instance.
> > + *
> > + */
> > +void __smp_call_function_single(int cpu, struct call_single_data *data)
> > +{
> > +	generic_exec_single(cpu, data);
> > +}
> > +
> > +/**
> > + * smp_call_function_mask(): Run a function on a set of other CPUs.
> > + * @mask: The set of cpus to run on.
> > + * @func: The function to run. This must be fast and non-blocking.
> > + * @info: An arbitrary pointer to pass to the function.
> > + * @wait: If true, wait (atomically) until function has completed on other CPUs.
> > + *
> > + * Returns 0 on success, else a negative status code.
> > + *
> > + * If @wait is true, then returns once @func has returned.
> > + *
> > + * You must not call this function with disabled interrupts or from a
> > + * hardware interrupt handler or from a bottom half handler.
> > + */
> > +int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
> > +			   int wait)
> > +{
> > +	struct call_function_data *data;
> > +	cpumask_t allbutself;
> > +	unsigned long flags;
> > +	int num_cpus;
> > +
> > +	/* Can deadlock when called with interrupts disabled */
> > +	WARN_ON(wait && irqs_disabled());
> 
> Ditto
> 
> > +	allbutself = cpu_online_map;
> > +	cpu_clear(smp_processor_id(), allbutself);
> > +	cpus_and(mask, mask, allbutself);
> > +	num_cpus = cpus_weight(mask);
> > +
> > +	if (!num_cpus)
> > +		return 0;
> > +
> > +	if (wait) {
> > +		struct call_function_data d;
> > +
> > +		data = &d;
> > +		data->csd.flags = CSD_FLAG_WAIT;
> > +	} else {
> > +		data = kmalloc(sizeof(*data), GFP_ATOMIC);
> > +		if (data)
> > +			data->csd.flags = CSD_FLAG_ALLOC;
> > +		else {
> > +			while (test_and_set_bit_lock(0, &cfd_fallback_used))
> > +				cpu_relax();
> > +
> > +			data = &cfd_fallback;
> > +			data->csd.flags = CSD_FLAG_FALLBACK;
> > +		}
> > +	}
> > +
> > +	spin_lock_init(&data->lock);
> > +	data->csd.func = func;
> > +	data->csd.info = info;
> > +	data->refs = num_cpus;

You need an smp_wmb() here.  Otherwise, a CPU that picked up a reference
to the fallback element after having processed it on the first pass could
see the next use of the fallback element half-initialized.

See the scenario in my comments on generic_smp_call_function_interrupt()
above.

> > +	data->cpumask = mask;
> > +
> > +	spin_lock_irqsave(&call_function_lock, flags);
> > +	list_add_tail_rcu(&data->csd.list, &call_function_queue);
> > +	spin_unlock_irqrestore(&call_function_lock, flags);
> > +
> > +	/* Send a message to all CPUs in the map */
> > +	arch_send_call_function_ipi(mask);
> > +
> > +	/* optionally wait for the CPUs to complete */
> > +	if (wait)
> > +		csd_flag_wait(&data->csd);
> > +
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL(smp_call_function_mask);
>

WARNING: multiple messages have this Message-ID (diff)

From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <jens.axboe@oracle.com>,
	linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org,
	npiggin@suse.de, torvalds@linux-foundation.org,
	peterz@infradead.org, sam@ravnborg.org
Subject: Re: [PATCH 1/11] Add generic helpers for arch IPI function calls
Date: Sat, 26 Apr 2008 17:58:16 -0700	[thread overview]
Message-ID: <20080427005816.GB21687@linux.vnet.ibm.com> (raw)
Message-ID: <20080427005816.WBXbfDOm9yIKxhHwKwgoB18KtlbzIBHZQwBmJ1eg_EA@z> (raw)
In-Reply-To: <20080425231100.b93a1601.akpm@linux-foundation.org>

On Fri, Apr 25, 2008 at 11:11:00PM -0700, Andrew Morton wrote:
> > On Tue, 22 Apr 2008 20:50:17 +0200 Jens Axboe <jens.axboe@oracle.com> wrote:
> > This adds kernel/smp.c which contains helpers for IPI function calls. In
> > addition to supporting the existing smp_call_function() in a more efficient
> > manner, it also adds a more scalable variant called smp_call_function_single()
> > for calling a given function on a single CPU only.

Where is the new smp_call_function() defined?  I see only the declaration.

Probably just blind...

See comments interspersed below.

> > The core of this is based on the x86-64 patch from Nick Piggin, lots of
> > changes since then. "Alan D. Brunelle" <Alan.Brunelle@hp.com> has
> > contributed lots of fixes and suggestions as well.
> > 
> > Acked-by: Ingo Molnar <mingo@elte.hu>
> > Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
> > ---
> >  arch/Kconfig        |    3 +
> >  include/linux/smp.h |   27 ++++-
> >  init/main.c         |    3 +
> >  kernel/Makefile     |    1 +
> >  kernel/smp.c        |  366 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  5 files changed, 398 insertions(+), 2 deletions(-)
> >  create mode 100644 kernel/smp.c
> > 
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index 694c9af..a5a0184 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -36,3 +36,6 @@ config HAVE_KPROBES
> >  
> >  config HAVE_KRETPROBES
> >  	def_bool n
> > +
> > +config USE_GENERIC_SMP_HELPERS
> > +	def_bool n
> > diff --git a/include/linux/smp.h b/include/linux/smp.h
> > index 55232cc..4a5418b 100644
> > --- a/include/linux/smp.h
> > +++ b/include/linux/smp.h
> > @@ -7,9 +7,19 @@
> >   */
> >  
> >  #include <linux/errno.h>
> > +#include <linux/list.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/cpumask.h>
> >  
> >  extern void cpu_idle(void);
> >  
> > +struct call_single_data {
> > +	struct list_head list;
> > +	void (*func) (void *info);
> > +	void *info;
> > +	unsigned int flags;
> > +};
> > +
> >  #ifdef CONFIG_SMP
> >  
> >  #include <linux/preempt.h>
> > @@ -53,9 +63,23 @@ extern void smp_cpus_done(unsigned int max_cpus);
> >   * Call a function on all other processors
> >   */
> >  int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
> > -
> > +int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info,
> > +				int wait);
> >  int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
> >  				int retry, int wait);
> > +void __smp_call_function_single(int cpuid, struct call_single_data *data);
> > +
> > +/*
> > + * Generic and arch helpers
> > + */
> > +#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
> > +void generic_smp_call_function_single_interrupt(void);
> > +void generic_smp_call_function_interrupt(void);
> > +void init_call_single_data(void);
> > +void arch_send_call_function_single_ipi(int cpu);
> > +void arch_send_call_function_ipi(cpumask_t mask);
> > +extern spinlock_t call_function_lock;
> > +#endif
> 
> Add a #else here ...
> 
> > diff --git a/init/main.c b/init/main.c
> > index 833a67d..0b7578c 100644
> > --- a/init/main.c
> > +++ b/init/main.c
> > @@ -773,6 +773,9 @@ static void __init do_pre_smp_initcalls(void)
> >  {
> >  	extern int spawn_ksoftirqd(void);
> >  
> > +#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
> > +	init_call_single_data();
> > +#endif
> 
> and we can lose this ifdef here.
> 
> Also, init_call_single_data() is __cpuinit but isn't declared that way. 
> There have been rare occasions (FRV was one) where this matters - iirc the
> compiler was emitting a short-relative-addressing-form instruction
> which wasn't able to branch far enough once things were linked.  We have
> this problem in eight zillion other places, of course.  And it's a pita to
> go adding __cpuinit etc to the declaration because the compiler usually
> won't tell us when it gets out of sync with reality.  So we could leave the
> code as-is and wait for stuff to break :(
> 
> 
> >  	migration_init();
> >  	spawn_ksoftirqd();
> >  	if (!nosoftlockup)
> > diff --git a/kernel/Makefile b/kernel/Makefile
> > index 6c5f081..7e275d4 100644
> > --- a/kernel/Makefile
> > +++ b/kernel/Makefile
> > @@ -28,6 +28,7 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
> >  obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
> >  obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
> >  obj-$(CONFIG_SMP) += cpu.o spinlock.o
> > +obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
> >  obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
> >  obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
> >  obj-$(CONFIG_UID16) += uid16.o
> > diff --git a/kernel/smp.c b/kernel/smp.c
> > new file mode 100644
> > index 0000000..a177a0d
> > --- /dev/null
> > +++ b/kernel/smp.c
> > @@ -0,0 +1,366 @@
> > +/*
> > + * Generic helpers for smp ipi calls
> > + *
> > + * (C) Jens Axboe <jens.axboe@oracle.com> 2008
> > + *
> > + */
> > +#include <linux/init.h>
> > +#include <linux/module.h>
> > +#include <linux/percpu.h>
> > +#include <linux/rcupdate.h>
> > +#include <linux/smp.h>
> > +
> > +static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
> > +static LIST_HEAD(call_function_queue);
> > +__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
> > +
> > +enum {
> > +	CSD_FLAG_WAIT		= 0x01,
> > +	CSD_FLAG_ALLOC		= 0x02,
> > +	CSD_FLAG_FALLBACK	= 0x04,
> > +};
> > +
> > +struct call_function_data {
> > +	struct call_single_data csd;
> > +	spinlock_t lock;
> > +	unsigned int refs;
> > +	cpumask_t cpumask;
> > +	struct rcu_head rcu_head;
> > +};
> > +
> > +struct call_single_queue {
> > +	struct list_head list;
> > +	spinlock_t lock;
> > +};
> > +
> > +static struct call_function_data cfd_fallback;
> > +static unsigned long cfd_fallback_used;
> > +
> > +void __cpuinit init_call_single_data(void)
> > +{
> > +	int i;
> > +
> > +	for_each_cpu_mask(i, cpu_possible_map) {
> 
> for_each_possible_cpu()?
> 
> Do we _have_ to consider all possible CPUs here?  That can be much larger
> than num_online_cpus.
> 
> > +		struct call_single_queue *q = &per_cpu(call_single_queue, i);
> > +
> > +		spin_lock_init(&q->lock);
> > +		INIT_LIST_HEAD(&q->list);
> > +	}
> > +}
> > +
> > +static inline void csd_flag_wait(struct call_single_data *data)
> > +{
> > +	/* Wait for response */
> > +	do {
> > +		/*
> > +		 * We need to see the flags store in the IPI handler
> > +		 */
> > +		smp_mb();

But don't we need the smp_mb() -after- the CSD_FLAG_WAIT check?

We want the caller to see the effects of the on-other-CPU-called
function upon return, correct?

> > +		if (!(data->flags & CSD_FLAG_WAIT))
> > +			break;
> > +		cpu_relax();
> > +	} while (1);
> > +}
> 
> -ETOOLARGETOINLINE?
> 
> > +/*
> > + * Insert a previously allocated call_single_data element for execution
> > + * on the given CPU. data must already have ->func, ->info, and ->flags set.
> > + */
> > +static void generic_exec_single(int cpu, struct call_single_data *data)
> > +{
> > +	struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
> > +	int wait = data->flags & CSD_FLAG_WAIT, ipi;
> > +	unsigned long flags;
> > +
> > +	spin_lock_irqsave(&dst->lock, flags);
> > +	ipi = list_empty(&dst->list);
> > +	list_add_tail(&data->list, &dst->list);
> > +	spin_unlock_irqrestore(&dst->lock, flags);
> > +
> > +	if (ipi)
> > +		arch_send_call_function_single_ipi(cpu);
> > +
> > +	if (wait)
> > +		csd_flag_wait(data);
> > +}
> 
> Is this a must-be-called-with-local-interrupts-enable function?  (It
> doesn't say so in the covering comment) If so, a runtime check for that
> would be needed - we screw that up regularly.  Ditto any other places where
> this applies.
> 
> > +static void rcu_free_call_data(struct rcu_head *head)
> > +{
> > +	struct call_function_data *cfd;
> > +
> > +	cfd = container_of(head, struct call_function_data, rcu_head);
> > +	kfree(cfd);
> > +}
> > +
> > +static void call_func_data_free(struct call_function_data *data)
> > +{
> > +	if (data->csd.flags & CSD_FLAG_ALLOC)
> > +		call_rcu(&data->rcu_head, rcu_free_call_data);
> > +	else
> > +		clear_bit_unlock(0, &cfd_fallback_used);
> > +}
> 
> Let's cc Mr RCU - he reviews well and often finds problems.

Well, one question...  Is it really OK to clear this bit immediately?
Why don't you need to wait for a grace period to elapse before freeing it?

(OK, I eventually saw why you can't wait for a grace period, but you
still seem to have a bug...  See interspersed below...)

> > +/*
> > + * Invoked by arch to handle an IPI for call function. Must be called with
> > + * interrupts disabled.
> 
> A runtime check would be nice.  The get_cpu() will give us partial coverage
> but won't detect irqs-on-inside-spinlock state.
> 
> > + */
> > +void generic_smp_call_function_interrupt(void)
> > +{
> > +	struct list_head *pos;
> > +	int cpu = get_cpu();
> > +
> > +	/*
> > +	 * It's ok to use list_for_each_rcu() here even though we may delete
> > +	 * 'pos', since list_del_rcu() doesn't clear ->next
> > +	 */
> > +	rcu_read_lock();
> 
> Sigh.  irqs are disabled, so this is a waste of CPU cycles.  With some of
> our many RCU flavours, at least.

To safely omit the rcu_read_lock(), Jens would need to use the in-flight
call_rcu_sched() instead of call_rcu().  In addition, if we ever get
threaded interrupts, this interrupt handler would need IRQ_NODELAY.

So keeping the rcu_read_lock() would be more future-proof...

> > +	list_for_each_rcu(pos, &call_function_queue) {
> > +		struct call_function_data *data;
> > +		int refs;
> > +
> > +		data = list_entry(pos, struct call_function_data, csd.list);

How about using list_for_each_entry_rcu() instead?

	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
		int refs;

And then move the definition of "data" up to replace that of "pos".

A couple fewer lines and one fewer local variable.  ;-)

> > +		if (!cpu_isset(cpu, data->cpumask))
> > +			continue;

Need an smp_mb() here, at least in the case where you are using the
fallback queue element.  See my comments in smp_call_function_mask()
for the justification -- the key point is that if the CPU sees itself
in the mask, it also needs to see the data element all filled out.

See below for a scenario requiring this.

Such a memory barrier is -not- needed for the normally allocated
queue elements.

But memory barriers are a bit ugly...  How about instead acquiring the
data->lock around the cpu_isset() check above?

> > +
> > +		data->csd.func(data->csd.info);
> > +
> > +		spin_lock(&data->lock);
> > +		cpu_clear(cpu, data->cpumask);
> > +		WARN_ON(data->refs == 0);
> > +		data->refs--;
> > +		refs = data->refs;
> > +		spin_unlock(&data->lock);
> > +
> > +		if (refs)
> > +			continue;
> > +
> > +		WARN_ON(cpus_weight(data->cpumask));
> 
> !cpus_empty()
> 
> > +		spin_lock(&call_function_lock);
> > +		list_del_rcu(&data->csd.list);
> > +		spin_unlock(&call_function_lock);
> > +
> > +		if (data->csd.flags & CSD_FLAG_WAIT) {
> > +			smp_wmb();
> 
> It's nice to comment open-coded barriers.
> 
> > +			data->csd.flags &= ~CSD_FLAG_WAIT;
> > +		} else
> > +			call_func_data_free(data);

And the above statements are one reason that I believe you need to wait
for a grace period even when using the static fallback element.  Here
is the sequence of events that I am worried about:

o	CPU 0 is the last CPU to invoke a given queue element, and is just
	about to list_del_rcu() it.

o	CPU 1 gets the interrupt (but already did this block last time).
	It nevertheless finds the queue element, because CPU 0 has not
	yet list_del_rcu() it.

	CPU 1 is just about to execute the: 
	
		"if (!cpu_isset(cpu, data->cpumask))"

o	CPU 0 now does the list_del_rcu() on the element that CPU 1
	already has a reference to, and eventually invokes
	call_func_data_free().

o	call_func_data_free() sees that the CSD_FLAG_ALLOC flag is not
	set (this is the fallback element), and therefore simply clears
	the cfd_fallback_used bit, without waiting for a grace period.

o	The queue element is eligible to be reused.  CPU 0 therefore
	starts filling in data.  Note that both the compiler and the
	CPU are free to fully scramble the order of the queue element's
	initialization.

o	CPU 1 resumes execution, sees the half-filled-in queue element,
	which might have CPU 1's bit set but garbage otherwise.

	Life gets hard.  For example, CPU 1 might end up calling the
	wrong function, or the right function with the wrong argument,
	and so on.

Or am I missing something here?

Well, one thing that I was missing is that you are spinning waiting for
the element to become free, so any such grace period would never terminate.

Blech.  Some alternatives:

o	Use memory barriers.  See my comments in smp_call_function_mask()
	and the comment above.

o	Move the check of the bitmask under the per-element lock.
	Then the initialization sequence must also acquire the lock,
	which means that the fallback element must have its lock
	initialized at boot-up.

Other thoughts?

> > +	}
> > +	rcu_read_unlock();
> > +
> > +	put_cpu();
> > +}
> > +
> > +/*
> > + * Invoked by arch to handle an IPI for call function single. Must be called
> > + * from the arch with interrupts disabled.
> 
> runtime check?
> 
> > + */
> > +void generic_smp_call_function_single_interrupt(void)
> > +{
> > +	struct call_single_queue *q = &__get_cpu_var(call_single_queue);
> > +	LIST_HEAD(list);
> > +
> > +	smp_mb();
> 
> Unclear why this is here - comment?

Got me on that one...  Forcing ordering between the initialization of
"q" and the list_empty() check?  Don't immediately see why that is
needed.

> > +	while (!list_empty(&q->list)) {
> > +		unsigned int data_flags;
> > +
> > +		spin_lock(&q->lock);
> > +		list_replace_init(&q->list, &list);
> > +		spin_unlock(&q->lock);
> > +
> > +		while (!list_empty(&list)) {
> > +			struct call_single_data *data;
> > +
> > +			data = list_entry(list.next, struct call_single_data,
> > +						list);
> > +			list_del(&data->list);
> > +
> > +			/*
> > +			 * 'data' can be invalid after this call if
> > +			 * flags == 0 (when called through
> > +			 * generic_exec_single(), so save them away before
> > +			 * making the call.
> > +			 */
> > +			data_flags = data->flags;
> > +
> > +			data->func(data->info);
> > +
> > +			if (data_flags & CSD_FLAG_WAIT) {
> > +				smp_wmb();

This one, as well.  Looks like you want to make sure that the above
function call's writes are seen before the following flag-clearing.

> > +				data->flags &= ~CSD_FLAG_WAIT;
> > +			} else if (data_flags & CSD_FLAG_ALLOC)
> > +				kfree(data);
> > +			else if (data_flags & CSD_FLAG_FALLBACK)
> > +				clear_bit_unlock(0, &cfd_fallback_used);
> > +		}
> > +		smp_mb();

Can't say that I understand what the above memory barrier is doing...

> > +	}
> > +}
> > +
> > +/*
> > + * smp_call_function_single - Run a function on a specific CPU
> > + * @func: The function to run. This must be fast and non-blocking.
> > + * @info: An arbitrary pointer to pass to the function.
> > + * @retry: Unused
> > + * @wait: If true, wait until function has completed on other CPUs.
> > + *
> > + * Returns 0 on success, else a negative status code.
> > + *
> 
> stray line here.
> 
> > + */
> 
> Some of the exported functions have kerneldoc, others do not.
> 
> > +int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
> > +			     int retry, int wait)
> > +{
> > +	unsigned long flags;
> > +	/* prevent preemption and reschedule on another processor */
> > +	int me = get_cpu();
> > +
> > +	/* Can deadlock when called with interrupts disabled */
> > +	WARN_ON(wait && irqs_disabled());
> 
> Perhaps that addresses above comments?
> 
> > +	if (cpu == me) {
> > +		local_irq_save(flags);
> > +		func(info);
> > +		local_irq_restore(flags);
> > +	} else {
> > +		struct call_single_data *data;
> > +
> > +		if (wait) {
> > +			struct call_single_data d;
> > +
> > +			data = &d;
> > +			data->flags = CSD_FLAG_WAIT;
> > +		} else {
> > +			data = kmalloc(sizeof(*data), GFP_ATOMIC);
> > +			if (data)
> > +				data->flags = CSD_FLAG_ALLOC;
> > +			else {
> > +				while (test_and_set_bit_lock(0,
> > +							&cfd_fallback_used))
> 
> cfd_fallback_used is rather mysterious - it could do with a comment at its
> definition site.  I'm wondering if we should/could be doing bit_spin_lock
> on it.  But if we did that, it could just be a spinlock.
> 
> > +					cpu_relax();
> > +
> > +				data = &cfd_fallback.csd;
> > +				data->flags = CSD_FLAG_FALLBACK;
> > +			}
> > +		}
> > +
> > +		data->func = func;
> > +		data->info = info;
> > +		generic_exec_single(cpu, data);
> > +	}
> > +
> > +	put_cpu();
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL(smp_call_function_single);
> > +
> > +/**
> > + * __smp_call_function_single(): Run a function on another CPU
> > + * @cpu: The CPU to run on.
> > + * @data: Pre-allocated and setup data structure
> > + *
> > + * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
> > + * data structure. Useful for embedding @data inside other structures, for
> > + * instance.
> > + *
> > + */
> > +void __smp_call_function_single(int cpu, struct call_single_data *data)
> > +{
> > +	generic_exec_single(cpu, data);
> > +}
> > +
> > +/**
> > + * smp_call_function_mask(): Run a function on a set of other CPUs.
> > + * @mask: The set of cpus to run on.
> > + * @func: The function to run. This must be fast and non-blocking.
> > + * @info: An arbitrary pointer to pass to the function.
> > + * @wait: If true, wait (atomically) until function has completed on other CPUs.
> > + *
> > + * Returns 0 on success, else a negative status code.
> > + *
> > + * If @wait is true, then returns once @func has returned.
> > + *
> > + * You must not call this function with disabled interrupts or from a
> > + * hardware interrupt handler or from a bottom half handler.
> > + */
> > +int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
> > +			   int wait)
> > +{
> > +	struct call_function_data *data;
> > +	cpumask_t allbutself;
> > +	unsigned long flags;
> > +	int num_cpus;
> > +
> > +	/* Can deadlock when called with interrupts disabled */
> > +	WARN_ON(wait && irqs_disabled());
> 
> Ditto
> 
> > +	allbutself = cpu_online_map;
> > +	cpu_clear(smp_processor_id(), allbutself);
> > +	cpus_and(mask, mask, allbutself);
> > +	num_cpus = cpus_weight(mask);
> > +
> > +	if (!num_cpus)
> > +		return 0;
> > +
> > +	if (wait) {
> > +		struct call_function_data d;
> > +
> > +		data = &d;
> > +		data->csd.flags = CSD_FLAG_WAIT;
> > +	} else {
> > +		data = kmalloc(sizeof(*data), GFP_ATOMIC);
> > +		if (data)
> > +			data->csd.flags = CSD_FLAG_ALLOC;
> > +		else {
> > +			while (test_and_set_bit_lock(0, &cfd_fallback_used))
> > +				cpu_relax();
> > +
> > +			data = &cfd_fallback;
> > +			data->csd.flags = CSD_FLAG_FALLBACK;
> > +		}
> > +	}
> > +
> > +	spin_lock_init(&data->lock);
> > +	data->csd.func = func;
> > +	data->csd.info = info;
> > +	data->refs = num_cpus;

You need an smp_wmb() here.  Otherwise, a CPU that picked up a reference
to the fallback element after having processed it on the first pass could
see the next use of the fallback element half-initialized.

See the discussion of the fallback element earlier in this message.

> > +	data->cpumask = mask;
> > +
> > +	spin_lock_irqsave(&call_function_lock, flags);
> > +	list_add_tail_rcu(&data->csd.list, &call_function_queue);
> > +	spin_unlock_irqrestore(&call_function_lock, flags);
> > +
> > +	/* Send a message to all CPUs in the map */
> > +	arch_send_call_function_ipi(mask);
> > +
> > +	/* optionally wait for the CPUs to complete */
> > +	if (wait)
> > +		csd_flag_wait(&data->csd);
> > +
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL(smp_call_function_mask);
>

next prev parent reply	other threads:[~2008-04-27  0:58 UTC|newest]

Thread overview: 162+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-22 18:50 [PATCH 0/11] Generic smp_call_function() #2 Jens Axboe
2008-04-22 18:50 ` Jens Axboe
     [not found] ` <1208890227-24808-1-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2008-04-22 18:50   ` [PATCH 1/11] Add generic helpers for arch IPI function calls Jens Axboe
2008-04-22 18:50     ` Jens Axboe
     [not found]     ` <1208890227-24808-2-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2008-04-22 20:17       ` Peter Zijlstra
2008-04-22 20:17         ` Peter Zijlstra
2008-04-23  6:07         ` Jens Axboe
2008-04-23  6:07           ` Jens Axboe
     [not found]           ` <20080423060725.GT12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-23  6:32             ` Peter Zijlstra
2008-04-23  6:32               ` Peter Zijlstra
2008-04-23  7:49               ` Jens Axboe
2008-04-23  7:49                 ` Jens Axboe
     [not found]                 ` <20080423074933.GB12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-23  7:50                   ` Peter Zijlstra
2008-04-23  7:50                     ` Peter Zijlstra
2008-04-24 22:01       ` Russell King
2008-04-24 22:01         ` Russell King
     [not found]         ` <20080424220157.GA26179-f404yB8NqCZvn6HldHNs0ANdhmdF6hFW@public.gmane.org>
2008-04-25  7:18           ` Jens Axboe
2008-04-25  7:18             ` Jens Axboe
     [not found]             ` <20080425071823.GF12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-26  6:28               ` Jeremy Fitzhardinge
2008-04-26  6:28                 ` Jeremy Fitzhardinge
     [not found]                 ` <4812CB99.1070600-TSDbQ3PG+2Y@public.gmane.org>
2008-04-28  7:38                   ` Jes Sorensen
2008-04-28  7:38                     ` Jes Sorensen
2008-04-26  6:11       ` Andrew Morton
2008-04-26  6:11         ` Andrew Morton
2008-04-26  6:11         ` Andrew Morton
     [not found]         ` <20080425231100.b93a1601.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
2008-04-26 14:13           ` James Bottomley
2008-04-26 14:13             ` James Bottomley
2008-04-27  0:58           ` Paul E. McKenney [this message]
2008-04-27  0:58             ` Paul E. McKenney
     [not found]             ` <20080427005816.GB21687-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2008-04-27 10:36               ` Jens Axboe
2008-04-27 10:36                 ` Jens Axboe
2008-04-27 10:30           ` Jens Axboe
2008-04-27 10:30             ` Jens Axboe
     [not found]         ` <1209219236.3113.6.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2008-04-28 14:25           ` David Howells
2008-04-28 14:25             ` David Howells
     [not found]             ` <18470.1209392727-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2008-04-28 14:43               ` James Bottomley
2008-04-28 14:43                 ` James Bottomley
2008-04-22 18:50   ` [PATCH 2/11] x86: convert to generic helpers for " Jens Axboe
2008-04-22 18:50     ` Jens Axboe
2008-04-22 19:03     ` Linus Torvalds
     [not found]       ` <alpine.LFD.1.10.0804221157220.2779-5CScLwifNT1QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org>
2008-04-22 19:12         ` Ingo Molnar
2008-04-22 19:12           ` Ingo Molnar
     [not found]           ` <20080422191213.GA6370-X9Un+BFzKDI@public.gmane.org>
2008-04-22 19:22             ` Linus Torvalds
2008-04-22 19:22               ` Linus Torvalds
2008-04-22 19:26               ` Ingo Molnar
     [not found]                 ` <20080422192601.GB12588-X9Un+BFzKDI@public.gmane.org>
2008-04-22 19:50                   ` Linus Torvalds
2008-04-22 19:50                     ` Linus Torvalds
     [not found]                     ` <alpine.LFD.1.10.0804221244350.2779-5CScLwifNT1QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org>
2008-04-23  1:11                       ` Nick Piggin
2008-04-23  1:11                         ` Nick Piggin
     [not found]                         ` <20080423011153.GB17572-B4tOwbsTzaBolqkO4TVVkw@public.gmane.org>
2008-04-23  1:22                           ` Linus Torvalds
2008-04-23  1:22                             ` Linus Torvalds
     [not found]                             ` <alpine.LFD.1.10.0804221817050.2779-5CScLwifNT1QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org>
2008-04-23  1:36                               ` Nick Piggin
2008-04-23  1:36                                 ` Nick Piggin
2008-04-23  7:08                           ` Jens Axboe
2008-04-23  7:08                             ` Jens Axboe
2008-04-23 12:54         ` Jens Axboe
2008-04-23 12:54           ` Jens Axboe
     [not found]     ` <1208890227-24808-3-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2008-04-26  6:44       ` Jeremy Fitzhardinge
2008-04-26  6:44         ` Jeremy Fitzhardinge
     [not found]         ` <4812CF5B.4080902-TSDbQ3PG+2Y@public.gmane.org>
2008-04-27 10:23           ` Jens Axboe
2008-04-27 10:23             ` Jens Axboe
     [not found]             ` <20080427102335.GS12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-27 15:18               ` Jeremy Fitzhardinge
2008-04-27 15:18                 ` Jeremy Fitzhardinge
2008-04-22 18:50   ` [PATCH 3/11] powerpc: " Jens Axboe
2008-04-22 18:50     ` Jens Axboe
2008-04-22 18:50   ` [PATCH 4/11] ia64: " Jens Axboe
2008-04-22 18:50     ` Jens Axboe
2008-04-22 18:50   ` [PATCH 5/11] alpha: " Jens Axboe
2008-04-22 18:50     ` Jens Axboe
2008-04-22 18:50   ` [PATCH 6/11] arm: " Jens Axboe
2008-04-22 18:50     ` Jens Axboe
2008-04-22 18:50   ` [PATCH 7/11] m32r: " Jens Axboe
2008-04-22 18:50     ` Jens Axboe
2008-04-22 18:50   ` [PATCH 8/11] mips: " Jens Axboe
2008-04-22 18:50     ` Jens Axboe
2008-04-22 23:18     ` Ralf Baechle
2008-04-23  7:18       ` Jens Axboe
2008-04-22 18:50   ` [PATCH 9/11] parisc: " Jens Axboe
2008-04-22 18:50     ` Jens Axboe
2008-04-22 18:50   ` [PATCH 10/11] sh: " Jens Axboe
2008-04-22 18:50     ` Jens Axboe
     [not found]     ` <1208890227-24808-11-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2008-04-25  8:56       ` Paul Mundt
2008-04-25  8:56         ` Paul Mundt
     [not found]         ` <20080425085637.GA30569-M7jkjyW5wf5g9hUCZPvPmw@public.gmane.org>
2008-04-25  9:16           ` Jens Axboe
2008-04-25  9:16             ` Jens Axboe
2008-04-22 18:50   ` [PATCH 11/11] s390: " Jens Axboe
2008-04-22 18:50     ` Jens Axboe
     [not found]     ` <1208890227-24808-12-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2008-04-23  7:58       ` Heiko Carstens
2008-04-23  7:58         ` Heiko Carstens
     [not found]         ` <20080423075831.GB15850-Pmgahw53EmNLmI7Nx2oIsGnsbthNF6/HVpNB7YpNyf8@public.gmane.org>
2008-04-23  8:11           ` Jens Axboe
2008-04-23  8:11             ` Jens Axboe
     [not found]             ` <20080423081147.GC12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-23 11:21               ` Jens Axboe
2008-04-23 11:21                 ` Jens Axboe
     [not found]                 ` <20080423112124.GJ12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-23 11:47                   ` Heiko Carstens
2008-04-23 11:47                     ` Heiko Carstens
     [not found]                     ` <20080423114744.GA29422-Pmgahw53EmNLmI7Nx2oIsGnsbthNF6/HVpNB7YpNyf8@public.gmane.org>
2008-04-23 11:54                       ` Jens Axboe
2008-04-23 11:54                         ` Jens Axboe
2008-04-23 12:42                       ` Martin Schwidefsky
2008-04-23 12:42                         ` Martin Schwidefsky
2008-04-23 15:56                         ` Rusty Russell
2008-04-23 15:56                           ` Rusty Russell
  -- strict thread matches above, loose matches on Subject: below --
2008-04-22  7:57 [PATCH 0/11] Generic smp_call_function() and friends Jens Axboe
     [not found] ` <1208851058-8500-1-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2008-04-22  7:57   ` [PATCH 1/11] Add generic helpers for arch IPI function calls Jens Axboe
2008-04-22  7:57     ` Jens Axboe
     [not found]     ` <1208851058-8500-2-git-send-email-jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2008-04-22  9:16       ` Avi Kivity
2008-04-22  9:16         ` Avi Kivity
     [not found]         ` <480DACDD.7040108-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-04-22  9:22           ` Jens Axboe
2008-04-22  9:22             ` Jens Axboe
     [not found]             ` <20080422092230.GW12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-22 11:14               ` Jens Axboe
2008-04-22 11:14                 ` Jens Axboe
2008-04-22 13:00                 ` Peter Zijlstra
2008-04-22 14:25                   ` Jens Axboe
2008-04-22 14:25                     ` Jens Axboe
     [not found]                     ` <20080422142543.GG12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-22 14:38                       ` Avi Kivity
2008-04-22 14:38                         ` Avi Kivity
     [not found]                         ` <480DF861.6000705-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-04-22 14:43                           ` Peter Zijlstra
2008-04-22 14:43                             ` Peter Zijlstra
2008-04-22 14:47                             ` Avi Kivity
2008-04-22 14:47                               ` Avi Kivity
2008-04-22 14:53                         ` Jens Axboe
2008-04-22 14:43       ` Linus Torvalds
2008-04-22 14:43         ` Linus Torvalds
     [not found]         ` <alpine.LFD.1.10.0804220735350.2779-5CScLwifNT1QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org>
2008-04-22 14:51           ` Jens Axboe
2008-04-22 14:51             ` Jens Axboe
2008-04-22 15:01             ` Linus Torvalds
2008-04-22 16:49               ` Jens Axboe
2008-04-22 16:49                 ` Jens Axboe
     [not found]                 ` <20080422164947.GN12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-22 17:04                   ` Jens Axboe
2008-04-22 17:04                     ` Jens Axboe
     [not found]                     ` <20080422170405.GO12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-22 17:13                       ` Jens Axboe
2008-04-22 17:13                         ` Jens Axboe
     [not found]                         ` <20080422171324.GP12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-22 17:29                           ` Linus Torvalds
2008-04-22 17:29                             ` Linus Torvalds
     [not found]                             ` <alpine.LFD.1.10.0804221027210.2779-5CScLwifNT1QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org>
2008-04-22 18:23                               ` Jens Axboe
2008-04-22 18:23                                 ` Jens Axboe
2008-04-22 18:23                                 ` Jens Axboe
     [not found]                                 ` <20080422182337.GQ12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-22 18:39                                   ` Linus Torvalds
2008-04-22 18:39                                     ` Linus Torvalds
2008-04-22 14:58           ` Linus Torvalds
2008-04-22 14:58             ` Linus Torvalds
     [not found]             ` <alpine.LFD.1.10.0804220749450.2779-5CScLwifNT1QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org>
2008-04-22 15:07               ` Jens Axboe
2008-04-22 15:07                 ` Jens Axboe
2008-04-22 23:12       ` Mark Lord
2008-04-22 23:12         ` Mark Lord
     [not found]         ` <480E70ED.3030701-gsilrlXbHYg@public.gmane.org>
2008-04-23  7:24           ` Jens Axboe
2008-04-23  7:24             ` Jens Axboe
     [not found]             ` <20080423072432.GX12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-23 13:42               ` Mark Lord
2008-04-23 13:42                 ` Mark Lord
     [not found]                 ` <480F3CBC.60305-gsilrlXbHYg@public.gmane.org>
2008-04-23 13:51                   ` Jens Axboe
2008-04-23 13:51                     ` Jens Axboe
2008-04-23 14:46                     ` Mark Lord
     [not found]                       ` <480F4BD9.8090003-gsilrlXbHYg@public.gmane.org>
2008-04-24 10:59                         ` Jens Axboe
2008-04-24 10:59                           ` Jens Axboe
     [not found]                           ` <20080424105908.GW12774-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>
2008-04-24 12:44                             ` Mark Lord
2008-04-24 12:44                               ` Mark Lord
     [not found]                               ` <481080A0.9050804-gsilrlXbHYg@public.gmane.org>
2008-04-24 21:30                                 ` Rafael J. Wysocki
2008-04-24 21:30                                   ` Rafael J. Wysocki
2008-04-25 11:08                                 ` Pavel Machek
2008-04-25 11:08                                   ` Pavel Machek
2008-04-26  8:04                             ` Pavel Machek
2008-04-26  8:04                               ` Pavel Machek
2008-04-28 15:13                               ` Mark Lord
2008-05-01 16:23                                 ` Pavel Machek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080427005816.GB21687@linux.vnet.ibm.com \
    --to=paulmck-23vcf4htsmix0ybbhkvfkdbpr1lh4cv8@public.gmane.org \
    --cc=akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org \
    --cc=jens.axboe-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org \
    --cc=linux-arch-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=npiggin-l3A5Bk7waGM@public.gmane.org \
    --cc=peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org \
    --cc=sam-uyr5N9Q2VtJg9hUCZPvPmw@public.gmane.org \
    --cc=torvalds-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.