From: Jeremy Fitzhardinge <jeremy@goop.org>
To: Jens Axboe <jens.axboe@oracle.com>
Cc: linux-kernel@vger.kernel.org, npiggin@suse.de, dgc@sgi.com
Subject: Re: [PATCH 1/7] x86-64: introduce fast variant of smp_call_function_single()
Date: Fri, 14 Mar 2008 11:21:16 -0700 [thread overview]
Message-ID: <47DAC21C.1040805@goop.org> (raw)
In-Reply-To: <1205322940-20127-2-git-send-email-jens.axboe@oracle.com>
Jens Axboe wrote:
> rom: Nick Piggin <npiggin@suse.de>
>
Why is this necessary? How is smp_call_function_single slow?
J
> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
> ---
> arch/x86/kernel/entry_64.S | 3 +
> arch/x86/kernel/i8259_64.c | 1 +
> arch/x86/kernel/smp_64.c | 303 +++++++++++++++++++++--------
> include/asm-x86/hw_irq_64.h | 4 +-
> include/asm-x86/mach-default/entry_arch.h | 1 +
> include/linux/smp.h | 2 +-
> 6 files changed, 232 insertions(+), 82 deletions(-)
>
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index c20c9e7..22caf56 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -713,6 +713,9 @@ END(invalidate_interrupt\num)
> ENTRY(call_function_interrupt)
> apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
> END(call_function_interrupt)
> +ENTRY(call_function_single_interrupt)
> + apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
> +END(call_function_single_interrupt)
> ENTRY(irq_move_cleanup_interrupt)
> apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
> END(irq_move_cleanup_interrupt)
> diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c
> index fa57a15..2b0b6d2 100644
> --- a/arch/x86/kernel/i8259_64.c
> +++ b/arch/x86/kernel/i8259_64.c
> @@ -493,6 +493,7 @@ void __init native_init_IRQ(void)
>
> /* IPI for generic function call */
> set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
> + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
>
> /* Low priority IPI to cleanup after moving an irq */
> set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
> diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
> index 2fd74b0..1196a12 100644
> --- a/arch/x86/kernel/smp_64.c
> +++ b/arch/x86/kernel/smp_64.c
> @@ -18,6 +18,7 @@
> #include <linux/kernel_stat.h>
> #include <linux/mc146818rtc.h>
> #include <linux/interrupt.h>
> +#include <linux/rcupdate.h>
>
> #include <asm/mtrr.h>
> #include <asm/pgalloc.h>
> @@ -295,21 +296,29 @@ void smp_send_reschedule(int cpu)
> send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
> }
>
> +#define CALL_WAIT 0x01
> +#define CALL_FALLBACK 0x02
> /*
> * Structure and data for smp_call_function(). This is designed to minimise
> * static memory requirements. It also looks cleaner.
> */
> static DEFINE_SPINLOCK(call_lock);
>
> -struct call_data_struct {
> +struct call_data {
> + spinlock_t lock;
> + struct list_head list;
> void (*func) (void *info);
> void *info;
> - atomic_t started;
> - atomic_t finished;
> - int wait;
> + unsigned int flags;
> + unsigned int refs;
> + cpumask_t cpumask;
> + struct rcu_head rcu_head;
> };
>
> -static struct call_data_struct * call_data;
> +static LIST_HEAD(call_queue);
> +
> +static unsigned long call_fallback_used;
> +static struct call_data call_data_fallback;
>
> void lock_ipi_call_lock(void)
> {
> @@ -321,55 +330,47 @@ void unlock_ipi_call_lock(void)
> spin_unlock_irq(&call_lock);
> }
>
> -/*
> - * this function sends a 'generic call function' IPI to all other CPU
> - * of the system defined in the mask.
> - */
> -static int __smp_call_function_mask(cpumask_t mask,
> - void (*func)(void *), void *info,
> - int wait)
> -{
> - struct call_data_struct data;
> - cpumask_t allbutself;
> - int cpus;
>
> - allbutself = cpu_online_map;
> - cpu_clear(smp_processor_id(), allbutself);
> -
> - cpus_and(mask, mask, allbutself);
> - cpus = cpus_weight(mask);
> -
> - if (!cpus)
> - return 0;
> -
> - data.func = func;
> - data.info = info;
> - atomic_set(&data.started, 0);
> - data.wait = wait;
> - if (wait)
> - atomic_set(&data.finished, 0);
> +struct call_single_data {
> + struct list_head list;
> + void (*func) (void *info);
> + void *info;
> + unsigned int flags;
> +};
>
> - call_data = &data;
> - wmb();
> +struct call_single_queue {
> + spinlock_t lock;
> + struct list_head list;
> +};
> +static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
>
> - /* Send a message to other CPUs */
> - if (cpus_equal(mask, allbutself))
> - send_IPI_allbutself(CALL_FUNCTION_VECTOR);
> - else
> - send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
> +static unsigned long call_single_fallback_used;
> +static struct call_single_data call_single_data_fallback;
>
> - /* Wait for response */
> - while (atomic_read(&data.started) != cpus)
> - cpu_relax();
> +int __cpuinit init_smp_call(void)
> +{
> + int i;
>
> - if (!wait)
> - return 0;
> + for_each_cpu_mask(i, cpu_possible_map) {
> + spin_lock_init(&per_cpu(call_single_queue, i).lock);
> + INIT_LIST_HEAD(&per_cpu(call_single_queue, i).list);
> + }
> + return 0;
> +}
> +core_initcall(init_smp_call);
>
> - while (atomic_read(&data.finished) != cpus)
> - cpu_relax();
> +static void rcu_free_call_data(struct rcu_head *head)
> +{
> + struct call_data *data;
> + data = container_of(head, struct call_data, rcu_head);
> + kfree(data);
> +}
>
> - return 0;
> +static void free_call_data(struct call_data *data)
> +{
> + call_rcu(&data->rcu_head, rcu_free_call_data);
> }
> +
> /**
> * smp_call_function_mask(): Run a function on a set of other CPUs.
> * @mask: The set of cpus to run on. Must not include the current cpu.
> @@ -389,15 +390,69 @@ int smp_call_function_mask(cpumask_t mask,
> void (*func)(void *), void *info,
> int wait)
> {
> - int ret;
> + struct call_data *data;
> + cpumask_t allbutself;
> + unsigned int flags;
> + int cpus;
>
> /* Can deadlock when called with interrupts disabled */
> WARN_ON(irqs_disabled());
> + WARN_ON(preemptible());
> +
> + allbutself = cpu_online_map;
> + cpu_clear(smp_processor_id(), allbutself);
> +
> + cpus_and(mask, mask, allbutself);
> + cpus = cpus_weight(mask);
> +
> + if (!cpus)
> + return 0;
>
> - spin_lock(&call_lock);
> - ret = __smp_call_function_mask(mask, func, info, wait);
> + flags = wait ? CALL_WAIT : 0;
> + data = kmalloc(sizeof(struct call_data), GFP_ATOMIC);
> + if (unlikely(!data)) {
> + while (test_and_set_bit_lock(0, &call_fallback_used))
> + cpu_relax();
> + data = &call_data_fallback;
> + flags |= CALL_FALLBACK;
> + /* XXX: can IPI all to "synchronize" RCU? */
> + }
> +
> + spin_lock_init(&data->lock);
> + data->func = func;
> + data->info = info;
> + data->flags = flags;
> + data->refs = cpus;
> + data->cpumask = mask;
> +
> + local_irq_disable();
> + while (!spin_trylock(&call_lock)) {
> + local_irq_enable();
> + cpu_relax();
> + local_irq_disable();
> + }
> + /* could do ipi = list_empty(&dst->list) || !cpumask_ipi_pending() */
> + list_add_tail_rcu(&data->list, &call_queue);
> spin_unlock(&call_lock);
> - return ret;
> + local_irq_enable();
> +
> + /* Send a message to other CPUs */
> + if (cpus_equal(mask, allbutself))
> + send_IPI_allbutself(CALL_FUNCTION_VECTOR);
> + else
> + send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
> +
> + if (wait) {
> + /* Wait for response */
> + while (data->flags)
> + cpu_relax();
> + if (likely(!(flags & CALL_FALLBACK)))
> + free_call_data(data);
> + else
> + clear_bit_unlock(0, &call_fallback_used);
> + }
> +
> + return 0;
> }
> EXPORT_SYMBOL(smp_call_function_mask);
>
> @@ -414,11 +469,11 @@ EXPORT_SYMBOL(smp_call_function_mask);
> * or is or has executed.
> */
>
> -int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
> +int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
> int nonatomic, int wait)
> {
> /* prevent preemption and reschedule on another processor */
> - int ret, me = get_cpu();
> + int me = get_cpu();
>
> /* Can deadlock when called with interrupts disabled */
> WARN_ON(irqs_disabled());
> @@ -427,14 +482,54 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
> local_irq_disable();
> func(info);
> local_irq_enable();
> - put_cpu();
> - return 0;
> - }
> + } else {
> + struct call_single_data d;
> + struct call_single_data *data;
> + struct call_single_queue *dst;
> + cpumask_t mask = cpumask_of_cpu(cpu);
> + unsigned int flags = wait ? CALL_WAIT : 0;
> + int ipi;
> +
> + if (!wait) {
> + data = kmalloc(sizeof(struct call_single_data), GFP_ATOMIC);
> + if (unlikely(!data)) {
> + while (test_and_set_bit_lock(0, &call_single_fallback_used))
> + cpu_relax();
> + data = &call_single_data_fallback;
> + flags |= CALL_FALLBACK;
> + }
> + } else {
> + data = &d;
> + }
> +
> + data->func = func;
> + data->info = info;
> + data->flags = flags;
> + dst = &per_cpu(call_single_queue, cpu);
> +
> + local_irq_disable();
> + while (!spin_trylock(&dst->lock)) {
> + local_irq_enable();
> + cpu_relax();
> + local_irq_disable();
> + }
> + ipi = list_empty(&dst->list);
> + list_add_tail(&data->list, &dst->list);
> + spin_unlock(&dst->lock);
> + local_irq_enable();
>
> - ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
> + if (ipi)
> + send_IPI_mask(mask, CALL_FUNCTION_SINGLE_VECTOR);
> +
> + if (wait) {
> + /* Wait for response */
> + while (data->flags)
> + cpu_relax();
> + }
> + }
>
> put_cpu();
> - return ret;
> + return 0;
> }
> EXPORT_SYMBOL(smp_call_function_single);
>
> @@ -474,18 +569,13 @@ static void stop_this_cpu(void *dummy)
>
> void smp_send_stop(void)
> {
> - int nolock;
> unsigned long flags;
>
> if (reboot_force)
> return;
>
> - /* Don't deadlock on the call lock in panic */
> - nolock = !spin_trylock(&call_lock);
> local_irq_save(flags);
> - __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
> - if (!nolock)
> - spin_unlock(&call_lock);
> + smp_call_function(stop_this_cpu, NULL, 0, 0);
> disable_local_APIC();
> local_irq_restore(flags);
> }
> @@ -503,28 +593,83 @@ asmlinkage void smp_reschedule_interrupt(void)
>
> asmlinkage void smp_call_function_interrupt(void)
> {
> - void (*func) (void *info) = call_data->func;
> - void *info = call_data->info;
> - int wait = call_data->wait;
> + struct list_head *pos, *tmp;
> + int cpu = smp_processor_id();
>
> ack_APIC_irq();
> - /*
> - * Notify initiating CPU that I've grabbed the data and am
> - * about to execute the function
> - */
> - mb();
> - atomic_inc(&call_data->started);
> - /*
> - * At this point the info structure may be out of scope unless wait==1
> - */
> exit_idle();
> irq_enter();
> - (*func)(info);
> +
> + list_for_each_safe_rcu(pos, tmp, &call_queue) {
> + struct call_data *data;
> + int refs;
> +
> + data = list_entry(pos, struct call_data, list);
> + if (!cpu_isset(cpu, data->cpumask))
> + continue;
> +
> + data->func(data->info);
> + spin_lock(&data->lock);
> + WARN_ON(!cpu_isset(cpu, data->cpumask));
> + cpu_clear(cpu, data->cpumask);
> + WARN_ON(data->refs == 0);
> + data->refs--;
> + refs = data->refs;
> + spin_unlock(&data->lock);
> +
> + if (refs == 0) {
> + WARN_ON(cpus_weight(data->cpumask));
> + spin_lock(&call_lock);
> + list_del_rcu(&data->list);
> + spin_unlock(&call_lock);
> + if (data->flags & CALL_WAIT) {
> + smp_wmb();
> + data->flags = 0;
> + } else {
> + if (likely(!(data->flags & CALL_FALLBACK)))
> + free_call_data(data);
> + else
> + clear_bit_unlock(0, &call_fallback_used);
> + }
> + }
> + }
> +
> add_pda(irq_call_count, 1);
> irq_exit();
> - if (wait) {
> - mb();
> - atomic_inc(&call_data->finished);
> +}
> +
> +asmlinkage void smp_call_function_single_interrupt(void)
> +{
> + struct call_single_queue *q;
> + LIST_HEAD(list);
> +
> + ack_APIC_irq();
> + exit_idle();
> + irq_enter();
> +
> + q = &__get_cpu_var(call_single_queue);
> + spin_lock(&q->lock);
> + list_replace_init(&q->list, &list);
> + spin_unlock(&q->lock);
> +
> + while (!list_empty(&list)) {
> + struct call_single_data *data;
> +
> + data = list_entry(list.next, struct call_single_data, list);
> + list_del(&data->list);
> +
> + data->func(data->info);
> + if (data->flags & CALL_WAIT) {
> + smp_wmb();
> + data->flags = 0;
> + } else {
> + if (likely(!(data->flags & CALL_FALLBACK)))
> + kfree(data);
> + else
> + clear_bit_unlock(0, &call_single_fallback_used);
> + }
> }
> + add_pda(irq_call_count, 1);
> + irq_exit();
> }
>
> diff --git a/include/asm-x86/hw_irq_64.h b/include/asm-x86/hw_irq_64.h
> index 312a58d..06ac80c 100644
> --- a/include/asm-x86/hw_irq_64.h
> +++ b/include/asm-x86/hw_irq_64.h
> @@ -68,8 +68,7 @@
> #define ERROR_APIC_VECTOR 0xfe
> #define RESCHEDULE_VECTOR 0xfd
> #define CALL_FUNCTION_VECTOR 0xfc
> -/* fb free - please don't readd KDB here because it's useless
> - (hint - think what a NMI bit does to a vector) */
> +#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
> #define THERMAL_APIC_VECTOR 0xfa
> #define THRESHOLD_APIC_VECTOR 0xf9
> /* f8 free */
> @@ -102,6 +101,7 @@ void spurious_interrupt(void);
> void error_interrupt(void);
> void reschedule_interrupt(void);
> void call_function_interrupt(void);
> +void call_function_single_interrupt(void);
> void irq_move_cleanup_interrupt(void);
> void invalidate_interrupt0(void);
> void invalidate_interrupt1(void);
> diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
> index bc86146..9283b60 100644
> --- a/include/asm-x86/mach-default/entry_arch.h
> +++ b/include/asm-x86/mach-default/entry_arch.h
> @@ -13,6 +13,7 @@
> BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
> BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
> BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
> +BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
> #endif
>
> /*
> diff --git a/include/linux/smp.h b/include/linux/smp.h
> index 55232cc..c938d26 100644
> --- a/include/linux/smp.h
> +++ b/include/linux/smp.h
> @@ -53,7 +53,6 @@ extern void smp_cpus_done(unsigned int max_cpus);
> * Call a function on all other processors
> */
> int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
> -
> int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
> int retry, int wait);
>
> @@ -92,6 +91,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info)
> }
> #define smp_call_function(func, info, retry, wait) \
> (up_smp_call_function(func, info))
> +
> #define on_each_cpu(func,info,retry,wait) \
> ({ \
> local_irq_disable(); \
>
next prev parent reply other threads:[~2008-03-14 18:23 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-03-12 11:55 [PATCH 0/7] IO CPU affinity testing series Jens Axboe
2008-03-12 11:55 ` [PATCH 1/7] x86-64: introduce fast variant of smp_call_function_single() Jens Axboe
2008-03-14 18:21 ` Jeremy Fitzhardinge [this message]
2008-03-16 18:45 ` Jens Axboe
2008-03-16 22:58 ` Jeremy Fitzhardinge
2008-03-17 2:24 ` Nick Piggin
2008-03-17 7:25 ` Jens Axboe
2008-03-12 11:55 ` [PATCH 2/7] x86-64: speedup and tweak smp_call_function_single() Jens Axboe
2008-03-12 11:55 ` [PATCH 3/7] x86: add fast smp_call_function_single() Jens Axboe
2008-03-12 11:55 ` [PATCH 4/7] block: split softirq handling into blk-softirq.c Jens Axboe
2008-03-12 11:55 ` [PATCH 5/7] Add interface for queuing work on a specific CPU Jens Axboe
2008-03-12 11:55 ` [PATCH 6/7] block: make kblockd_schedule_work() take the queue as parameter Jens Axboe
2008-03-12 11:55 ` [PATCH 7/7] block: add test code for testing CPU affinity Jens Axboe
2008-03-12 16:41 ` [PATCH 0/7] IO CPU affinity testing series Alan D. Brunelle
2008-03-12 17:54 ` Jens Axboe
2008-03-12 20:37 ` Max Krasnyanskiy
2008-03-13 12:13 ` Jens Axboe
2008-03-13 14:54 ` Alan D. Brunelle
2008-03-13 15:00 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=47DAC21C.1040805@goop.org \
--to=jeremy@goop.org \
--cc=dgc@sgi.com \
--cc=jens.axboe@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=npiggin@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.