From: Rusty Russell <rusty@rustcorp.com.au>
To: Mike Travis <travis@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>, "H. Peter Anvin" <hpa@zytor.com>,
Thomas Gleixner <tglx@linutronix.de>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH 2/4] x86: fix cpu_mask_to_apicid_and to include cpu_online_mask
Date: Fri, 12 Dec 2008 21:36:33 +1030 [thread overview]
Message-ID: <200812122136.34780.rusty@rustcorp.com.au> (raw)
In-Reply-To: <20081211112806.873424000@polaris-admin.engr.sgi.com>
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 16339 bytes --]
On Thursday 11 December 2008 21:58:08 Mike Travis wrote:
> Impact: fix potential problem.
>
> In determining the destination apicid, there are usually three cpumasks
> that are considered: the incoming cpumask arg, cfg->domain and the
> cpu_online_mask. Since we are just introducing the cpu_mask_to_apicid_and
> function, make sure it includes the cpu_online_mask in it's evaluation.
Yerk. Can we really "fail" cpu_mask_to_apicid_and with no repercussions? And does it make sense to try to fix this there?
This is not a new problem with the cpumask patches, is it? I toyed with a patch which converts flush_tlb_others, and it actually ensures that those cases never hand an offline mask to cpu_mask_to_apicid_and as a side effect (others still might).
Patch below for your reading (x86:flush_tlb_others-cpumask-ptr.patch in my series file).
Rusty.
x86: change flush_tlb_others to take a const struct cpumask *. FIXME: REVIEW
This is made a little more tricky by uv_flush_tlb_others which actually alters its argument, for an IPI to be sent to the remaining cpus in the mask.
I solve this by allocating a cpumask_var_t for this case and falling back to IPI should this fail.
To eliminate temporaries in the caller, all flush_tlb_others implementations now do the this-cpu-elimination step themselves.
Note also the curious "cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask)" which has been there since pre-git and yet f->flush_cpumask is always zero at this point.
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_codiff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h--- a/arch/x86/include/asm/paravirt.h+++ b/arch/x86/include/asm/paravirt.h@@ -244,7 +244,8 @@ struct pv_mmu_ops { void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr);- void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,+ void (*flush_tlb_others)(const struct cpumask *cpus,+ struct mm_struct *mm, unsigned long va); /* Hooks for allocating and freeing a pagetable top-level */@@ -984,10 +985,11 @@ static inline void __flush_tlb_single(un PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); } -static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,+static inline void flush_tlb_others(const struct cpumask *cpumask,+ struct mm_struct *mm, unsigned long va) {- PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);+ PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); } static inline int paravirt_pgd_alloc(struct mm_struct *mm)diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h--- a/arch/x86/include/asm/tlbflush.h+++ b/arch/x86/include/asm/tlbflush.h@@ -113,7 +113,7 @@ static inline void flush_tlb_range(struc __flush_tlb(); } -static inline void native_flush_tlb_others(const cpumask_t *cpumask,+static inline void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) {@@ -142,8 +142,8 @@ static inline void flush_tlb_range(struc flush_tlb_mm(vma->vm_mm); } -void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,- unsigned long va);+void native_flush_tlb_others(const struct cpumask *cpumask,+ struct mm_struct *mm, unsigned long va); #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h--- a/arch/x86/include/asm/uv/uv_bau.h+++ b/arch/x86/include/asm/uv/uv_bau.h@@ 
-325,7 +325,8 @@ static inline void bau_cpubits_clear(str #define cpubit_isset(cpu, bau_local_cpumask) \ test_bit((cpu), (bau_local_cpumask).bits) -extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);+extern int uv_flush_tlb_others(const struct cpumask *,+ struct mm_struct *, unsigned long); extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c--- a/arch/x86/kernel/tlb_32.c+++ b/arch/x86/kernel/tlb_32.c@@ -20,7 +20,7 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlb * Optimizations Manfred Spraul <manfred@colorfullife.com> */ -static cpumask_t flush_cpumask;+static cpumask_var_t flush_cpumask; static struct mm_struct *flush_mm; static unsigned long flush_va; static DEFINE_SPINLOCK(tlbstate_lock);@@ -93,7 +94,7 @@ void smp_invalidate_interrupt(struct pt_ cpu = get_cpu(); - if (!cpu_isset(cpu, flush_cpumask))+ if (!cpumask_test_cpu(cpu, flush_cpumask)) goto out; /* * This was a BUG() but until someone can quote me the@@ -115,34 +116,21 @@ void smp_invalidate_interrupt(struct pt_ } ack_APIC_irq(); smp_mb__before_clear_bit();- cpu_clear(cpu, flush_cpumask);+ cpumask_clear_cpu(cpu, flush_cpumask); smp_mb__after_clear_bit(); out: put_cpu_no_resched(); __get_cpu_var(irq_stat).irq_tlb_count++; } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,- unsigned long va)+void native_flush_tlb_others(const struct cpumask *cpumask,+ struct mm_struct *mm, unsigned long va) {- cpumask_t cpumask = *cpumaskp;- /*- * A couple of (to be removed) sanity checks:- *- * - current CPU must not be in mask * - mask must exist :) */- BUG_ON(cpus_empty(cpumask));- BUG_ON(cpu_isset(smp_processor_id(), cpumask));+ BUG_ON(cpumask_empty(cpumask)); BUG_ON(!mm);--#ifdef CONFIG_HOTPLUG_CPU- /* If a CPU which we ran on has gone down, OK. 
*/- cpus_and(cpumask, cpumask, cpu_online_map);- if (unlikely(cpus_empty(cpumask)))- return;-#endif /* * i'm not happy about this global shared spinlock in the@@ -151,9 +139,17 @@ void native_flush_tlb_others(const cpuma */ spin_lock(&tlbstate_lock); + cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));+#ifdef CONFIG_HOTPLUG_CPU+ /* If a CPU which we ran on has gone down, OK. */+ cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);+ if (unlikely(cpumask_empty(flush_cpumask))) {+ spin_unlock(&tlbstate_lock);+ return;+ }+#endif flush_mm = mm; flush_va = va;- cpus_or(flush_cpumask, cpumask, flush_cpumask); /* * Make the above memory operations globally visible before@@ -164,9 +160,9 @@ void native_flush_tlb_others(const cpuma * We have to send the IPI only to * CPUs affected. */- send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);+ send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR); - while (!cpus_empty(flush_cpumask))+ while (!cpumask_empty(flush_cpumask)) /* nothing. 
lockup detection does not belong here */ cpu_relax(); @@ -178,25 +174,17 @@ void flush_tlb_current_task(void) void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm;- cpumask_t cpu_mask; preempt_disable();- cpu_mask = mm->cpu_vm_mask;- cpu_clear(smp_processor_id(), cpu_mask);- local_flush_tlb();- if (!cpus_empty(cpu_mask))- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);+ if (cpumask_any_but(mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)+ flush_tlb_others(mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } void flush_tlb_mm(struct mm_struct *mm) {- cpumask_t cpu_mask;- preempt_disable();- cpu_mask = *mm->cpu_vm_mask;- cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm)@@ -204,8 +192,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); }- if (!cpus_empty(cpu_mask))- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);+ if (cpumask_any_but(mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)+ flush_tlb_others(mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); }@@ -213,12 +201,8 @@ void flush_tlb_page(struct vm_area_struc void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm;- cpumask_t cpu_mask; preempt_disable();- cpu_mask = *mm->cpu_vm_mask;- cpu_clear(smp_processor_id(), cpu_mask);- if (current->active_mm == mm) { if (current->mm) __flush_tlb_one(va);@@ -226,9 +210,8 @@ void flush_tlb_page(struct vm_area_struc leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask))- flush_tlb_others(cpu_mask, mm, va);-+ if (cpumask_any_but(mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)+ flush_tlb_others(mm->cpu_vm_mask, mm, va); preempt_enable(); } EXPORT_SYMBOL(flush_tlb_page);@@ -255,3 +238,9 @@ void reset_lazy_tlbstate(void) per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } +static int init_flush_cpumask(void)+{+ alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);+ return 0;+}+early_initcall(init_flush_cpumask);diff --git 
a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c--- a/arch/x86/kernel/tlb_64.c+++ b/arch/x86/kernel/tlb_64.c@@ -43,10 +43,10 @@ union smp_flush_state { struct {- cpumask_t flush_cpumask; struct mm_struct *flush_mm; unsigned long flush_va; spinlock_t tlbstate_lock;+ DECLARE_BITMAP(flush_cpumask, NR_CPUS); }; char pad[SMP_CACHE_BYTES]; } ____cacheline_aligned;@@ -131,7 +131,7 @@ asmlinkage void smp_invalidate_interrupt sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; f = &per_cpu(flush_state, sender); - if (!cpu_isset(cpu, f->flush_cpumask))+ if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) goto out; /* * This was a BUG() but until someone can quote me the@@ -153,19 +153,15 @@ asmlinkage void smp_invalidate_interrupt } out: ack_APIC_irq();- cpu_clear(cpu, f->flush_cpumask);+ cpumask_clear(cpu, to_cpumask(f->flush_cpumask)); add_pda(irq_tlb_count, 1); } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,- unsigned long va)+static void flush_tlb_others_ipi(const struct cpumask *cpumask,+ struct mm_struct *mm, unsigned long va) { int sender; union smp_flush_state *f;- cpumask_t cpumask = *cpumaskp;-- if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))- return; /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;@@ -180,7 +176,8 @@ void native_flush_tlb_others(const cpuma f->flush_mm = mm; f->flush_va = va;- cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);+ cpumask_andnot(to_cpumask(f->flush_cpumask),+ cpumask, cpumask_of(smp_processor_id)); /* * Make the above memory operations globally visible before@@ -191,14 +188,32 @@ void native_flush_tlb_others(const cpuma * We have to send the IPI only to * CPUs affected. 
*/- send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);+ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); - while (!cpus_empty(f->flush_cpumask))+ while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); f->flush_mm = NULL; f->flush_va = 0; spin_unlock(&f->tlbstate_lock);+++void native_flush_tlb_others(const struct cpumask *cpumask,+ struct mm_struct *mm, unsigned long va)+{+ if (is_uv_system()) {+ cpumask_var_t after_uv_flush;++ if (alloc_cpumask_var(&after_uv_flush, GFP_ATOMIC)) {+ cpumask_andnot(after_uv_flush,+ cpumask, cpumask_of(smp_processor_id()));+ if (!uv_flush_tlb_others(after_uv_flush, mm, va))+ flush_tlb_others_ipi(after_uv_flush, mm, va);+ free_cpumask_var(after_uv_flush);+ return;+ }+ }+ flush_tlb_others_ipi(cpumask, mm, va); } static int __cpuinit init_smp_flush(void)@@ -215,34 +230,26 @@ void flush_tlb_current_task(void) void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm;- cpumask_t cpu_mask; preempt_disable();- cpu_mask = *mm->cpu_vm_mask;- cpu_clear(smp_processor_id(), cpu_mask);- local_flush_tlb();- if (!cpus_empty(cpu_mask))- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);+ if (cpumask_any_but(mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)+ flush_tlb_others(mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } void flush_tlb_mm(struct mm_struct *mm) {- cpumask_t cpu_mask; preempt_disable();- cpu_mask = *mm->cpu_vm_mask;- cpu_clear(smp_processor_id(), cpu_mask);- if (current->active_mm == mm) { if (current->mm) local_flush_tlb(); else leave_mm(smp_processor_id()); }- if (!cpus_empty(cpu_mask))- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);+ if (cpumask_any_but(mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)+ flush_tlb_others(mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); }@@ -250,11 +257,8 @@ void flush_tlb_page(struct vm_area_struc void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm;- cpumask_t cpu_mask; preempt_disable();- 
cpu_mask = *mm->cpu_vm_mask;- cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm)@@ -263,8 +267,8 @@ void flush_tlb_page(struct vm_area_struc leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask))- flush_tlb_others(cpu_mask, mm, va);+ if (cpumask_any_but(mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)+ flush_tlb_others(mm->cpu_vm_mask, mm, va); preempt_enable(); }diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c--- a/arch/x86/kernel/tlb_uv.c+++ b/arch/x86/kernel/tlb_uv.c@@ -212,11 +212,11 @@ static int uv_wait_completion(struct bau * The cpumaskp mask contains the cpus the broadcast was sent to. * * Returns 1 if all remote flushing was done. The mask is zeroed.- * Returns 0 if some remote flushing remains to be done. The mask is left- * unchanged.+ * Returns 0 if some remote flushing remains to be done. The mask will have+ * some bits still set. */ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,- cpumask_t *cpumaskp)+ struct cpumask *cpumaskp) { int completion_status = 0; int right_shift;@@ -263,13 +263,13 @@ int uv_flush_send_and_wait(int cpu, int * Success, so clear the remote cpu's from the mask so we don't * use the IPI method of shootdown on them. */- for_each_cpu_mask(bit, *cpumaskp) {+ for_each_cpu(bit, cpumaskp) { blade = uv_cpu_to_blade_id(bit); if (blade == this_blade) continue;- cpu_clear(bit, *cpumaskp);+ cpumask_clear_cpu(bit, cpumaskp); }- if (!cpus_empty(*cpumaskp))+ if (!cpumask_empty(cpumaskp)) return 0; return 1; }@@ -296,7 +296,7 @@ int uv_flush_send_and_wait(int cpu, int * Returns 1 if all remote flushing was done. * Returns 0 if some remote flushing remains to be done. 
*/-int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,+int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, unsigned long va) { int i;@@ -315,7 +315,7 @@ int uv_flush_tlb_others(cpumask_t *cpuma bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); i = 0;- for_each_cpu_mask(bit, *cpumaskp) {+ for_each_cpu(bit, cpumaskp) { blade = uv_cpu_to_blade_id(bit); BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); if (blade == this_blade) {diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c--- a/arch/x86/xen/enlighten.c+++ b/arch/x86/xen/enlighten.c@@ -633,35 +633,27 @@ static void xen_flush_tlb_single(unsigne preempt_enable(); } -static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,- unsigned long va)+static void xen_flush_tlb_others(const struct cpumask *cpus,+ struct mm_struct *mm, unsigned long va) { struct { struct mmuext_op op;- cpumask_t mask;+ DECLARE_BITMAP(mask, NR_CPUS); } *args;- cpumask_t cpumask = *cpus; struct multicall_space mcs; - /*- * A couple of (to be removed) sanity checks:- *- * - current CPU must not be in mask- * - mask must exist :)- */- BUG_ON(cpus_empty(cpumask));- BUG_ON(cpu_isset(smp_processor_id(), cpumask));+ BUG_ON(cpumask_empty(cpus)); BUG_ON(!mm);-- /* If a CPU which we ran on has gone down, OK. */- cpus_and(cpumask, cpumask, cpu_online_map);- if (cpus_empty(cpumask))- return; mcs = xen_mc_entry(sizeof(*args)); args = mcs.args;- args->mask = cpumask;- args->op.arg2.vcpumask = &args->mask;+ args->op.arg2.vcpumask = to_cpumask(args->mask);++ /* Remove us, and any offline CPUS. 
*/+ cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);+ cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));+ if (unlikely(cpumask_empty(to_cpumask(args->mask))))+ goto issue; if (va == TLB_FLUSH_ALL) { args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;@@ -672,6 +664,7 @@ static void xen_flush_tlb_others(const c MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); +issue: xen_mc_issue(PARAVIRT_LAZY_MMU); }
next prev parent reply other threads:[~2008-12-12 11:06 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-12-11 11:28 [PATCH 0/4] cpumask: fixups and additions Mike Travis
2008-12-11 11:28 ` [PATCH 1/4] x86: fix assign_irq_vector boot up problem Mike Travis
2008-12-12 8:27 ` Rusty Russell
2008-12-12 9:20 ` Ingo Molnar
2008-12-12 18:10 ` Mike Travis
2008-12-12 19:06 ` Mike Travis
2008-12-11 11:28 ` [PATCH 2/4] x86: fix cpu_mask_to_apicid_and to include cpu_online_mask Mike Travis
2008-12-12 11:06 ` Rusty Russell [this message]
2008-12-12 16:37 ` Mike Travis
2008-12-13 12:03 ` Rusty Russell
2008-12-11 11:28 ` [PATCH 3/4] cpumask: use maxcpus=NUM to extend the cpu limit as well as restrict the limit Mike Travis
2008-12-11 13:41 ` Heiko Carstens
2008-12-11 18:19 ` Mike Travis
2008-12-12 10:03 ` Heiko Carstens
2008-12-12 11:41 ` Rusty Russell
2008-12-12 15:38 ` Mike Travis
2008-12-11 11:28 ` [PATCH 4/4] cpumask: add sysfs displays for configured and disabled cpu maps Mike Travis
2008-12-12 11:44 ` Rusty Russell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200812122136.34780.rusty@rustcorp.com.au \
--to=rusty@rustcorp.com.au \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=tglx@linutronix.de \
--cc=travis@sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox