From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: Chirag Jog <chirag@linux.vnet.ibm.com>
Cc: linux-rt-users@vger.kernel.org,
Josh Triplett <josht@linux.vnet.ibm.com>,
Steven Rostedt <rostedt@goodmis.org>,
linuxppc-dev@ozlabs.org, Nivedita Singhvi <niv@us.ibm.com>,
"Timothy R. Chavez" <tim.chavez@linux.vnet.ibm.com>,
paulmck@linux.vnet.ibm.com, linux.kernel@vger.kernel.org
Subject: Re: [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
Date: Tue, 15 Jul 2008 11:32:01 +1000 [thread overview]
Message-ID: <1216085521.7740.37.camel@pasglop> (raw)
In-Reply-To: <20080709160543.GG7101@linux.vnet.ibm.com>
On Wed, 2008-07-09 at 21:35 +0530, Chirag Jog wrote:
> Hi,
> This patch fixes various paths in the -rt kernel on powerpc64 where per_cpu
> variables are accessed in a preempt unsafe way.
> When a power box with -rt kernel is booted, multiple BUG messages are
> generated "BUG: init:1 task might have lost a preemption check!".
> After booting a kernel with these patches applied, these messages
> don't appear.
>
> Also I ran the realtime tests from ltp to ensure the stability.
That sounds bad tho...
IE. You are changing the code to lock/unlock on all those TLB batching
operations, but seem to miss the core reason why it was done that way:
ie, the code assumes that it will not change CPU -between- those calls,
since the whole stuff should already have been within a per-cpu
locked section at the caller level.
As for the TCE code, well, it lived on the assumption that the upper
level spinlock did the job of preventing preemption; I suppose that's not
the case anymore. So that part of the patch sounds ok.
Ben.
>
> Signed-Off-By: Chirag <chirag@linux.vnet.ibm.com>
> arch/powerpc/mm/tlb_64.c | 31 ++++++++++++++++---------------
> arch/powerpc/platforms/pseries/iommu.c | 14 ++++++++++----
> include/asm-powerpc/tlb.h | 5 ++---
> 3 files changed, 28 insertions(+), 22 deletions(-)
>
>
> Index: linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/arch/powerpc/mm/tlb_64.c 2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c 2008-07-09 21:30:37.000000000 +0530
> @@ -38,7 +38,6 @@
> * include/asm-powerpc/tlb.h file -- tgall
> */
> DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
> -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
> unsigned long pte_freelist_forced_free;
>
> struct pte_freelist_batch
> @@ -48,7 +47,7 @@
> pgtable_free_t tables[0];
> };
>
> -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
> +DEFINE_PER_CPU_LOCKED(struct pte_freelist_batch *, pte_freelist_cur);
> unsigned long pte_freelist_forced_free;
>
> #define PTE_FREELIST_SIZE \
> @@ -92,24 +91,21 @@
>
> void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
> {
> - /*
> - * This is safe since tlb_gather_mmu has disabled preemption.
> - * tlb->cpu is set by tlb_gather_mmu as well.
> - */
> + int cpu;
> cpumask_t local_cpumask = cpumask_of_cpu(tlb->cpu);
> - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
> + struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
>
> if (atomic_read(&tlb->mm->mm_users) < 2 ||
> cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
> pgtable_free(pgf);
> - return;
> + goto cleanup;
> }
>
> if (*batchp == NULL) {
> *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
> if (*batchp == NULL) {
> pgtable_free_now(pgf);
> - return;
> + goto cleanup;
> }
> (*batchp)->index = 0;
> }
> @@ -118,6 +114,9 @@
> pte_free_submit(*batchp);
> *batchp = NULL;
> }
> +
> + cleanup:
> + put_cpu_var_locked(pte_freelist_cur, cpu);
> }
>
> /*
> @@ -253,13 +252,15 @@
>
> void pte_free_finish(void)
> {
> - /* This is safe since tlb_gather_mmu has disabled preemption */
> - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
> + int cpu;
> + struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
>
> - if (*batchp == NULL)
> - return;
> - pte_free_submit(*batchp);
> - *batchp = NULL;
> + if (*batchp) {
> + pte_free_submit(*batchp);
> + *batchp = NULL;
> + }
> +
> + put_cpu_var_locked(pte_freelist_cur, cpu);
> }
>
> /**
> Index: linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/include/asm-powerpc/tlb.h 2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h 2008-07-09 21:29:41.000000000 +0530
> @@ -40,18 +40,17 @@
>
> static inline void tlb_flush(struct mmu_gather *tlb)
> {
> - struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
> + struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
>
> /* If there's a TLB batch pending, then we must flush it because the
> * pages are going to be freed and we really don't want to have a CPU
> * access a freed page because it has a stale TLB
> */
> if (tlbbatch->index) {
> - preempt_disable();
> __flush_tlb_pending(tlbbatch);
> - preempt_enable();
> }
>
> + put_cpu_var(ppc64_tlb_batch);
> pte_free_finish();
> }
>
> Index: linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/arch/powerpc/platforms/pseries/iommu.c 2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c 2008-07-09 21:29:41.000000000 +0530
> @@ -124,7 +124,7 @@
> }
> }
>
> -static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
> +static DEFINE_PER_CPU_LOCKED(u64 *, tce_page) = NULL;
>
> static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
> long npages, unsigned long uaddr,
> @@ -135,12 +135,13 @@
> u64 *tcep;
> u64 rpn;
> long l, limit;
> + int cpu;
>
> if (npages == 1)
> return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
> direction);
>
> - tcep = __get_cpu_var(tce_page);
> + tcep = get_cpu_var_locked(tce_page, &cpu);
>
> /* This is safe to do since interrupts are off when we're called
> * from iommu_alloc{,_sg}()
> @@ -148,10 +149,13 @@
> if (!tcep) {
> tcep = (u64 *)__get_free_page(GFP_ATOMIC);
> /* If allocation fails, fall back to the loop implementation */
> - if (!tcep)
> + if (!tcep) {
> + put_cpu_var_locked(tce_page, cpu);
> return tce_build_pSeriesLP(tbl, tcenum, npages,
> uaddr, direction);
> - __get_cpu_var(tce_page) = tcep;
> + }
> +
> + per_cpu_var_locked(tce_page, cpu) = tcep;
> }
>
> rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
> @@ -188,6 +192,8 @@
> printk("\ttce[0] val = 0x%lx\n", tcep[0]);
> show_stack(current, (unsigned long *)__get_SP());
> }
> +
> + put_cpu_var_locked(tce_page, cpu);
> }
>
> static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev
WARNING: multiple messages have this Message-ID (diff)
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
To: Chirag Jog <chirag@linux.vnet.ibm.com>
Cc: linux.kernel@vger.kernel.org, linux-rt-users@vger.kernel.org,
linuxppc-dev@ozlabs.org, Josh Triplett <josht@linux.vnet.ibm.com>,
Steven Rostedt <rostedt@goodmis.org>,
Nivedita Singhvi <niv@us.ibm.com>,
"Timothy R. Chavez" <tim.chavez@linux.vnet.ibm.com>,
paulmck@linux.vnet.ibm.com
Subject: Re: [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
Date: Tue, 15 Jul 2008 11:32:01 +1000 [thread overview]
Message-ID: <1216085521.7740.37.camel@pasglop> (raw)
In-Reply-To: <20080709160543.GG7101@linux.vnet.ibm.com>
On Wed, 2008-07-09 at 21:35 +0530, Chirag Jog wrote:
> Hi,
> This patch fixes various paths in the -rt kernel on powerpc64 where per_cpu
> variables are accessed in a preempt unsafe way.
> When a power box with -rt kernel is booted, multiple BUG messages are
> generated "BUG: init:1 task might have lost a preemption check!".
> After booting a kernel with these patches applied, these messages
> don't appear.
>
> Also I ran the realtime tests from ltp to ensure the stability.
That sounds bad tho...
IE. You are changing the code to lock/unlock on all those TLB batching
operations, but seem to miss the core reason why it was done that way:
ie, the code assumes that it will not change CPU -between- those calls,
since the whole stuff should already have been within a per-cpu
locked section at the caller level.
As for the TCE code, well, it lived on the assumption that the upper
level spinlock did the job of preventing preemption; I suppose that's not
the case anymore. So that part of the patch sounds ok.
Ben.
>
> Signed-Off-By: Chirag <chirag@linux.vnet.ibm.com>
> arch/powerpc/mm/tlb_64.c | 31 ++++++++++++++++---------------
> arch/powerpc/platforms/pseries/iommu.c | 14 ++++++++++----
> include/asm-powerpc/tlb.h | 5 ++---
> 3 files changed, 28 insertions(+), 22 deletions(-)
>
>
> Index: linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/arch/powerpc/mm/tlb_64.c 2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c 2008-07-09 21:30:37.000000000 +0530
> @@ -38,7 +38,6 @@
> * include/asm-powerpc/tlb.h file -- tgall
> */
> DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
> -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
> unsigned long pte_freelist_forced_free;
>
> struct pte_freelist_batch
> @@ -48,7 +47,7 @@
> pgtable_free_t tables[0];
> };
>
> -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
> +DEFINE_PER_CPU_LOCKED(struct pte_freelist_batch *, pte_freelist_cur);
> unsigned long pte_freelist_forced_free;
>
> #define PTE_FREELIST_SIZE \
> @@ -92,24 +91,21 @@
>
> void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
> {
> - /*
> - * This is safe since tlb_gather_mmu has disabled preemption.
> - * tlb->cpu is set by tlb_gather_mmu as well.
> - */
> + int cpu;
> cpumask_t local_cpumask = cpumask_of_cpu(tlb->cpu);
> - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
> + struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
>
> if (atomic_read(&tlb->mm->mm_users) < 2 ||
> cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
> pgtable_free(pgf);
> - return;
> + goto cleanup;
> }
>
> if (*batchp == NULL) {
> *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
> if (*batchp == NULL) {
> pgtable_free_now(pgf);
> - return;
> + goto cleanup;
> }
> (*batchp)->index = 0;
> }
> @@ -118,6 +114,9 @@
> pte_free_submit(*batchp);
> *batchp = NULL;
> }
> +
> + cleanup:
> + put_cpu_var_locked(pte_freelist_cur, cpu);
> }
>
> /*
> @@ -253,13 +252,15 @@
>
> void pte_free_finish(void)
> {
> - /* This is safe since tlb_gather_mmu has disabled preemption */
> - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
> + int cpu;
> + struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
>
> - if (*batchp == NULL)
> - return;
> - pte_free_submit(*batchp);
> - *batchp = NULL;
> + if (*batchp) {
> + pte_free_submit(*batchp);
> + *batchp = NULL;
> + }
> +
> + put_cpu_var_locked(pte_freelist_cur, cpu);
> }
>
> /**
> Index: linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/include/asm-powerpc/tlb.h 2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h 2008-07-09 21:29:41.000000000 +0530
> @@ -40,18 +40,17 @@
>
> static inline void tlb_flush(struct mmu_gather *tlb)
> {
> - struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
> + struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
>
> /* If there's a TLB batch pending, then we must flush it because the
> * pages are going to be freed and we really don't want to have a CPU
> * access a freed page because it has a stale TLB
> */
> if (tlbbatch->index) {
> - preempt_disable();
> __flush_tlb_pending(tlbbatch);
> - preempt_enable();
> }
>
> + put_cpu_var(ppc64_tlb_batch);
> pte_free_finish();
> }
>
> Index: linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/arch/powerpc/platforms/pseries/iommu.c 2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c 2008-07-09 21:29:41.000000000 +0530
> @@ -124,7 +124,7 @@
> }
> }
>
> -static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
> +static DEFINE_PER_CPU_LOCKED(u64 *, tce_page) = NULL;
>
> static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
> long npages, unsigned long uaddr,
> @@ -135,12 +135,13 @@
> u64 *tcep;
> u64 rpn;
> long l, limit;
> + int cpu;
>
> if (npages == 1)
> return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
> direction);
>
> - tcep = __get_cpu_var(tce_page);
> + tcep = get_cpu_var_locked(tce_page, &cpu);
>
> /* This is safe to do since interrupts are off when we're called
> * from iommu_alloc{,_sg}()
> @@ -148,10 +149,13 @@
> if (!tcep) {
> tcep = (u64 *)__get_free_page(GFP_ATOMIC);
> /* If allocation fails, fall back to the loop implementation */
> - if (!tcep)
> + if (!tcep) {
> + put_cpu_var_locked(tce_page, cpu);
> return tce_build_pSeriesLP(tbl, tcenum, npages,
> uaddr, direction);
> - __get_cpu_var(tce_page) = tcep;
> + }
> +
> + per_cpu_var_locked(tce_page, cpu) = tcep;
> }
>
> rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
> @@ -188,6 +192,8 @@
> printk("\ttce[0] val = 0x%lx\n", tcep[0]);
> show_stack(current, (unsigned long *)__get_SP());
> }
> +
> + put_cpu_var_locked(tce_page, cpu);
> }
>
> static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev
next prev parent reply other threads:[~2008-07-15 1:34 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-07-09 16:05 [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables Chirag Jog
2008-07-09 16:05 ` Chirag Jog
2008-07-11 8:19 ` Sebastien Dugue
2008-07-11 8:19 ` Sebastien Dugue
2008-07-15 1:32 ` Benjamin Herrenschmidt [this message]
2008-07-15 1:32 ` Benjamin Herrenschmidt
2008-07-17 12:56 ` Chirag Jog
2008-07-17 12:56 ` Chirag Jog
2008-07-17 20:14 ` Benjamin Herrenschmidt
2008-07-17 20:14 ` Benjamin Herrenschmidt
2008-07-18 10:11 ` Chirag Jog
2008-07-18 10:11 ` Chirag Jog
2008-07-18 22:05 ` Benjamin Herrenschmidt
2008-07-18 22:05 ` Benjamin Herrenschmidt
2008-07-19 1:26 ` Steven Rostedt
2008-07-19 1:26 ` Steven Rostedt
2008-07-19 3:53 ` Benjamin Herrenschmidt
2008-07-19 3:53 ` Benjamin Herrenschmidt
2008-07-21 10:23 ` Chirag Jog
-- strict thread matches above, loose matches on Subject: below --
2008-07-09 15:33 Chirag Jog
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1216085521.7740.37.camel@pasglop \
--to=benh@kernel.crashing.org \
--cc=chirag@linux.vnet.ibm.com \
--cc=josht@linux.vnet.ibm.com \
--cc=linux-rt-users@vger.kernel.org \
--cc=linux.kernel@vger.kernel.org \
--cc=linuxppc-dev@ozlabs.org \
--cc=niv@us.ibm.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=rostedt@goodmis.org \
--cc=tim.chavez@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.