* [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
@ 2008-07-09 15:33 Chirag Jog
  0 siblings, 0 replies; 11+ messages in thread
From: Chirag Jog @ 2008-07-09 15:33 UTC (permalink / raw)
  To: linux.kernel, linux-rt-users, linuxppc-dev
  Cc: Timothy R. Chavez, Nivedita Singhvi, paulmck


Hi,
This patch fixes various paths in the -rt kernel on powerpc64 where per_cpu
variables are accessed in a preempt-unsafe way.
When a POWER box is booted with an -rt kernel, multiple BUG messages of the
form "BUG: init:1 task might have lost a preemption check!" are generated.
After booting a kernel with these patches applied, the messages no longer
appear.

I also ran the realtime tests from LTP to verify stability.
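
For context, the hazard being fixed is the usual preempt-unsafe per-CPU
access pattern. A minimal sketch of the problem (hypothetical variable and
function names, not code from the patch below):

#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/gfp.h>

/* Hypothetical per-CPU pointer, for illustration only. */
static DEFINE_PER_CPU(u64 *, example_page);

static void example_preempt_unsafe(void)
{
	/* Picks the current CPU's slot of the per-CPU variable... */
	u64 **pagep = &__get_cpu_var(example_page);

	/* ...but nothing here disables preemption on -rt, so the task may be
	 * migrated to another CPU before the next statement and end up
	 * populating the wrong CPU's slot. */
	if (*pagep == NULL)
		*pagep = (u64 *)__get_free_page(GFP_ATOMIC);
}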


Signed-off-by: Chirag <chirag@linux.vnet.ibm.com>

arch/powerpc/mm/tlb_64.c               |   29 ++++++++++++++++-------------
arch/powerpc/platforms/pseries/iommu.c |   14 ++++++++++----
include/asm-powerpc/tlb.h              |    5 ++---
3 files changed, 28 insertions(+), 20 deletions(-)

        
Index: linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c
===================================================================
--- linux-2.6.25.8-rt7.orig/arch/powerpc/mm/tlb_64.c	2008-07-07 13:13:59.000000000 +0530
+++ linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c	2008-07-09 20:57:01.000000000 +0530
@@ -38,7 +38,6 @@
  * include/asm-powerpc/tlb.h file -- tgall
  */
 DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
-DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
 unsigned long pte_freelist_forced_free;
 
 struct pte_freelist_batch
@@ -48,7 +47,7 @@
 	pgtable_free_t	tables[0];
 };
 
-DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+DEFINE_PER_CPU_LOCKED(struct pte_freelist_batch *, pte_freelist_cur);
 unsigned long pte_freelist_forced_free;
 
 #define PTE_FREELIST_SIZE \
@@ -92,16 +91,14 @@
 
 void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
 {
-	/*
-	 * This is safe since tlb_gather_mmu has disabled preemption.
-	 * tlb->cpu is set by tlb_gather_mmu as well.
-	 */
+	int cpu;
         cpumask_t local_cpumask = cpumask_of_cpu(tlb->cpu);
-	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+	struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
 		pgtable_free(pgf);
+		goto cleanup;
 		return;
 	}
 
@@ -109,6 +106,7 @@
 		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
 		if (*batchp == NULL) {
 			pgtable_free_now(pgf);
+			goto cleanup;
 			return;
 		}
 		(*batchp)->index = 0;
@@ -118,6 +116,9 @@
 		pte_free_submit(*batchp);
 		*batchp = NULL;
 	}
+
+ cleanup:
+	put_cpu_var_locked(pte_freelist_cur, cpu);
 }
 
 /*
@@ -253,13 +254,15 @@
 
 void pte_free_finish(void)
 {
-	/* This is safe since tlb_gather_mmu has disabled preemption */
-	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+	int cpu;
+	struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
 
-	if (*batchp == NULL)
-		return;
-	pte_free_submit(*batchp);
-	*batchp = NULL;
+	if (*batchp) {
+		pte_free_submit(*batchp);
+		*batchp = NULL;
+	}
+
+	put_cpu_var_locked(pte_freelist_cur, cpu);
 }
 
 /**
Index: linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h
===================================================================
--- linux-2.6.25.8-rt7.orig/include/asm-powerpc/tlb.h	2008-07-07 22:58:37.000000000 +0530
+++ linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h	2008-07-09 10:22:51.000000000 +0530
@@ -40,18 +40,17 @@
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
-	struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
+	struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
 
 	/* If there's a TLB batch pending, then we must flush it because the
 	 * pages are going to be freed and we really don't want to have a CPU
 	 * access a freed page because it has a stale TLB
 	 */
 	if (tlbbatch->index) {
-		preempt_disable();
 		__flush_tlb_pending(tlbbatch);
-		preempt_enable();
 	}
 
+	put_cpu_var(ppc64_tlb_batch);
 	pte_free_finish();
 }
 
Index: linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c
===================================================================
--- linux-2.6.25.8-rt7.orig/arch/powerpc/platforms/pseries/iommu.c	2008-07-07 23:16:29.000000000 +0530
+++ linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c	2008-07-09 10:49:21.000000000 +0530
@@ -124,7 +124,7 @@
 	}
 }
 
-static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
+static DEFINE_PER_CPU_LOCKED(u64 *, tce_page) = NULL;
 
 static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 				     long npages, unsigned long uaddr,
@@ -135,12 +135,13 @@
 	u64 *tcep;
 	u64 rpn;
 	long l, limit;
+	int cpu;
 
 	if (npages == 1)
 		return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
 					   direction);
 
-	tcep = __get_cpu_var(tce_page);
+	tcep = get_cpu_var_locked(tce_page, &cpu);
 
 	/* This is safe to do since interrupts are off when we're called
 	 * from iommu_alloc{,_sg}()
@@ -148,10 +149,13 @@
 	if (!tcep) {
 		tcep = (u64 *)__get_free_page(GFP_ATOMIC);
 		/* If allocation fails, fall back to the loop implementation */
-		if (!tcep)
+		if (!tcep) {
+			put_cpu_var_locked(tce_page, cpu);
 			return tce_build_pSeriesLP(tbl, tcenum, npages,
 						   uaddr, direction);
-		__get_cpu_var(tce_page) = tcep;
+		}
+
+		per_cpu_var_locked(tce_page, cpu) = tcep;
 	}
 
 	rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
@@ -188,6 +192,8 @@
 		printk("\ttce[0] val = 0x%lx\n", tcep[0]);
 		show_stack(current, (unsigned long *)__get_SP());
 	}
+
+	put_cpu_var_locked(tce_page, cpu);
 }
 
 static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)


* [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
@ 2008-07-09 16:05 Chirag Jog
  2008-07-11  8:19 ` Sebastien Dugue
  2008-07-15  1:32 ` Benjamin Herrenschmidt
  0 siblings, 2 replies; 11+ messages in thread
From: Chirag Jog @ 2008-07-09 16:05 UTC (permalink / raw)
  To: linux.kernel, linux-rt-users, linuxppc-dev
  Cc: Josh Triplett, Steven Rostedt, Nivedita Singhvi,
	Timothy R. Chavez, paulmck


Hi,
This patch fixes various paths in the -rt kernel on powerpc64 where per_cpu
variables are accessed in a preempt-unsafe way.
When a POWER box is booted with an -rt kernel, multiple BUG messages of the
form "BUG: init:1 task might have lost a preemption check!" are generated.
After booting a kernel with these patches applied, the messages no longer
appear.

I also ran the realtime tests from LTP to verify stability.
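
The fix relies on the -rt locked per-CPU primitives used throughout the
patch below (DEFINE_PER_CPU_LOCKED, get_cpu_var_locked, put_cpu_var_locked).
A minimal sketch of that access pattern, with a hypothetical variable name:

#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/gfp.h>

/* A locked per-CPU variable: each CPU's slot is protected by its own
 * lock instead of relying on preemption being disabled. */
static DEFINE_PER_CPU_LOCKED(u64 *, example_page);

static void example_locked_access(void)
{
	int cpu;
	/* Lock and return this CPU's slot; 'cpu' records which slot we hold. */
	u64 **pagep = &get_cpu_var_locked(example_page, &cpu);

	if (*pagep == NULL)
		*pagep = (u64 *)__get_free_page(GFP_ATOMIC);

	/* Release the same slot we locked, even if the task has migrated. */
	put_cpu_var_locked(example_page, cpu);
}

The patch below applies exactly this pattern to pte_freelist_cur and
tce_page.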


Signed-off-by: Chirag <chirag@linux.vnet.ibm.com>
arch/powerpc/mm/tlb_64.c               |   31 ++++++++++++++++---------------
arch/powerpc/platforms/pseries/iommu.c |   14 ++++++++++----
include/asm-powerpc/tlb.h              |    5 ++---
3 files changed, 28 insertions(+), 22 deletions(-)

 
Index: linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c
===================================================================
--- linux-2.6.25.8-rt7.orig/arch/powerpc/mm/tlb_64.c	2008-07-09 21:29:21.000000000 +0530
+++ linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c	2008-07-09 21:30:37.000000000 +0530
@@ -38,7 +38,6 @@
  * include/asm-powerpc/tlb.h file -- tgall
  */
 DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
-DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
 unsigned long pte_freelist_forced_free;
 
 struct pte_freelist_batch
@@ -48,7 +47,7 @@
 	pgtable_free_t	tables[0];
 };
 
-DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+DEFINE_PER_CPU_LOCKED(struct pte_freelist_batch *, pte_freelist_cur);
 unsigned long pte_freelist_forced_free;
 
 #define PTE_FREELIST_SIZE \
@@ -92,24 +91,21 @@
 
 void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
 {
-	/*
-	 * This is safe since tlb_gather_mmu has disabled preemption.
-	 * tlb->cpu is set by tlb_gather_mmu as well.
-	 */
+	int cpu;
         cpumask_t local_cpumask = cpumask_of_cpu(tlb->cpu);
-	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+	struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
 		pgtable_free(pgf);
-		return;
+		goto cleanup;
 	}
 
 	if (*batchp == NULL) {
 		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
 		if (*batchp == NULL) {
 			pgtable_free_now(pgf);
-			return;
+			goto cleanup;
 		}
 		(*batchp)->index = 0;
 	}
@@ -118,6 +114,9 @@
 		pte_free_submit(*batchp);
 		*batchp = NULL;
 	}
+
+ cleanup:
+	put_cpu_var_locked(pte_freelist_cur, cpu);
 }
 
 /*
@@ -253,13 +252,15 @@
 
 void pte_free_finish(void)
 {
-	/* This is safe since tlb_gather_mmu has disabled preemption */
-	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+	int cpu;
+	struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
 
-	if (*batchp == NULL)
-		return;
-	pte_free_submit(*batchp);
-	*batchp = NULL;
+	if (*batchp) {
+		pte_free_submit(*batchp);
+		*batchp = NULL;
+	}
+
+	put_cpu_var_locked(pte_freelist_cur, cpu);
 }
 
 /**
Index: linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h
===================================================================
--- linux-2.6.25.8-rt7.orig/include/asm-powerpc/tlb.h	2008-07-09 21:29:21.000000000 +0530
+++ linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h	2008-07-09 21:29:41.000000000 +0530
@@ -40,18 +40,17 @@
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
-	struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
+	struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
 
 	/* If there's a TLB batch pending, then we must flush it because the
 	 * pages are going to be freed and we really don't want to have a CPU
 	 * access a freed page because it has a stale TLB
 	 */
 	if (tlbbatch->index) {
-		preempt_disable();
 		__flush_tlb_pending(tlbbatch);
-		preempt_enable();
 	}
 
+	put_cpu_var(ppc64_tlb_batch);
 	pte_free_finish();
 }
 
Index: linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c
===================================================================
--- linux-2.6.25.8-rt7.orig/arch/powerpc/platforms/pseries/iommu.c	2008-07-09 21:29:21.000000000 +0530
+++ linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c	2008-07-09 21:29:41.000000000 +0530
@@ -124,7 +124,7 @@
 	}
 }
 
-static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
+static DEFINE_PER_CPU_LOCKED(u64 *, tce_page) = NULL;
 
 static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 				     long npages, unsigned long uaddr,
@@ -135,12 +135,13 @@
 	u64 *tcep;
 	u64 rpn;
 	long l, limit;
+	int cpu;
 
 	if (npages == 1)
 		return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
 					   direction);
 
-	tcep = __get_cpu_var(tce_page);
+	tcep = get_cpu_var_locked(tce_page, &cpu);
 
 	/* This is safe to do since interrupts are off when we're called
 	 * from iommu_alloc{,_sg}()
@@ -148,10 +149,13 @@
 	if (!tcep) {
 		tcep = (u64 *)__get_free_page(GFP_ATOMIC);
 		/* If allocation fails, fall back to the loop implementation */
-		if (!tcep)
+		if (!tcep) {
+			put_cpu_var_locked(tce_page, cpu);
 			return tce_build_pSeriesLP(tbl, tcenum, npages,
 						   uaddr, direction);
-		__get_cpu_var(tce_page) = tcep;
+		}
+
+		per_cpu_var_locked(tce_page, cpu) = tcep;
 	}
 
 	rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
@@ -188,6 +192,8 @@
 		printk("\ttce[0] val = 0x%lx\n", tcep[0]);
 		show_stack(current, (unsigned long *)__get_SP());
 	}
+
+	put_cpu_var_locked(tce_page, cpu);
 }
 
 static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)


* Re: [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
  2008-07-09 16:05 [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables Chirag Jog
@ 2008-07-11  8:19 ` Sebastien Dugue
  2008-07-15  1:32 ` Benjamin Herrenschmidt
  1 sibling, 0 replies; 11+ messages in thread
From: Sebastien Dugue @ 2008-07-11  8:19 UTC (permalink / raw)
  To: Chirag Jog
  Cc: linux-rt-users, Josh Triplett, Steven Rostedt, linuxppc-dev,
	Nivedita Singhvi, Timothy R.  Chavez, paulmck, linux.kernel


  Hi Chirag,

On Wed, 9 Jul 2008 21:35:43 +0530 Chirag Jog <chirag@linux.vnet.ibm.com> wrote:

> 
> Hi,
> This patch fixes various paths in the -rt kernel on powerpc64 where per_cpu
> variables are accessed in a preempt unsafe way.
> When a power box with -rt kernel is booted, multiple BUG messages are
> generated "BUG: init:1 task might have lost a preemption check!".
> After booting a kernel with these patches applied, these messages
> don't appear.

  That does indeed greatly reduce the number of BUG messages displayed. Good. Thanks.

  Tested-by: Sebastien Dugue <sebastien.dugue@bull.net>

  Sebastien.

> 
> Also I ran the realtime tests from ltp to ensure the stability.
> 
> 
> Signed-Off-By: Chirag <chirag@linux.vnet.ibm.com>
> arch/powerpc/mm/tlb_64.c               |   31 ++++++++++++++++---------------
> arch/powerpc/platforms/pseries/iommu.c |   14 ++++++++++----
> include/asm-powerpc/tlb.h              |    5 ++---
> 3 files changed, 28 insertions(+), 22 deletions(-)
> 
>  
> Index: linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/arch/powerpc/mm/tlb_64.c	2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c	2008-07-09 21:30:37.000000000 +0530
> @@ -38,7 +38,6 @@
>   * include/asm-powerpc/tlb.h file -- tgall
>   */
>  DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
> -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
>  unsigned long pte_freelist_forced_free;
>  
>  struct pte_freelist_batch
> @@ -48,7 +47,7 @@
>  	pgtable_free_t	tables[0];
>  };
>  
> -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
> +DEFINE_PER_CPU_LOCKED(struct pte_freelist_batch *, pte_freelist_cur);
>  unsigned long pte_freelist_forced_free;
>  
>  #define PTE_FREELIST_SIZE \
> @@ -92,24 +91,21 @@
>  
>  void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
>  {
> -	/*
> -	 * This is safe since tlb_gather_mmu has disabled preemption.
> -	 * tlb->cpu is set by tlb_gather_mmu as well.
> -	 */
> +	int cpu;
>          cpumask_t local_cpumask = cpumask_of_cpu(tlb->cpu);
> -	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
> +	struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
>  
>  	if (atomic_read(&tlb->mm->mm_users) < 2 ||
>  	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
>  		pgtable_free(pgf);
> -		return;
> +		goto cleanup;
>  	}
>  
>  	if (*batchp == NULL) {
>  		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
>  		if (*batchp == NULL) {
>  			pgtable_free_now(pgf);
> -			return;
> +			goto cleanup;
>  		}
>  		(*batchp)->index = 0;
>  	}
> @@ -118,6 +114,9 @@
>  		pte_free_submit(*batchp);
>  		*batchp = NULL;
>  	}
> +
> + cleanup:
> +	put_cpu_var_locked(pte_freelist_cur, cpu);
>  }
>  
>  /*
> @@ -253,13 +252,15 @@
>  
>  void pte_free_finish(void)
>  {
> -	/* This is safe since tlb_gather_mmu has disabled preemption */
> -	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
> +	int cpu;
> +	struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
>  
> -	if (*batchp == NULL)
> -		return;
> -	pte_free_submit(*batchp);
> -	*batchp = NULL;
> +	if (*batchp) {
> +		pte_free_submit(*batchp);
> +		*batchp = NULL;
> +	}
> +
> +	put_cpu_var_locked(pte_freelist_cur, cpu);
>  }
>  
>  /**
> Index: linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/include/asm-powerpc/tlb.h	2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h	2008-07-09 21:29:41.000000000 +0530
> @@ -40,18 +40,17 @@
>  
>  static inline void tlb_flush(struct mmu_gather *tlb)
>  {
> -	struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
> +	struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
>  
>  	/* If there's a TLB batch pending, then we must flush it because the
>  	 * pages are going to be freed and we really don't want to have a CPU
>  	 * access a freed page because it has a stale TLB
>  	 */
>  	if (tlbbatch->index) {
> -		preempt_disable();
>  		__flush_tlb_pending(tlbbatch);
> -		preempt_enable();
>  	}
>  
> +	put_cpu_var(ppc64_tlb_batch);
>  	pte_free_finish();
>  }
>  
> Index: linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/arch/powerpc/platforms/pseries/iommu.c	2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c	2008-07-09 21:29:41.000000000 +0530
> @@ -124,7 +124,7 @@
>  	}
>  }
>  
> -static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
> +static DEFINE_PER_CPU_LOCKED(u64 *, tce_page) = NULL;
>  
>  static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  				     long npages, unsigned long uaddr,
> @@ -135,12 +135,13 @@
>  	u64 *tcep;
>  	u64 rpn;
>  	long l, limit;
> +	int cpu;
>  
>  	if (npages == 1)
>  		return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
>  					   direction);
>  
> -	tcep = __get_cpu_var(tce_page);
> +	tcep = get_cpu_var_locked(tce_page, &cpu);
>  
>  	/* This is safe to do since interrupts are off when we're called
>  	 * from iommu_alloc{,_sg}()
> @@ -148,10 +149,13 @@
>  	if (!tcep) {
>  		tcep = (u64 *)__get_free_page(GFP_ATOMIC);
>  		/* If allocation fails, fall back to the loop implementation */
> -		if (!tcep)
> +		if (!tcep) {
> +			put_cpu_var_locked(tce_page, cpu);
>  			return tce_build_pSeriesLP(tbl, tcenum, npages,
>  						   uaddr, direction);
> -		__get_cpu_var(tce_page) = tcep;
> +		}
> +
> +		per_cpu_var_locked(tce_page, cpu) = tcep;
>  	}
>  
>  	rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
> @@ -188,6 +192,8 @@
>  		printk("\ttce[0] val = 0x%lx\n", tcep[0]);
>  		show_stack(current, (unsigned long *)__get_SP());
>  	}
> +
> +	put_cpu_var_locked(tce_page, cpu);
>  }
>  
>  static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)


* Re: [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
  2008-07-09 16:05 [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables Chirag Jog
  2008-07-11  8:19 ` Sebastien Dugue
@ 2008-07-15  1:32 ` Benjamin Herrenschmidt
  2008-07-17 12:56   ` Chirag Jog
  1 sibling, 1 reply; 11+ messages in thread
From: Benjamin Herrenschmidt @ 2008-07-15  1:32 UTC (permalink / raw)
  To: Chirag Jog
  Cc: linux-rt-users, Josh Triplett, Steven Rostedt, linuxppc-dev,
	Nivedita Singhvi, Timothy R. Chavez, paulmck, linux.kernel

On Wed, 2008-07-09 at 21:35 +0530, Chirag Jog wrote:
> Hi,
> This patch fixes various paths in the -rt kernel on powerpc64 where per_cpu
> variables are accessed in a preempt unsafe way.
> When a power box with -rt kernel is booted, multiple BUG messages are
> generated "BUG: init:1 task might have lost a preemption check!".
> After booting a kernel with these patches applied, these messages
> don't appear.
> 
> Also I ran the realtime tests from ltp to ensure the stability.

That sounds bad, though...

I.e., you are changing the code to lock/unlock on all those TLB batching
operations, but seem to miss the core reason why it was done that way:
the code assumes that it will not change CPU -between- those calls,
since the whole sequence should already have been within a per-cpu
locked section at the caller level.
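
The caller-level bracketing referred to here is the mmu_gather
setup/teardown; a simplified sketch of that section, based on the -rt
version of include/asm-generic/tlb.h quoted later in this thread (the
surrounding function is hypothetical):

#include <linux/mm_types.h>
#include <asm/tlb.h>

/* Hypothetical caller, sketching the bracketing around the TLB batching
 * operations discussed above. */
static void example_unmap_bracketing(struct mm_struct *mm)
{
	int cpu;
	/* On mainline, get_cpu_var(mmu_gathers) disables preemption here, so
	 * everything up to tlb_finish_mmu() stays on one CPU.  On -rt,
	 * get_cpu_var_locked() only locks this CPU's mmu_gathers slot, and
	 * whether the task can migrate in between is exactly the question
	 * raised above. */
	struct mmu_gather *tlb = &get_cpu_var_locked(mmu_gathers, &cpu);

	tlb->mm = mm;
	/* ... unmap_page_range(), pgtable_free_tlb(), tlb_flush() ... */
	put_cpu_var_locked(mmu_gathers, cpu);
}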

As for the TCE code, well, it lived on the assumption that the upper
level spinlock did the job of preventing preemption; I suppose that's not
the case anymore. So that part of the patch sounds ok.

Ben.

> 
> Signed-Off-By: Chirag <chirag@linux.vnet.ibm.com>
> arch/powerpc/mm/tlb_64.c               |   31 ++++++++++++++++---------------
> arch/powerpc/platforms/pseries/iommu.c |   14 ++++++++++----
> include/asm-powerpc/tlb.h              |    5 ++---
> 3 files changed, 28 insertions(+), 22 deletions(-)
> 
>  
> Index: linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/arch/powerpc/mm/tlb_64.c	2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c	2008-07-09 21:30:37.000000000 +0530
> @@ -38,7 +38,6 @@
>   * include/asm-powerpc/tlb.h file -- tgall
>   */
>  DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
> -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
>  unsigned long pte_freelist_forced_free;
>  
>  struct pte_freelist_batch
> @@ -48,7 +47,7 @@
>  	pgtable_free_t	tables[0];
>  };
>  
> -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
> +DEFINE_PER_CPU_LOCKED(struct pte_freelist_batch *, pte_freelist_cur);
>  unsigned long pte_freelist_forced_free;
>  
>  #define PTE_FREELIST_SIZE \
> @@ -92,24 +91,21 @@
>  
>  void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
>  {
> -	/*
> -	 * This is safe since tlb_gather_mmu has disabled preemption.
> -	 * tlb->cpu is set by tlb_gather_mmu as well.
> -	 */
> +	int cpu;
>          cpumask_t local_cpumask = cpumask_of_cpu(tlb->cpu);
> -	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
> +	struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
>  
>  	if (atomic_read(&tlb->mm->mm_users) < 2 ||
>  	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
>  		pgtable_free(pgf);
> -		return;
> +		goto cleanup;
>  	}
>  
>  	if (*batchp == NULL) {
>  		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
>  		if (*batchp == NULL) {
>  			pgtable_free_now(pgf);
> -			return;
> +			goto cleanup;
>  		}
>  		(*batchp)->index = 0;
>  	}
> @@ -118,6 +114,9 @@
>  		pte_free_submit(*batchp);
>  		*batchp = NULL;
>  	}
> +
> + cleanup:
> +	put_cpu_var_locked(pte_freelist_cur, cpu);
>  }
>  
>  /*
> @@ -253,13 +252,15 @@
>  
>  void pte_free_finish(void)
>  {
> -	/* This is safe since tlb_gather_mmu has disabled preemption */
> -	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
> +	int cpu;
> +	struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
>  
> -	if (*batchp == NULL)
> -		return;
> -	pte_free_submit(*batchp);
> -	*batchp = NULL;
> +	if (*batchp) {
> +		pte_free_submit(*batchp);
> +		*batchp = NULL;
> +	}
> +
> +	put_cpu_var_locked(pte_freelist_cur, cpu);
>  }
>  
>  /**
> Index: linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/include/asm-powerpc/tlb.h	2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h	2008-07-09 21:29:41.000000000 +0530
> @@ -40,18 +40,17 @@
>  
>  static inline void tlb_flush(struct mmu_gather *tlb)
>  {
> -	struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
> +	struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
>  
>  	/* If there's a TLB batch pending, then we must flush it because the
>  	 * pages are going to be freed and we really don't want to have a CPU
>  	 * access a freed page because it has a stale TLB
>  	 */
>  	if (tlbbatch->index) {
> -		preempt_disable();
>  		__flush_tlb_pending(tlbbatch);
> -		preempt_enable();
>  	}
>  
> +	put_cpu_var(ppc64_tlb_batch);
>  	pte_free_finish();
>  }
>  
> Index: linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c
> ===================================================================
> --- linux-2.6.25.8-rt7.orig/arch/powerpc/platforms/pseries/iommu.c	2008-07-09 21:29:21.000000000 +0530
> +++ linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c	2008-07-09 21:29:41.000000000 +0530
> @@ -124,7 +124,7 @@
>  	}
>  }
>  
> -static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
> +static DEFINE_PER_CPU_LOCKED(u64 *, tce_page) = NULL;
>  
>  static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
>  				     long npages, unsigned long uaddr,
> @@ -135,12 +135,13 @@
>  	u64 *tcep;
>  	u64 rpn;
>  	long l, limit;
> +	int cpu;
>  
>  	if (npages == 1)
>  		return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
>  					   direction);
>  
> -	tcep = __get_cpu_var(tce_page);
> +	tcep = get_cpu_var_locked(tce_page, &cpu);
>  
>  	/* This is safe to do since interrupts are off when we're called
>  	 * from iommu_alloc{,_sg}()
> @@ -148,10 +149,13 @@
>  	if (!tcep) {
>  		tcep = (u64 *)__get_free_page(GFP_ATOMIC);
>  		/* If allocation fails, fall back to the loop implementation */
> -		if (!tcep)
> +		if (!tcep) {
> +			put_cpu_var_locked(tce_page, cpu);
>  			return tce_build_pSeriesLP(tbl, tcenum, npages,
>  						   uaddr, direction);
> -		__get_cpu_var(tce_page) = tcep;
> +		}
> +
> +		per_cpu_var_locked(tce_page, cpu) = tcep;
>  	}
>  
>  	rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
> @@ -188,6 +192,8 @@
>  		printk("\ttce[0] val = 0x%lx\n", tcep[0]);
>  		show_stack(current, (unsigned long *)__get_SP());
>  	}
> +
> +	put_cpu_var_locked(tce_page, cpu);
>  }
>  
>  static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)


* Re: [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
  2008-07-15  1:32 ` Benjamin Herrenschmidt
@ 2008-07-17 12:56   ` Chirag Jog
  2008-07-17 20:14     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 11+ messages in thread
From: Chirag Jog @ 2008-07-17 12:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linux-rt-users, Josh Triplett, Steven Rostedt, linuxppc-dev,
	Nivedita Singhvi, Timothy R. Chavez, Thomas Gleixner, paulmck,
	linux.kernel

Hi Benjamin,
   Thanks for the review
* Benjamin Herrenschmidt <benh@kernel.crashing.org> [2008-07-15 11:32:01]:

> On Wed, 2008-07-09 at 21:35 +0530, Chirag Jog wrote:
> > Hi,
> > This patch fixes various paths in the -rt kernel on powerpc64 where per_cpu
> > variables are accessed in a preempt unsafe way.
> > When a power box with -rt kernel is booted, multiple BUG messages are
> > generated "BUG: init:1 task might have lost a preemption check!".
> > After booting a kernel with these patches applied, these messages
> > don't appear.
> > 
> > Also I ran the realtime tests from ltp to ensure the stability.
> 
> That sounds bad tho...
> 
> IE. You are changing the code to lock/unlock on all those TLB batching
> operations, but seem to miss the core reason why it was done that way:
> ie, the code assumes that it will not change CPU -between- those calls,
> since the whole stuff should be already have been within a per-cpu
> locked section at the caller level.
> 
All these operations are done assuming that tlb_gather_mmu disables
preemption and tlb_finish_mmu enables preemption again.
This is not true for -rt.
For x86, none of the code paths between tlb_gather_mmu and
tlb_finish_mmu access any per_cpu variables.
But this is not true for powerpc64 as we can see.

One way could be to make tlb_gather_mmu disable preemption, as it does
in mainline, but only for powerpc, although I am not sure this is the
right way forward.

I am attaching a patch below that does this.
I have left out the TCE bits, as they are fine.

Note: I haven't tested the patch extensively.
                    
- Thanks, 
    Chirag


Index: linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c
===================================================================
--- linux-2.6.25.8-rt7.orig/arch/powerpc/mm/tlb_64.c	2008-07-17 16:51:31.000000000 +0530
+++ linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c	2008-07-17 16:51:33.000000000 +0530
@@ -37,7 +37,7 @@
 /* This is declared as we are using the more or less generic
  * include/asm-powerpc/tlb.h file -- tgall
  */
-DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
 unsigned long pte_freelist_forced_free;
 
@@ -96,7 +96,7 @@
 	 * This is safe since tlb_gather_mmu has disabled preemption.
 	 * tlb->cpu is set by tlb_gather_mmu as well.
 	 */
-        cpumask_t local_cpumask = cpumask_of_cpu(tlb->cpu);
+        cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
 	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
Index: linux-2.6.25.8-rt7/arch/powerpc/mm/init_32.c
===================================================================
--- linux-2.6.25.8-rt7.orig/arch/powerpc/mm/init_32.c	2008-07-17 16:51:31.000000000 +0530
+++ linux-2.6.25.8-rt7/arch/powerpc/mm/init_32.c	2008-07-17 16:51:33.000000000 +0530
@@ -54,7 +54,7 @@
 #endif
 #define MAX_LOW_MEM	CONFIG_LOWMEM_SIZE
 
-DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 unsigned long total_memory;
 unsigned long total_lowmem;
Index: linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h
===================================================================
--- linux-2.6.25.8-rt7.orig/include/asm-powerpc/tlb.h	2008-07-17 16:51:31.000000000 +0530
+++ linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h	2008-07-17 16:51:33.000000000 +0530
@@ -46,11 +46,8 @@
 	 * pages are going to be freed and we really don't want to have a CPU
 	 * access a freed page because it has a stale TLB
 	 */
-	if (tlbbatch->index) {
-		preempt_disable();
+	if (tlbbatch->index)
 		__flush_tlb_pending(tlbbatch);
-		preempt_enable();
-	}
 
 	pte_free_finish();
 }
Index: linux-2.6.25.8-rt7/include/asm-generic/tlb.h
===================================================================
--- linux-2.6.25.8-rt7.orig/include/asm-generic/tlb.h	2008-07-17 16:51:31.000000000 +0530
+++ linux-2.6.25.8-rt7/include/asm-generic/tlb.h	2008-07-17 17:33:02.000000000 +0530
@@ -41,23 +41,32 @@
 	unsigned int		nr;	/* set to ~0U means fast mode */
 	unsigned int		need_flush;/* Really unmapped some ptes? */
 	unsigned int		fullmm; /* non-zero means full mm flush */
+#if !defined(__powerpc64__)
 	int			cpu;
+#endif
 	struct page *		pages[FREE_PTE_NR];
 };
 
 /* Users of the generic TLB shootdown code must declare this storage space. */
-DECLARE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
-
+#if !defined(__powerpc64__)
+	DECLARE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
+#else
+	DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
+#endif
 /* tlb_gather_mmu
  *	Return a pointer to an initialized struct mmu_gather.
  */
 static inline struct mmu_gather *
 tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
 {
-	int cpu;
-	struct mmu_gather *tlb = &get_cpu_var_locked(mmu_gathers, &cpu);
 
-	tlb->cpu = cpu;
+#if !defined(__powerpc64__)
+		int cpu;
+		struct mmu_gather *tlb = &get_cpu_var_locked(mmu_gathers, &cpu);
+		tlb->cpu = cpu;
+#else
+		struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
+#endif
 	tlb->mm = mm;
 
 	/* Use fast mode if only one CPU is online */
@@ -93,7 +102,11 @@
 	/* keep the page table cache within bounds */
 	check_pgt_cache();
 
-	put_cpu_var_locked(mmu_gathers, tlb->cpu);
+#if !defined(__powerpc64__)
+		put_cpu_var_locked(mmu_gathers, tlb->cpu);
+#else
+		put_cpu_var(mmu_gathers);
+#endif
 }
 
 /* tlb_remove_page


* Re: [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
  2008-07-17 12:56   ` Chirag Jog
@ 2008-07-17 20:14     ` Benjamin Herrenschmidt
  2008-07-18 10:11       ` Chirag Jog
  0 siblings, 1 reply; 11+ messages in thread
From: Benjamin Herrenschmidt @ 2008-07-17 20:14 UTC (permalink / raw)
  To: Chirag Jog
  Cc: linux-rt-users, Josh Triplett, Steven Rostedt, linuxppc-dev,
	Nivedita Singhvi, Timothy R. Chavez, Thomas Gleixner, paulmck,
	linux.kernel


> All these operations are done assuming that tlb_gather_mmu disables
> preemption and tlb_finish_mmu enables preemption again.
> This is not true for -rt.
> For x86, none of the code paths between tlb_gather_mmu and
> tlb_finish_mmu access any per_cpu variables.
> But this is not true for powerpc64 as we can see.
> 
> One way could be to make tlb_gather_mmu disable preemption as it does
> in mainline but only for powerpc.
> Although i am not sure, if this is the right step ahead.
> 
> I am attaching a patch below for the same.
> I have left out the tce bits, as they are fine.
> 
> Note: I haven't extensively tested the patch

A better option is to make sure that a context switch does the right
thing, flushing the pending batch. I think that's already the case,
which means that your original patch may work, but that needs to
be double-checked and commented properly.
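
A rough sketch of the kind of context-switch hook meant here, assuming a
hypothetical helper in the powerpc switch path (the actual code referenced
later in the thread may differ):

#include <asm/tlbflush.h>

/* Hypothetical helper, assumed to be called from the powerpc
 * context-switch path with preemption already disabled: flush any
 * partially built TLB batch of the outgoing task before the CPU can be
 * handed to another task. */
static inline void example_flush_tlb_batch_on_switch(void)
{
	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);

	if (batch->index)
		__flush_tlb_pending(batch);
}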

Cheers,
Ben.


* Re: [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
  2008-07-17 20:14     ` Benjamin Herrenschmidt
@ 2008-07-18 10:11       ` Chirag Jog
  2008-07-18 22:05         ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 11+ messages in thread
From: Chirag Jog @ 2008-07-18 10:11 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linux-rt-users, Josh Triplett, Steven Rostedt, linuxppc-dev,
	Nivedita Singhvi, Timothy R. Chavez, Thomas Gleixner, paulmck,
	linux.kernel


* Benjamin Herrenschmidt <benh@kernel.crashing.org> [2008-07-18 06:14:31]:

> 
> > All these operations are done assuming that tlb_gather_mmu disables
> > preemption and tlb_finish_mmu enables preemption again.
> > This is not true for -rt.
> > For x86, none of the code paths between tlb_gather_mmu and
> > tlb_finish_mmu access any per_cpu variables.
> > But this is not true for powerpc64 as we can see.
> > 
> > One way could be to make tlb_gather_mmu disable preemption as it does
> > in mainline but only for powerpc.
> > Although i am not sure, if this is the right step ahead.
> > 
> > I am attaching a patch below for the same.
> > I have left out the tce bits, as they are fine.
> > 
> > Note: I haven't extensively tested the patch
> 
> A better option is to make sure that a context switch does the right
> thing, flushing the pending batch. I think that's already the case,
> which means that your original patch may work, but that needs to
> be double-checked and commented properly.
> 
With the original patch, the pending batch does get flushed
in a non-preemptible region.
I am resending the original patch, adding only the necessary comments.

-Thanks,
        Chirag



Signed-off-by: Chirag <chirag@linux.vnet.ibm.com>

Index: linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c
===================================================================
--- linux-2.6.25.8-rt7.orig/arch/powerpc/mm/tlb_64.c	2008-07-18 10:08:00.000000000 +0530
+++ linux-2.6.25.8-rt7/arch/powerpc/mm/tlb_64.c	2008-07-18 10:09:54.000000000 +0530
@@ -38,7 +38,6 @@
  * include/asm-powerpc/tlb.h file -- tgall
  */
 DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
-DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
 unsigned long pte_freelist_forced_free;
 
 struct pte_freelist_batch
@@ -48,7 +47,7 @@
 	pgtable_free_t	tables[0];
 };
 
-DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+DEFINE_PER_CPU_LOCKED(struct pte_freelist_batch *, pte_freelist_cur);
 unsigned long pte_freelist_forced_free;
 
 #define PTE_FREELIST_SIZE \
@@ -92,24 +91,21 @@
 
 void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
 {
-	/*
-	 * This is safe since tlb_gather_mmu has disabled preemption.
-	 * tlb->cpu is set by tlb_gather_mmu as well.
-	 */
+	int cpu;
         cpumask_t local_cpumask = cpumask_of_cpu(tlb->cpu);
-	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+	struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
 		pgtable_free(pgf);
-		return;
+		goto cleanup;
 	}
 
 	if (*batchp == NULL) {
 		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
 		if (*batchp == NULL) {
 			pgtable_free_now(pgf);
-			return;
+			goto cleanup;
 		}
 		(*batchp)->index = 0;
 	}
@@ -118,6 +114,9 @@
 		pte_free_submit(*batchp);
 		*batchp = NULL;
 	}
+
+ cleanup:
+	put_cpu_var_locked(pte_freelist_cur, cpu);
 }
 
 /*
@@ -253,13 +252,15 @@
 
 void pte_free_finish(void)
 {
-	/* This is safe since tlb_gather_mmu has disabled preemption */
-	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+	int cpu;
+	struct pte_freelist_batch **batchp = &get_cpu_var_locked(pte_freelist_cur, &cpu);
 
-	if (*batchp == NULL)
-		return;
-	pte_free_submit(*batchp);
-	*batchp = NULL;
+	if (*batchp) {
+		pte_free_submit(*batchp);
+		*batchp = NULL;
+	}
+
+	put_cpu_var_locked(pte_freelist_cur, cpu);
 }
 
 /**
Index: linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h
===================================================================
--- linux-2.6.25.8-rt7.orig/include/asm-powerpc/tlb.h	2008-07-18 10:08:00.000000000 +0530
+++ linux-2.6.25.8-rt7/include/asm-powerpc/tlb.h	2008-07-18 10:31:02.000000000 +0530
@@ -40,18 +40,20 @@
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
-	struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
+	/* Disable preemption to ensure the pending TLB batch is flushed
+	 * before a potential context switch
+	 */
+	struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
 
 	/* If there's a TLB batch pending, then we must flush it because the
 	 * pages are going to be freed and we really don't want to have a CPU
 	 * access a freed page because it has a stale TLB
 	 */
 	if (tlbbatch->index) {
-		preempt_disable();
 		__flush_tlb_pending(tlbbatch);
-		preempt_enable();
 	}
 
+	put_cpu_var(ppc64_tlb_batch);
 	pte_free_finish();
 }
 
Index: linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c
===================================================================
--- linux-2.6.25.8-rt7.orig/arch/powerpc/platforms/pseries/iommu.c	2008-07-17 14:47:30.000000000 +0530
+++ linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/iommu.c	2008-07-18 10:09:54.000000000 +0530
@@ -124,7 +124,7 @@
 	}
 }
 
-static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
+static DEFINE_PER_CPU_LOCKED(u64 *, tce_page) = NULL;
 
 static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 				     long npages, unsigned long uaddr,
@@ -135,12 +135,13 @@
 	u64 *tcep;
 	u64 rpn;
 	long l, limit;
+	int cpu;
 
 	if (npages == 1)
 		return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
 					   direction);
 
-	tcep = __get_cpu_var(tce_page);
+	tcep = get_cpu_var_locked(tce_page, &cpu);
 
 	/* This is safe to do since interrupts are off when we're called
 	 * from iommu_alloc{,_sg}()
@@ -148,10 +149,13 @@
 	if (!tcep) {
 		tcep = (u64 *)__get_free_page(GFP_ATOMIC);
 		/* If allocation fails, fall back to the loop implementation */
-		if (!tcep)
+		if (!tcep) {
+			put_cpu_var_locked(tce_page, cpu);
 			return tce_build_pSeriesLP(tbl, tcenum, npages,
 						   uaddr, direction);
-		__get_cpu_var(tce_page) = tcep;
+		}
+
+		per_cpu_var_locked(tce_page, cpu) = tcep;
 	}
 
 	rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
@@ -188,6 +192,8 @@
 		printk("\ttce[0] val = 0x%lx\n", tcep[0]);
 		show_stack(current, (unsigned long *)__get_SP());
 	}
+
+	put_cpu_var_locked(tce_page, cpu);
 }
 
 static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)


* Re: [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
  2008-07-18 10:11       ` Chirag Jog
@ 2008-07-18 22:05         ` Benjamin Herrenschmidt
  2008-07-19  1:26           ` Steven Rostedt
  2008-07-21 10:23           ` Chirag Jog
  0 siblings, 2 replies; 11+ messages in thread
From: Benjamin Herrenschmidt @ 2008-07-18 22:05 UTC (permalink / raw)
  To: Chirag Jog
  Cc: linux-rt-users, Josh Triplett, Steven Rostedt, linuxppc-dev,
	Nivedita Singhvi, Timothy R. Chavez, Thomas Gleixner, paulmck,
	linux.kernel


> With the original patch, the pending batch does get flushed 
> in a non-preemptable region. 
> I am resending the original with just adding the necesary comments.

Your comment isn't what I meant. What I meant is that if the process
is context switched while walking the page tables, the low-level powerpc
context switch code should also perform a __flush_tlb_pending.

BTW, is the pte_lock also not a real spinlock anymore? That may break
other assumptions the powerpc mm code makes.

This -rt stuff is just too scary; it changes some fundamental semantics
of the spinlocks. Yuck.

Ben.


* Re: [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
  2008-07-18 22:05         ` Benjamin Herrenschmidt
@ 2008-07-19  1:26           ` Steven Rostedt
  2008-07-19  3:53             ` Benjamin Herrenschmidt
  2008-07-21 10:23           ` Chirag Jog
  1 sibling, 1 reply; 11+ messages in thread
From: Steven Rostedt @ 2008-07-19  1:26 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linux-rt-users, Josh Triplett, linuxppc-dev, Nivedita Singhvi,
	Chirag Jog, Timothy R. Chavez, Thomas Gleixner, paulmck,
	linux.kernel


On Sat, 19 Jul 2008, Benjamin Herrenschmidt wrote:
>
> > With the original patch, the pending batch does get flushed
> > in a non-preemptable region.
> > I am resending the original with just adding the necesary comments.
>
> Your comment isn't what I meant. What I meant is that if the process
> is context switched while walking the page tables, the low level powerpc
> context switch code should also perform a __flush_tlb_pending.
>
> BTW. Is the pte_lock also not a real spinlock anymore ? That may break
> other assumptions the powerpc mm code is doing.
>
> This -rt stuff is just too scary, it changes some fundamental semantics
> of the spinlocks. yuck.

Lots of semantics change with -rt, but everything should still work ;-)
Some spinlocks remain real spinlocks, but we shouldn't have a problem
with most of them being mutexes.

There are some cases that use per-CPU variables or other per-CPU actions
that require a special CPU_LOCK to protect the data when preemption is
enabled. The slab.c code in -rt handles this.

-- Steve


* Re: [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
  2008-07-19  1:26           ` Steven Rostedt
@ 2008-07-19  3:53             ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 11+ messages in thread
From: Benjamin Herrenschmidt @ 2008-07-19  3:53 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-rt-users, Josh Triplett, linuxppc-dev, Nivedita Singhvi,
	Chirag Jog, Timothy R. Chavez, Thomas Gleixner, paulmck,
	linux.kernel


> There's lots of semantics that are changed with -rt that should make
> everything still work ;-)  Some spinlocks remain real spinlocks, but we
> shouldn't have a problem with most being mutexes.
> 
> There's some cases that uses per CPU variables or other per cpu actions
> that require a special CPU_LOCK that protects the data in a preemption
> mode. The slab.c code in -rt handles this.

Well, in my case at least there is a whole class of code that assumes
that, because the whole thing happens within a spinlock section at the
top level, it can not only access per_cpu variables using the __
variants (that's easy), but can also add things bit by bit to that
per-cpu cache as it gets called at the lower level. It's not actually
prepared for possibly migrating to another CPU right in the middle.

I need to review that stuff a bit. I think we fixed some of that at one
point, and we made sure that the context switch itself would flush
pending MMU batches, so it -may- be fine in that specific case.

Cheers,
Ben. 


* Re: [PATCH][RT][PPC64] Fix preempt unsafe paths accessing per_cpu variables
  2008-07-18 22:05         ` Benjamin Herrenschmidt
  2008-07-19  1:26           ` Steven Rostedt
@ 2008-07-21 10:23           ` Chirag Jog
  1 sibling, 0 replies; 11+ messages in thread
From: Chirag Jog @ 2008-07-21 10:23 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linux-rt-users, Josh Triplett, Steven Rostedt, linuxppc-dev,
	Nivedita Singhvi, Timothy R. Chavez, Thomas Gleixner, paulmck,
	linux.kernel

Hi Benjamin

* Benjamin Herrenschmidt <benh@kernel.crashing.org> [2008-07-19 08:05:30]:

> 
> > With the original patch, the pending batch does get flushed 
> > in a non-preemptable region. 
> > I am resending the original with just adding the necesary comments.
> 
> Your comment isn't what I meant. What I meant is that if the process
> is context switched while walking the page tables, the low level powerpc
> context switch code should also perform a __flush_tlb_pending.
Sorry, I misunderstood.
The powerpc context switch code does perform a __flush_tlb_pending.
Here is the patch:
http://marc.info/?l=linux-kernel&m=119752629222720&w=2
> BTW. Is the pte_lock also not a real spinlock anymore ? That may break
> other assumptions the powerpc mm code is doing.
pte_lock is not a real spinlock anymore.

> This -rt stuff is just too scary, it changes some fundamental semantics
> of the spinlocks. yuck.
> 
> Ben.
> 
> 

