LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH powerpc 2/2] kfree the cache name  of pgtable cache if SLUB is used
From: Christoph Lameter @ 2012-07-03 20:36 UTC (permalink / raw)
  To: Li Zhong
  Cc: LKML, Pekka Enberg, linux-mm, Paul Mackerras, Matt Mackall,
	PowerPC email list
In-Reply-To: <alpine.DEB.2.00.1207031344240.14703@router.home>

Looking through the emails it seems that there is an issue with alias
strings. That can be solved by duping the name of the slab earlier in kmem_cache_create().
Does this patch fix the issue?

Subject: slub: Dup name earlier in kmem_cache_create

Dup the name earlier in kmem_cache_create so that alias
processing is done using the copy of the string and not
the string itself.

Signed-off-by: Christoph Lameter <cl@linux.com>

---
 mm/slub.c |   29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2012-06-11 08:49:56.000000000 -0500
+++ linux-2.6/mm/slub.c	2012-07-03 15:17:37.000000000 -0500
@@ -3933,8 +3933,12 @@ struct kmem_cache *kmem_cache_create(con
 	if (WARN_ON(!name))
 		return NULL;

+	n = kstrdup(name, GFP_KERNEL);
+	if (!n)
+		goto out;
+
 	down_write(&slub_lock);
-	s = find_mergeable(size, align, flags, name, ctor);
+	s = find_mergeable(size, align, flags, n, ctor);
 	if (s) {
 		s->refcount++;
 		/*
@@ -3944,7 +3948,7 @@ struct kmem_cache *kmem_cache_create(con
 		s->objsize = max(s->objsize, (int)size);
 		s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));

-		if (sysfs_slab_alias(s, name)) {
+		if (sysfs_slab_alias(s, n)) {
 			s->refcount--;
 			goto err;
 		}
@@ -3952,31 +3956,26 @@ struct kmem_cache *kmem_cache_create(con
 		return s;
 	}

-	n = kstrdup(name, GFP_KERNEL);
-	if (!n)
-		goto err;
-
 	s = kmalloc(kmem_size, GFP_KERNEL);
 	if (s) {
 		if (kmem_cache_open(s, n,
 				size, align, flags, ctor)) {
 			list_add(&s->list, &slab_caches);
 			up_write(&slub_lock);
-			if (sysfs_slab_add(s)) {
-				down_write(&slub_lock);
-				list_del(&s->list);
-				kfree(n);
-				kfree(s);
-				goto err;
-			}
-			return s;
+			if (!sysfs_slab_add(s))
+				return s;
+
+			down_write(&slub_lock);
+			list_del(&s->list);
 		}
 		kfree(s);
 	}
-	kfree(n);
+
 err:
+	kfree(n);
 	up_write(&slub_lock);

+out:
 	if (flags & SLAB_PANIC)
 		panic("Cannot create slabcache %s\n", name);
 	else

^ permalink raw reply

* Re: Build regressions/improvements in v3.5-rc5
From: Geert Uytterhoeven @ 2012-07-03 19:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: Linuxppc-dev, Haavard Skinnemoen, Hans-Christian Egtvedt,
	Linux-sh list
In-Reply-To: <1341342768-22221-1-git-send-email-geert@linux-m68k.org>

On Tue, Jul 3, 2012 at 9:12 PM, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> JFYI, when comparing v3.5-rc5 to v3.5-rc4[3], the summaries are:
>   - build errors: +20/-11

20 regressions:
  + arch/powerpc/platforms/512x/mpc512x_shared.c: error:
'FSL_DIU_PORT_DVI' undeclared (first use in this function):  => 189:9
  + arch/powerpc/platforms/512x/mpc512x_shared.c: error: 'enum
fsl_diu_monitor_port' declared inside parameter list [-Werror]:  =>
70:9, 84:9, 88:36
  + arch/powerpc/platforms/512x/mpc512x_shared.c: error: 'return' with
a value, in function returning void [-Werror]:  => 189:2
  + arch/powerpc/platforms/512x/mpc512x_shared.c: error: function
declaration isn't a prototype [-Werror=strict-prototypes]:  => 69:5,
88:6, 83:6, 187:1
  + arch/powerpc/platforms/512x/mpc512x_shared.c: error: its scope is
only this definition or declaration, which is probably not what you
want [-Werror]:  => 70:9
  + arch/powerpc/platforms/512x/mpc512x_shared.c: error: parameter 1
('port') has incomplete type:  => 187:54, 83:56, 88:57, 69:56
  + arch/powerpc/platforms/512x/mpc512x_shared.c: error: return type
is an incomplete type:  => 187:1
  + drivers/net/ethernet/3com/3c515.c: error: implicit declaration of
function 'isa_bus_to_virt' [-Werror=implicit-function-declaration]:
=> 1374:12
  + drivers/net/ethernet/3com/3c515.c: error: implicit declaration of
function 'isa_virt_to_bus' [-Werror=implicit-function-declaration]:
=> 824:9
  + drivers/net/ethernet/amd/ni65.c: error: implicit declaration of
function 'isa_bus_to_virt' [-Werror=implicit-function-declaration]:
=> 756:4
  + drivers/net/ethernet/amd/ni65.c: error: implicit declaration of
function 'isa_virt_to_bus' [-Werror=implicit-function-declaration]:
=> 586:2

powerpc-randconfig

  + error: idt77252.c: relocation truncated to fit: R_PPC64_REL24
against symbol `.__raw_spin_lock_init' defined in .text section in
lib/built-in.o:  => (.text+0x1ff8c5c)
  + error: idt77252.c: relocation truncated to fit: R_PPC64_REL24
against symbol `.del_timer' defined in .text section in
kernel/built-in.o:  => (.text+0x1ff806c)
  + error: idt77252.c: relocation truncated to fit: R_PPC64_REL24
against symbol `.mutex_unlock' defined in .sched.text section in
kernel/built-in.o:  => (.text+0x1ff8eec), (.text+0x1ff8c14),
(.text+0x1ff8e80), (.text+0x1ff8e08), (.text+0x1ff8f30)
  + error: solos-pci.c: relocation truncated to fit: R_PPC64_REL24
against symbol `._raw_spin_lock_irqsave' defined in .spinlock.text
section in kernel/built-in.o:  => (.text+0x1ff954c)
  + error: solos-pci.c: relocation truncated to fit: R_PPC64_REL24
against symbol `._raw_spin_unlock' defined in .spinlock.text section
in kernel/built-in.o:  => (.text+0x1ff970c)
  + error: solos-pci.c: relocation truncated to fit: R_PPC64_REL24
against symbol `.iowrite32' defined in .text section in
lib/built-in.o:  => (.text+0x1ff98e4)

powerpc-allyesconfig

  + error: vsprintf.c: relocation truncated to fit: R_AVR32_9UW_PCREL
against `.text'+b28:  => (.text+0x92a)

avr32-allnoconfig

  + include/linux/sh_pfc.h: error: field 'chip' has incomplete type:  => 121:19

se7721_defconfig

  + kernel/sys.c: error: 'mmap_min_addr' undeclared (first use in this
function): 1864:34 => 1864:37, 1864:34

sh-randconfig, sh-allyesconfig, sh-allmodconfig

> [1] http://kisskb.ellerman.id.au/kisskb/head/5202/ (all 116 configs)

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH powerpc 2/2] kfree the cache name  of pgtable cache if SLUB is used
From: Christoph Lameter @ 2012-07-03 18:48 UTC (permalink / raw)
  To: Li Zhong
  Cc: LKML, Pekka Enberg, linux-mm, Paul Mackerras, Matt Mackall,
	PowerPC email list
In-Reply-To: <1340618099.13778.39.camel@ThinkPad-T420>

On Mon, 25 Jun 2012, Li Zhong wrote:

> This patch tries to kfree the cache name of pgtables cache if SLUB is
> used, as SLUB duplicates the cache name, and the original one is leaked.

SLAB also does not free the name. Why would you have an #ifdef in there?

^ permalink raw reply

* [PATCH][v3] PPC: use CURRENT_THREAD_INFO instead of open coded assembly
From: Stuart Yoder @ 2012-07-03 15:11 UTC (permalink / raw)
  To: agraf, benh; +Cc: linuxppc-dev

From: Stuart Yoder <stuart.yoder@freescale.com>

Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
---

-v3
   -moved CURRENT_THREAD_INFO out from under the assembly only
    #define as per comments on mailing list
   -reversed logic of 64-bit #ifdef for CURRENT_THREAD_INFO 
    macro

-this patch is a pre-requisite for the idle hcall which I
am trying to get into Alex's KVM tree, so ideally would like
Ben's ack and get this applied to Alex's tree

-built/tested with a 32-bit booke kernel, built a 64-bit
 booke kernel


 arch/powerpc/include/asm/exception-64s.h |    4 ++--
 arch/powerpc/include/asm/thread_info.h   |    6 ++++++
 arch/powerpc/kernel/entry_32.S           |   24 ++++++++++++------------
 arch/powerpc/kernel/entry_64.S           |   14 +++++++-------
 arch/powerpc/kernel/exceptions-64e.S     |    2 +-
 arch/powerpc/kernel/exceptions-64s.S     |    2 +-
 arch/powerpc/kernel/head_fsl_booke.S     |    2 +-
 arch/powerpc/kernel/idle_6xx.S           |    4 ++--
 arch/powerpc/kernel/idle_book3e.S        |    2 +-
 arch/powerpc/kernel/idle_e500.S          |    4 ++--
 arch/powerpc/kernel/idle_power4.S        |    2 +-
 arch/powerpc/kernel/misc_32.S            |    4 ++--
 arch/powerpc/kvm/bookehv_interrupts.S    |    6 +-----
 arch/powerpc/mm/hash_low_32.S            |    8 ++++----
 arch/powerpc/sysdev/6xx-suspend.S        |    2 +-
 15 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index d58fc4e..5dbd00d 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -293,7 +293,7 @@ label##_hv:								\
 
 #define RUNLATCH_ON				\
 BEGIN_FTR_SECTION				\
-	clrrdi	r3,r1,THREAD_SHIFT;		\
+	CURRENT_THREAD_INFO(r3, r1)		\
 	ld	r4,TI_LOCAL_FLAGS(r3);		\
 	andi.	r0,r4,_TLF_RUNLATCH;		\
 	beql	ppc64_runlatch_on_trampoline;	\
@@ -332,7 +332,7 @@ label##_common:							\
 #ifdef CONFIG_PPC_970_NAP
 #define FINISH_NAP				\
 BEGIN_FTR_SECTION				\
-	clrrdi	r11,r1,THREAD_SHIFT;		\
+	CURRENT_THREAD_INFO(r11, r1)		\
 	ld	r9,TI_LOCAL_FLAGS(r11);		\
 	andi.	r10,r9,_TLF_NAPPING;		\
 	bnel	power4_fixup_nap;		\
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 68831e9..faf9352 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -22,6 +22,12 @@
 
 #define THREAD_SIZE		(1 << THREAD_SHIFT)
 
+#ifdef CONFIG_PPC64
+#define CURRENT_THREAD_INFO(dest, sp)	clrrdi dest, sp, THREAD_SHIFT
+#else
+#define CURRENT_THREAD_INFO(dest, sp)	rlwinm dest, sp, 0, 0, 31-THREAD_SHIFT
+#endif
+
 #ifndef __ASSEMBLY__
 #include <linux/cache.h>
 #include <asm/processor.h>
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index ba3aeb4..bad42e3 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -92,7 +92,7 @@ crit_transfer_to_handler:
 	mfspr	r8,SPRN_SPRG_THREAD
 	lwz	r0,KSP_LIMIT(r8)
 	stw	r0,SAVED_KSP_LIMIT(r11)
-	rlwimi	r0,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r0, r1)
 	stw	r0,KSP_LIMIT(r8)
 	/* fall through */
 #endif
@@ -112,7 +112,7 @@ crit_transfer_to_handler:
 	mfspr	r8,SPRN_SPRG_THREAD
 	lwz	r0,KSP_LIMIT(r8)
 	stw	r0,saved_ksp_limit@l(0)
-	rlwimi	r0,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r0, r1)
 	stw	r0,KSP_LIMIT(r8)
 	/* fall through */
 #endif
@@ -158,7 +158,7 @@ transfer_to_handler:
 	tophys(r11,r11)
 	addi	r11,r11,global_dbcr0@l
 #ifdef CONFIG_SMP
-	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r9, r1)
 	lwz	r9,TI_CPU(r9)
 	slwi	r9,r9,3
 	add	r11,r11,r9
@@ -179,7 +179,7 @@ transfer_to_handler:
 	ble-	stack_ovf		/* then the kernel stack overflowed */
 5:
 #if defined(CONFIG_6xx) || defined(CONFIG_E500)
-	rlwinm	r9,r1,0,0,31-THREAD_SHIFT
+	CURRENT_THREAD_INFO(r9, r1)
 	tophys(r9,r9)			/* check local flags */
 	lwz	r12,TI_LOCAL_FLAGS(r9)
 	mtcrf	0x01,r12
@@ -333,7 +333,7 @@ _GLOBAL(DoSyscall)
 	mtmsr	r11
 1:
 #endif /* CONFIG_TRACE_IRQFLAGS */
-	rlwinm	r10,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
+	CURRENT_THREAD_INFO(r10, r1)
 	lwz	r11,TI_FLAGS(r10)
 	andi.	r11,r11,_TIF_SYSCALL_T_OR_A
 	bne-	syscall_dotrace
@@ -354,7 +354,7 @@ ret_from_syscall:
 	bl	do_show_syscall_exit
 #endif
 	mr	r6,r3
-	rlwinm	r12,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
+	CURRENT_THREAD_INFO(r12, r1)
 	/* disable interrupts so current_thread_info()->flags can't change */
 	LOAD_MSR_KERNEL(r10,MSR_KERNEL)	/* doesn't include MSR_EE */
 	/* Note: We don't bother telling lockdep about it */
@@ -815,7 +815,7 @@ ret_from_except:
 
 user_exc_return:		/* r10 contains MSR_KERNEL here */
 	/* Check current_thread_info()->flags */
-	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r9, r1)
 	lwz	r9,TI_FLAGS(r9)
 	andi.	r0,r9,_TIF_USER_WORK_MASK
 	bne	do_work
@@ -835,7 +835,7 @@ restore_user:
 /* N.B. the only way to get here is from the beq following ret_from_except. */
 resume_kernel:
 	/* check current_thread_info->preempt_count */
-	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r9, r1)
 	lwz	r0,TI_PREEMPT(r9)
 	cmpwi	0,r0,0		/* if non-zero, just restore regs and return */
 	bne	restore
@@ -852,7 +852,7 @@ resume_kernel:
 	bl	trace_hardirqs_off
 #endif
 1:	bl	preempt_schedule_irq
-	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r9, r1)
 	lwz	r3,TI_FLAGS(r9)
 	andi.	r0,r3,_TIF_NEED_RESCHED
 	bne-	1b
@@ -1122,7 +1122,7 @@ ret_from_debug_exc:
 	lwz	r10,SAVED_KSP_LIMIT(r1)
 	stw	r10,KSP_LIMIT(r9)
 	lwz	r9,THREAD_INFO-THREAD(r9)
-	rlwinm	r10,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r10, r1)
 	lwz	r10,TI_PREEMPT(r10)
 	stw	r10,TI_PREEMPT(r9)
 	RESTORE_xSRR(SRR0,SRR1);
@@ -1156,7 +1156,7 @@ load_dbcr0:
 	lis	r11,global_dbcr0@ha
 	addi	r11,r11,global_dbcr0@l
 #ifdef CONFIG_SMP
-	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r9, r1)
 	lwz	r9,TI_CPU(r9)
 	slwi	r9,r9,3
 	add	r11,r11,r9
@@ -1197,7 +1197,7 @@ recheck:
 	LOAD_MSR_KERNEL(r10,MSR_KERNEL)
 	SYNC
 	MTMSRD(r10)		/* disable interrupts */
-	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r9, r1)
 	lwz	r9,TI_FLAGS(r9)
 	andi.	r0,r9,_TIF_NEED_RESCHED
 	bne-	do_resched
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index ed1718f..ba943b9 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -146,7 +146,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 	REST_2GPRS(7,r1)
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 #endif
-	clrrdi	r11,r1,THREAD_SHIFT
+	CURRENT_THREAD_INFO(r11, r1)
 	ld	r10,TI_FLAGS(r11)
 	andi.	r11,r10,_TIF_SYSCALL_T_OR_A
 	bne-	syscall_dotrace
@@ -181,7 +181,7 @@ syscall_exit:
 	bl	.do_show_syscall_exit
 	ld	r3,RESULT(r1)
 #endif
-	clrrdi	r12,r1,THREAD_SHIFT
+	CURRENT_THREAD_INFO(r12, r1)
 
 	ld	r8,_MSR(r1)
 #ifdef CONFIG_PPC_BOOK3S
@@ -262,7 +262,7 @@ syscall_dotrace:
 	ld	r7,GPR7(r1)
 	ld	r8,GPR8(r1)
 	addi	r9,r1,STACK_FRAME_OVERHEAD
-	clrrdi	r10,r1,THREAD_SHIFT
+	CURRENT_THREAD_INFO(r10, r1)
 	ld	r10,TI_FLAGS(r10)
 	b	.Lsyscall_dotrace_cont
 
@@ -499,7 +499,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 2:
 #endif /* !CONFIG_PPC_BOOK3S */
 
-	clrrdi	r7,r8,THREAD_SHIFT	/* base of new stack */
+	CURRENT_THREAD_INFO(r7, r8)  /* base of new stack */
 	/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
 	   because we don't need to leave the 288-byte ABI gap at the
 	   top of the kernel stack. */
@@ -559,7 +559,7 @@ _GLOBAL(ret_from_except_lite)
 #endif /* CONFIG_PPC_BOOK3E */
 
 #ifdef CONFIG_PREEMPT
-	clrrdi	r9,r1,THREAD_SHIFT	/* current_thread_info() */
+	CURRENT_THREAD_INFO(r9, r1)
 	li	r0,_TIF_NEED_RESCHED	/* bits to check */
 	ld	r3,_MSR(r1)
 	ld	r4,TI_FLAGS(r9)
@@ -574,7 +574,7 @@ _GLOBAL(ret_from_except_lite)
 	beq	restore		/* if not, just restore regs and return */
 
 	/* Check current_thread_info()->flags */
-	clrrdi	r9,r1,THREAD_SHIFT
+	CURRENT_THREAD_INFO(r9, r1)
 	ld	r4,TI_FLAGS(r9)
 	andi.	r0,r4,_TIF_USER_WORK_MASK
 	bne	do_work
@@ -782,7 +782,7 @@ do_work:
 1:	bl	.preempt_schedule_irq
 
 	/* Re-test flags and eventually loop */
-	clrrdi	r9,r1,THREAD_SHIFT
+	CURRENT_THREAD_INFO(r9, r1)
 	ld	r4,TI_FLAGS(r9)
 	andi.	r0,r4,_TIF_NEED_RESCHED
 	bne	1b
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 7215cc2..3b60028 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -222,7 +222,7 @@ exc_##n##_bad_stack:							    \
  * interrupts happen before the wait instruction.
  */
 #define CHECK_NAPPING()							\
-	clrrdi	r11,r1,THREAD_SHIFT;					\
+	CURRENT_THREAD_INFO(r11, r1)
 	ld	r10,TI_LOCAL_FLAGS(r11);				\
 	andi.	r9,r10,_TLF_NAPPING;					\
 	beq+	1f;							\
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1c06d29..8ad3468 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -851,7 +851,7 @@ BEGIN_FTR_SECTION
 	bne-	do_ste_alloc		/* If so handle it */
 END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
 
-	clrrdi	r11,r1,THREAD_SHIFT
+	CURRENT_THREAD_INFO(r11, r1)
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
 	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
 	bne	77f			/* then don't call hash_page now */
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 1f4434a..7e7bd88 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -192,7 +192,7 @@ _ENTRY(__early_start)
 	li	r0,0
 	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
 
-	rlwinm  r22,r1,0,0,31-THREAD_SHIFT      /* current thread_info */
+	CURRENT_THREAD_INFO(r22, r1)
 	stw	r24, TI_CPU(r22)
 
 	bl	early_init
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 15c611d..1686916 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -135,7 +135,7 @@ BEGIN_FTR_SECTION
 	DSSALL
 	sync
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-	rlwinm	r9,r1,0,0,31-THREAD_SHIFT	/* current thread_info */
+	CURRENT_THREAD_INFO(r9, r1)
 	lwz	r8,TI_LOCAL_FLAGS(r9)	/* set napping bit */
 	ori	r8,r8,_TLF_NAPPING	/* so when we take an exception */
 	stw	r8,TI_LOCAL_FLAGS(r9)	/* it will return to our caller */
@@ -158,7 +158,7 @@ _GLOBAL(power_save_ppc32_restore)
 	stw	r9,_NIP(r11)		/* make it do a blr */
 
 #ifdef CONFIG_SMP
-	rlwinm	r12,r11,0,0,31-THREAD_SHIFT
+	CURRENT_THREAD_INFO(r12, r11)
 	lwz	r11,TI_CPU(r12)		/* get cpu number * 4 */
 	slwi	r11,r11,2
 #else
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index ff007b5..4c7cb400 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -60,7 +60,7 @@ _GLOBAL(book3e_idle)
 1:	/* Let's set the _TLF_NAPPING flag so interrupts make us return
 	 * to the right spot
 	*/
-	clrrdi	r11,r1,THREAD_SHIFT
+	CURRENT_THREAD_INFO(r11, r1)
 	ld	r10,TI_LOCAL_FLAGS(r11)
 	ori	r10,r10,_TLF_NAPPING
 	std	r10,TI_LOCAL_FLAGS(r11)
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
index 4f0ab85..1544866 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -21,7 +21,7 @@
 	.text
 
 _GLOBAL(e500_idle)
-	rlwinm	r3,r1,0,0,31-THREAD_SHIFT	/* current thread_info */
+	CURRENT_THREAD_INFO(r3, r1)
 	lwz	r4,TI_LOCAL_FLAGS(r3)	/* set napping bit */
 	ori	r4,r4,_TLF_NAPPING	/* so when we take an exception */
 	stw	r4,TI_LOCAL_FLAGS(r3)	/* it will return to our caller */
@@ -96,7 +96,7 @@ _GLOBAL(power_save_ppc32_restore)
 	stw	r9,_NIP(r11)		/* make it do a blr */
 
 #ifdef CONFIG_SMP
-	rlwinm	r12,r1,0,0,31-THREAD_SHIFT
+	CURRENT_THREAD_INFO(r12, r1)
 	lwz	r11,TI_CPU(r12)		/* get cpu number * 4 */
 	slwi	r11,r11,2
 #else
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index 2c71b0f..e3edaa1 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -59,7 +59,7 @@ BEGIN_FTR_SECTION
 	DSSALL
 	sync
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-	clrrdi	r9,r1,THREAD_SHIFT	/* current thread_info */
+	CURRENT_THREAD_INFO(r9, r1)
 	ld	r8,TI_LOCAL_FLAGS(r9)	/* set napping bit */
 	ori	r8,r8,_TLF_NAPPING	/* so when we take an exception */
 	std	r8,TI_LOCAL_FLAGS(r9)	/* it will return to our caller */
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 386d57f..407e293 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -179,7 +179,7 @@ _GLOBAL(low_choose_750fx_pll)
 	mtspr	SPRN_HID1,r4
 
 	/* Store new HID1 image */
-	rlwinm	r6,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r6, r1)
 	lwz	r6,TI_CPU(r6)
 	slwi	r6,r6,2
 	addis	r6,r6,nap_save_hid1@ha
@@ -699,7 +699,7 @@ _GLOBAL(kernel_thread)
 #ifdef CONFIG_SMP
 _GLOBAL(start_secondary_resume)
 	/* Reset stack */
-	rlwinm	r1,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
+	CURRENT_THREAD_INFO(r1, r1)
 	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
 	li	r3,0
 	stw	r3,0(r1)		/* Zero the stack frame pointer	*/
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 0fa2ef7..c8c7a04 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -161,11 +161,7 @@
 	mtspr	SPRN_EPLC, r8
 
 	/* disable preemption, so we are sure we hit the fixup handler */
-#ifdef CONFIG_PPC64
-	clrrdi	r8,r1,THREAD_SHIFT
-#else
-	rlwinm	r8,r1,0,0,31-THREAD_SHIFT       /* current thread_info */
-#endif
+	CURRENT_THREAD_INFO(r8, r1)
 	li	r7, 1
 	stw	r7, TI_PREEMPT(r8)
 
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index b13d589..115347f 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -184,7 +184,7 @@ _GLOBAL(add_hash_page)
 	add	r3,r3,r0		/* note create_hpte trims to 24 bits */
 
 #ifdef CONFIG_SMP
-	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT) /* use cpu number to make tag */
+	CURRENT_THREAD_INFO(r8, r1)	/* use cpu number to make tag */
 	lwz	r8,TI_CPU(r8)		/* to go in mmu_hash_lock */
 	oris	r8,r8,12
 #endif /* CONFIG_SMP */
@@ -545,7 +545,7 @@ _GLOBAL(flush_hash_pages)
 #ifdef CONFIG_SMP
 	addis	r9,r7,mmu_hash_lock@ha
 	addi	r9,r9,mmu_hash_lock@l
-	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r8, r1)
 	add	r8,r8,r7
 	lwz	r8,TI_CPU(r8)
 	oris	r8,r8,9
@@ -639,7 +639,7 @@ _GLOBAL(flush_hash_patch_B)
  */
 _GLOBAL(_tlbie)
 #ifdef CONFIG_SMP
-	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r8, r1)
 	lwz	r8,TI_CPU(r8)
 	oris	r8,r8,11
 	mfmsr	r10
@@ -677,7 +677,7 @@ _GLOBAL(_tlbie)
  */
 _GLOBAL(_tlbia)
 #if defined(CONFIG_SMP)
-	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT)
+	CURRENT_THREAD_INFO(r8, r1)
 	lwz	r8,TI_CPU(r8)
 	oris	r8,r8,10
 	mfmsr	r10
diff --git a/arch/powerpc/sysdev/6xx-suspend.S b/arch/powerpc/sysdev/6xx-suspend.S
index 21cda08..cf48e9c 100644
--- a/arch/powerpc/sysdev/6xx-suspend.S
+++ b/arch/powerpc/sysdev/6xx-suspend.S
@@ -29,7 +29,7 @@ _GLOBAL(mpc6xx_enter_standby)
 	ori	r5, r5, ret_from_standby@l
 	mtlr	r5
 
-	rlwinm	r5, r1, 0, 0, 31-THREAD_SHIFT
+	CURRENT_THREAD_INFO(r5, r1)
 	lwz	r6, TI_LOCAL_FLAGS(r5)
 	ori	r6, r6, _TLF_SLEEPING
 	stw	r6, TI_LOCAL_FLAGS(r5)
-- 
1.7.3.4

^ permalink raw reply related

* RE: [PATCH][v2] PPC: use CURRENT_THREAD_INFO instead of open coded assembly
From: Yoder Stuart-B08248 @ 2012-07-03 15:07 UTC (permalink / raw)
  To: Alexander Graf; +Cc: linuxppc-dev@lists.ozlabs.org
In-Reply-To: <EB9E2FD4-8867-4E9D-93F9-B8EE3284F37A@suse.de>



> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Monday, July 02, 2012 4:16 PM
> To: Yoder Stuart-B08248
> Cc: benh@kernel.crashing.org; linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH][v2] PPC: use CURRENT_THREAD_INFO instead of open cod=
ed assembly
>=20
>=20
> On 02.07.2012, at 23:14, Stuart Yoder wrote:
>=20
> > From: Stuart Yoder <stuart.yoder@freescale.com>
> >
> > Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
> > ---
> >
> > -this patch is a pre-requisite for the idle hcall which I
> > am trying to get into Alex's KVM tree, so ideally would like
> > Ben's ack and get this applied to Alex's tree
> >
> > -built/tested with a 32-bit booke kernel, built a 64-bit
> > booke kernel
> >
> > -v2
> >   -moved CURRENT_THREAD_INFO under assembly only
> >    #ifdef
> >
> > arch/powerpc/include/asm/exception-64s.h |    4 ++--
> > arch/powerpc/include/asm/thread_info.h   |    8 ++++++++
> > arch/powerpc/kernel/entry_32.S           |   24 ++++++++++++-----------=
-
> > arch/powerpc/kernel/entry_64.S           |   14 +++++++-------
> > arch/powerpc/kernel/exceptions-64e.S     |    2 +-
> > arch/powerpc/kernel/exceptions-64s.S     |    2 +-
> > arch/powerpc/kernel/head_fsl_booke.S     |    2 +-
> > arch/powerpc/kernel/idle_6xx.S           |    4 ++--
> > arch/powerpc/kernel/idle_book3e.S        |    2 +-
> > arch/powerpc/kernel/idle_e500.S          |    4 ++--
> > arch/powerpc/kernel/idle_power4.S        |    2 +-
> > arch/powerpc/kernel/misc_32.S            |    4 ++--
> > arch/powerpc/kvm/bookehv_interrupts.S    |    6 +-----
> > arch/powerpc/mm/hash_low_32.S            |    8 ++++----
> > arch/powerpc/sysdev/6xx-suspend.S        |    2 +-
> > 15 files changed, 46 insertions(+), 42 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/in=
clude/asm/exception-64s.h
> > index d58fc4e..5dbd00d 100644
> > --- a/arch/powerpc/include/asm/exception-64s.h
> > +++ b/arch/powerpc/include/asm/exception-64s.h
> > @@ -293,7 +293,7 @@ label##_hv:								\
> >
> > #define RUNLATCH_ON				\
> > BEGIN_FTR_SECTION				\
> > -	clrrdi	r3,r1,THREAD_SHIFT;		\
> > +	CURRENT_THREAD_INFO(r3, r1)		\
> > 	ld	r4,TI_LOCAL_FLAGS(r3);		\
> > 	andi.	r0,r4,_TLF_RUNLATCH;		\
> > 	beql	ppc64_runlatch_on_trampoline;	\
> > @@ -332,7 +332,7 @@ label##_common:							\
> > #ifdef CONFIG_PPC_970_NAP
> > #define FINISH_NAP				\
> > BEGIN_FTR_SECTION				\
> > -	clrrdi	r11,r1,THREAD_SHIFT;		\
> > +	CURRENT_THREAD_INFO(r11, r1)		\
> > 	ld	r9,TI_LOCAL_FLAGS(r11);		\
> > 	andi.	r10,r9,_TLF_NAPPING;		\
> > 	bnel	power4_fixup_nap;		\
> > diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/incl=
ude/asm/thread_info.h
> > index 68831e9..3760620 100644
> > --- a/arch/powerpc/include/asm/thread_info.h
> > +++ b/arch/powerpc/include/asm/thread_info.h
> > @@ -74,6 +74,14 @@ static inline struct thread_info *current_thread_inf=
o(void)
> > 	return (struct thread_info *)(sp & ~(THREAD_SIZE-1));
> > }
> >
> > +#else
> > +
> > +#ifndef CONFIG_PPC64
>=20
> Oh no! The logic is still backwards :(

Doh...somehow I missed that comment in your previous reply.  Will fix=20
that.

Stuart

^ permalink raw reply

* Re: [PATCH v7 1/5] powerpc/85xx: implement hardware timebase sync
From: Tabi Timur-B04825 @ 2012-07-03 12:46 UTC (permalink / raw)
  To: Zhao Chenhui-B35336
  Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <1341310879-5468-1-git-send-email-chenhui.zhao@freescale.com>

On Tue, Jul 3, 2012 at 5:21 AM, Zhao Chenhui <chenhui.zhao@freescale.com> w=
rote:

> +       np =3D of_find_matching_node(NULL, mpc85xx_smp_guts_ids);
> +       if (np) {
> +               guts =3D of_iomap(np, 0);
> +               of_node_put(np);
> +               if (!guts) {
> +                       pr_err("%s: Could not map guts node address\n",
> +                                                               __func__)=
;
> +                       return;
> +               }
> +               smp_85xx_ops.give_timebase =3D mpc85xx_give_timebase;
> +               smp_85xx_ops.take_timebase =3D mpc85xx_take_timebase;
> +       }

I had this in mind:

               guts =3D of_iomap(np, 0);
               of_node_put(np);
               if (guts) {
                       smp_85xx_ops.give_timebase =3D mpc85xx_give_timebase=
;
                       smp_85xx_ops.take_timebase =3D mpc85xx_take_timebase=
;
                } else {
                       pr_err("%s: Could not map guts node address\n",
                                                               __func__);
               }

That way, a missing GUTS node does not break everything.

--=20
Timur Tabi
Linux kernel developer at Freescale=

^ permalink raw reply

* [PATCH v7 2/5] powerpc/85xx: add HOTPLUG_CPU support
From: Zhao Chenhui @ 2012-07-03 10:21 UTC (permalink / raw)
  To: linuxppc-dev, scottwood, galak; +Cc: linux-kernel
In-Reply-To: <1341310879-5468-1-git-send-email-chenhui.zhao@freescale.com>

From: Li Yang <leoli@freescale.com>

Add support to disable and re-enable individual cores at runtime
on MPC85xx/QorIQ SMP machines. Currently support e500v1/e500v2 core.

MPC85xx machines use ePAPR spin-table in boot page for CPU kick-off.
This patch uses the boot page from bootloader to boot core at runtime.
It supports 32-bit and 36-bit physical address.

Add generic_set_cpu_up() to set cpu_state as CPU_UP_PREPARE in kick_cpu().

Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Jin Qing <b24347@freescale.com>
Signed-off-by: Zhao Chenhui <chenhui.zhao@freescale.com>
---
v7:
 * removed CONFIG_85xx_TB_SYNC
no change to the rest of the patch set

 arch/powerpc/Kconfig                  |    6 +-
 arch/powerpc/include/asm/cacheflush.h |    2 +
 arch/powerpc/include/asm/smp.h        |    2 +
 arch/powerpc/kernel/head_fsl_booke.S  |   28 +++++++
 arch/powerpc/kernel/smp.c             |   10 +++
 arch/powerpc/platforms/85xx/smp.c     |  137 ++++++++++++++++++++++++---------
 6 files changed, 146 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 38786c8..d6bacbe 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -220,7 +220,8 @@ config ARCH_HIBERNATION_POSSIBLE
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 	depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \
-		   (PPC_85xx && !SMP) || PPC_86xx || PPC_PSERIES || 44x || 40x
+		   (PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \
+		   || 44x || 40x
 
 config PPC_DCR_NATIVE
 	bool
@@ -331,7 +332,8 @@ config SWIOTLB
 
 config HOTPLUG_CPU
 	bool "Support for enabling/disabling CPUs"
-	depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC || PPC_POWERNV)
+	depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || \
+	PPC_PMAC || PPC_POWERNV || (PPC_85xx && !PPC_E500MC))
 	---help---
 	  Say Y here to be able to disable and re-enable individual
 	  CPUs at runtime on SMP machines.
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index ab9e402..b843e35 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -30,6 +30,8 @@ extern void flush_dcache_page(struct page *page);
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
+extern void __flush_disable_L1(void);
+
 extern void __flush_icache_range(unsigned long, unsigned long);
 static inline void flush_icache_range(unsigned long start, unsigned long stop)
 {
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index ebc24dc..e807e9d 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -65,6 +65,7 @@ int generic_cpu_disable(void);
 void generic_cpu_die(unsigned int cpu);
 void generic_mach_cpu_die(void);
 void generic_set_cpu_dead(unsigned int cpu);
+void generic_set_cpu_up(unsigned int cpu);
 int generic_check_cpu_restart(unsigned int cpu);
 #endif
 
@@ -190,6 +191,7 @@ extern unsigned long __secondary_hold_spinloop;
 extern unsigned long __secondary_hold_acknowledge;
 extern char __secondary_hold;
 
+extern void __early_start(void);
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index de80e0f..be0261b 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -996,6 +996,34 @@ _GLOBAL(flush_dcache_L1)
 
 	blr
 
+/* Flush L1 d-cache, invalidate and disable d-cache and i-cache */
+_GLOBAL(__flush_disable_L1)
+	mflr	r10
+	bl	flush_dcache_L1	/* Flush L1 d-cache */
+	mtlr	r10
+
+	mfspr	r4, SPRN_L1CSR0	/* Invalidate and disable d-cache */
+	li	r5, 2
+	rlwimi	r4, r5, 0, 3
+
+	msync
+	isync
+	mtspr	SPRN_L1CSR0, r4
+	isync
+
+1:	mfspr	r4, SPRN_L1CSR0	/* Wait for the invalidate to finish */
+	andi.	r4, r4, 2
+	bne	1b
+
+	mfspr	r4, SPRN_L1CSR1	/* Invalidate and disable i-cache */
+	li	r5, 2
+	rlwimi	r4, r5, 0, 3
+
+	mtspr	SPRN_L1CSR1, r4
+	isync
+
+	blr
+
 #ifdef CONFIG_SMP
 /* When we get here, r24 needs to hold the CPU # */
 	.globl __secondary_start
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index d9f9441..e0ffe03 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -423,6 +423,16 @@ void generic_set_cpu_dead(unsigned int cpu)
 	per_cpu(cpu_state, cpu) = CPU_DEAD;
 }
 
+/*
+ * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise
+ * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(),
+ * which makes the delay in generic_cpu_die() not happen.
+ */
+void generic_set_cpu_up(unsigned int cpu)
+{
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+}
+
 int generic_check_cpu_restart(unsigned int cpu)
 {
 	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 2e65fe8..925e678 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -2,7 +2,7 @@
  * Author: Andy Fleming <afleming@freescale.com>
  * 	   Kumar Gala <galak@kernel.crashing.org>
  *
- * Copyright 2006-2008, 2011 Freescale Semiconductor Inc.
+ * Copyright 2006-2008, 2011-2012 Freescale Semiconductor Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -17,6 +17,7 @@
 #include <linux/of.h>
 #include <linux/kexec.h>
 #include <linux/highmem.h>
+#include <linux/cpu.h>
 
 #include <asm/machdep.h>
 #include <asm/pgtable.h>
@@ -30,18 +31,14 @@
 #include <sysdev/mpic.h>
 #include "smp.h"
 
-extern void __early_start(void);
-
-#define BOOT_ENTRY_ADDR_UPPER	0
-#define BOOT_ENTRY_ADDR_LOWER	1
-#define BOOT_ENTRY_R3_UPPER	2
-#define BOOT_ENTRY_R3_LOWER	3
-#define BOOT_ENTRY_RESV		4
-#define BOOT_ENTRY_PIR		5
-#define BOOT_ENTRY_R6_UPPER	6
-#define BOOT_ENTRY_R6_LOWER	7
-#define NUM_BOOT_ENTRY		8
-#define SIZE_BOOT_ENTRY		(NUM_BOOT_ENTRY * sizeof(u32))
+struct epapr_spin_table {
+	u32	addr_h;
+	u32	addr_l;
+	u32	r3_h;
+	u32	r3_l;
+	u32	reserved;
+	u32	pir;
+};
 
 static struct ccsr_guts __iomem *guts;
 static u64 timebase;
@@ -101,15 +98,45 @@ static void mpc85xx_take_timebase(void)
 	local_irq_restore(flags);
 }
 
-static int __init
-smp_85xx_kick_cpu(int nr)
+#ifdef CONFIG_HOTPLUG_CPU
+static void __cpuinit smp_85xx_mach_cpu_die(void)
+{
+	unsigned int cpu = smp_processor_id();
+	u32 tmp;
+
+	local_irq_disable();
+	idle_task_exit();
+	generic_set_cpu_dead(cpu);
+	mb();
+
+	mtspr(SPRN_TCR, 0);
+
+	__flush_disable_L1();
+	tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP;
+	mtspr(SPRN_HID0, tmp);
+	isync();
+
+	/* Enter NAP mode. */
+	tmp = mfmsr();
+	tmp |= MSR_WE;
+	mb();
+	mtmsr(tmp);
+	isync();
+
+	while (1)
+		;
+}
+#endif
+
+static int __cpuinit smp_85xx_kick_cpu(int nr)
 {
 	unsigned long flags;
 	const u64 *cpu_rel_addr;
-	__iomem u32 *bptr_vaddr;
+	__iomem struct epapr_spin_table *spin_table;
 	struct device_node *np;
-	int n = 0, hw_cpu = get_hard_smp_processor_id(nr);
+	int hw_cpu = get_hard_smp_processor_id(nr);
 	int ioremappable;
+	int ret = 0;
 
 	WARN_ON(nr < 0 || nr >= NR_CPUS);
 	WARN_ON(hw_cpu < 0 || hw_cpu >= NR_CPUS);
@@ -134,46 +161,80 @@ smp_85xx_kick_cpu(int nr)
 
 	/* Map the spin table */
 	if (ioremappable)
-		bptr_vaddr = ioremap(*cpu_rel_addr, SIZE_BOOT_ENTRY);
+		spin_table = ioremap(*cpu_rel_addr,
+				sizeof(struct epapr_spin_table));
 	else
-		bptr_vaddr = phys_to_virt(*cpu_rel_addr);
+		spin_table = phys_to_virt(*cpu_rel_addr);
 
 	local_irq_save(flags);
-
-	out_be32(bptr_vaddr + BOOT_ENTRY_PIR, hw_cpu);
 #ifdef CONFIG_PPC32
-	out_be32(bptr_vaddr + BOOT_ENTRY_ADDR_LOWER, __pa(__early_start));
+#ifdef CONFIG_HOTPLUG_CPU
+	/* Corresponding to generic_set_cpu_dead() */
+	generic_set_cpu_up(nr);
+
+	if (system_state == SYSTEM_RUNNING) {
+		out_be32(&spin_table->addr_l, 0);
+
+		/*
+		 * We don't set the BPTR register here since it already points
+		 * to the boot page properly.
+		 */
+		mpic_reset_core(hw_cpu);
+
+		/* wait until core is ready... */
+		if (!spin_event_timeout(in_be32(&spin_table->addr_l) == 1,
+						10000, 100)) {
+			pr_err("%s: timeout waiting for core %d to reset\n",
+							__func__, hw_cpu);
+			ret = -ENOENT;
+			goto out;
+		}
+
+		/*  clear the acknowledge status */
+		__secondary_hold_acknowledge = -1;
+	}
+#endif
+	out_be32(&spin_table->pir, hw_cpu);
+	out_be32(&spin_table->addr_l, __pa(__early_start));
 
 	if (!ioremappable)
-		flush_dcache_range((ulong)bptr_vaddr,
-				(ulong)(bptr_vaddr + SIZE_BOOT_ENTRY));
+		flush_dcache_range((ulong)spin_table,
+			(ulong)spin_table + sizeof(struct epapr_spin_table));
 
 	/* Wait a bit for the CPU to ack. */
-	while ((__secondary_hold_acknowledge != hw_cpu) && (++n < 1000))
-		mdelay(1);
+	if (!spin_event_timeout(__secondary_hold_acknowledge == hw_cpu,
+					10000, 100)) {
+		pr_err("%s: timeout waiting for core %d to ack\n",
+						__func__, hw_cpu);
+		ret = -ENOENT;
+		goto out;
+	}
+out:
 #else
 	smp_generic_kick_cpu(nr);
 
-	out_be64((u64 *)(bptr_vaddr + BOOT_ENTRY_ADDR_UPPER),
-		__pa((u64)*((unsigned long long *) generic_secondary_smp_init)));
+	out_be32(&spin_table->pir, hw_cpu);
+	out_be64((u64 *)(&spin_table->addr_h),
+	  __pa((u64)*((unsigned long long *)generic_secondary_smp_init)));
 
 	if (!ioremappable)
-		flush_dcache_range((ulong)bptr_vaddr,
-				(ulong)(bptr_vaddr + SIZE_BOOT_ENTRY));
+		flush_dcache_range((ulong)spin_table,
+			(ulong)spin_table + sizeof(struct epapr_spin_table));
 #endif
-
 	local_irq_restore(flags);
 
 	if (ioremappable)
-		iounmap(bptr_vaddr);
+		iounmap(spin_table);
 
-	pr_debug("waited %d msecs for CPU #%d.\n", n, nr);
-
-	return 0;
+	return ret;
 }
 
 struct smp_ops_t smp_85xx_ops = {
 	.kick_cpu = smp_85xx_kick_cpu,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable	= generic_cpu_disable,
+	.cpu_die	= generic_cpu_die,
+#endif
 #ifdef CONFIG_KEXEC
 	.give_timebase	= smp_generic_give_timebase,
 	.take_timebase	= smp_generic_take_timebase,
@@ -277,8 +338,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image)
 }
 #endif /* CONFIG_KEXEC */
 
-static void __init
-smp_85xx_setup_cpu(int cpu_nr)
+static void __cpuinit smp_85xx_setup_cpu(int cpu_nr)
 {
 	if (smp_85xx_ops.probe == smp_mpic_probe)
 		mpic_setup_this_cpu();
@@ -329,6 +389,9 @@ void __init mpc85xx_smp_init(void)
 		}
 		smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
 		smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
+#ifdef CONFIG_HOTPLUG_CPU
+		ppc_md.cpu_die = smp_85xx_mach_cpu_die;
+#endif
 	}
 
 	smp_ops = &smp_85xx_ops;
-- 
1.6.4.1

^ permalink raw reply related

* [PATCH v7 1/5] powerpc/85xx: implement hardware timebase sync
From: Zhao Chenhui @ 2012-07-03 10:21 UTC (permalink / raw)
  To: linuxppc-dev, scottwood, galak; +Cc: linux-kernel

Do hardware timebase sync. Firstly, stop all timebases, and transfer
the timebase value of the boot core to the other core. Finally,
start all timebases.

Only apply to dual-core chips, such as MPC8572, P2020, etc.

Signed-off-by: Zhao Chenhui <chenhui.zhao@freescale.com>
Signed-off-by: Li Yang <leoli@freescale.com>
---
v7:
 * removed CONFIG_85xx_TB_SYNC
 * incorporated Timur's comments

 arch/powerpc/include/asm/fsl_guts.h |    2 +
 arch/powerpc/platforms/85xx/smp.c   |   82 +++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/asm/fsl_guts.h
index aa4c488..dd5ba2c 100644
--- a/arch/powerpc/include/asm/fsl_guts.h
+++ b/arch/powerpc/include/asm/fsl_guts.h
@@ -48,6 +48,8 @@ struct ccsr_guts {
         __be32  dmuxcr;		/* 0x.0068 - DMA Mux Control Register */
         u8	res06c[0x70 - 0x6c];
 	__be32	devdisr;	/* 0x.0070 - Device Disable Control */
+#define CCSR_GUTS_DEVDISR_TB1	0x00001000
+#define CCSR_GUTS_DEVDISR_TB0	0x00004000
 	__be32	devdisr2;	/* 0x.0074 - Device Disable Control 2 */
 	u8	res078[0x7c - 0x78];
 	__be32  pmjcr;		/* 0x.007c - 4 Power Management Jog Control Register */
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index ff42490..2e65fe8 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -24,6 +24,7 @@
 #include <asm/mpic.h>
 #include <asm/cacheflush.h>
 #include <asm/dbell.h>
+#include <asm/fsl_guts.h>
 
 #include <sysdev/fsl_soc.h>
 #include <sysdev/mpic.h>
@@ -42,6 +43,64 @@ extern void __early_start(void);
 #define NUM_BOOT_ENTRY		8
 #define SIZE_BOOT_ENTRY		(NUM_BOOT_ENTRY * sizeof(u32))
 
+static struct ccsr_guts __iomem *guts;
+static u64 timebase;
+static int tb_req;
+static int tb_valid;
+
+static void mpc85xx_timebase_freeze(int freeze)
+{
+	uint32_t mask;
+
+	mask = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1;
+	if (freeze)
+		setbits32(&guts->devdisr, mask);
+	else
+		clrbits32(&guts->devdisr, mask);
+
+	in_be32(&guts->devdisr);
+}
+
+static void mpc85xx_give_timebase(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	while (!tb_req)
+		barrier();
+	tb_req = 0;
+
+	mpc85xx_timebase_freeze(1);
+	timebase = get_tb();
+	mb();
+	tb_valid = 1;
+
+	while (tb_valid)
+		barrier();
+
+	mpc85xx_timebase_freeze(0);
+
+	local_irq_restore(flags);
+}
+
+static void mpc85xx_take_timebase(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	tb_req = 1;
+	while (!tb_valid)
+		barrier();
+
+	set_tb(timebase >> 32, timebase & 0xffffffff);
+	isync();
+	tb_valid = 0;
+
+	local_irq_restore(flags);
+}
+
 static int __init
 smp_85xx_kick_cpu(int nr)
 {
@@ -228,6 +287,16 @@ smp_85xx_setup_cpu(int cpu_nr)
 		doorbell_setup_this_cpu();
 }
 
+static const struct of_device_id mpc85xx_smp_guts_ids[] = {
+	{ .compatible = "fsl,mpc8572-guts", },
+	{ .compatible = "fsl,p1020-guts", },
+	{ .compatible = "fsl,p1021-guts", },
+	{ .compatible = "fsl,p1022-guts", },
+	{ .compatible = "fsl,p1023-guts", },
+	{ .compatible = "fsl,p2020-guts", },
+	{},
+};
+
 void __init mpc85xx_smp_init(void)
 {
 	struct device_node *np;
@@ -249,6 +318,19 @@ void __init mpc85xx_smp_init(void)
 		smp_85xx_ops.cause_ipi = doorbell_cause_ipi;
 	}
 
+	np = of_find_matching_node(NULL, mpc85xx_smp_guts_ids);
+	if (np) {
+		guts = of_iomap(np, 0);
+		of_node_put(np);
+		if (!guts) {
+			pr_err("%s: Could not map guts node address\n",
+								__func__);
+			return;
+		}
+		smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
+		smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
+	}
+
 	smp_ops = &smp_85xx_ops;
 
 #ifdef CONFIG_KEXEC
-- 
1.6.4.1

^ permalink raw reply related

* [PATCH] powerpc/44x: Support OCM(On Chip Memory) for APM821xx SoC and Bluestone board
From: Vinh Nguyen Huu Tuong @ 2012-07-03  8:52 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Josh Boyer, Matt Porter,
	Grant Likely, Rob Herring, Duc Dang, David S. Miller, Mai La,
	Kumar Gala, Li Yang, Ashish Kalra, Anatolij Gustschin, Liu Gang,
	Meador Inge, linuxppc-dev, linux-kernel, devicetree-discuss
  Cc: Vinh Nguyen Huu Tuong

This patch consists of:
- Add driver for OCM component
- Export OCM Information at /sys/kernel/debug/ppc4xx_ocm/info

Signed-off-by: Vinh Nguyen Huu Tuong <vhtnguyen@apm.com>
---
 arch/powerpc/boot/dts/bluestone.dts   |    8 +
 arch/powerpc/include/asm/ppc4xx_ocm.h |   45 ++++
 arch/powerpc/platforms/44x/Kconfig    |    8 +
 arch/powerpc/sysdev/Makefile          |    1 +
 arch/powerpc/sysdev/ppc4xx_ocm.c      |  415 +++++++++++++++++++++++++++++++++
 5 files changed, 477 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/include/asm/ppc4xx_ocm.h
 create mode 100644 arch/powerpc/sysdev/ppc4xx_ocm.c

diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts
index 9d4917a..7daaca3 100644
--- a/arch/powerpc/boot/dts/bluestone.dts
+++ b/arch/powerpc/boot/dts/bluestone.dts
@@ -107,6 +107,14 @@
 		interrupt-parent = <&UIC0>;
 	};
 
+	OCM: ocm@400040000 {
+		compatible = "ibm,ocm";
+		status = "ok";
+		cell-index = <1>;
+		/* configured in U-Boot */
+		reg = <4 0x00040000 0x8000>; /* 32K */
+	};
+
 	SDR0: sdr {
 		compatible = "ibm,sdr-apm821xx";
 		dcr-reg = <0x00e 0x002>;
diff --git a/arch/powerpc/include/asm/ppc4xx_ocm.h b/arch/powerpc/include/asm/ppc4xx_ocm.h
new file mode 100644
index 0000000..6ce9046
--- /dev/null
+++ b/arch/powerpc/include/asm/ppc4xx_ocm.h
@@ -0,0 +1,45 @@
+/*
+ * PowerPC 4xx OCM memory allocation support
+ *
+ * (C) Copyright 2009, Applied Micro Circuits Corporation
+ * Victor Gallardo (vgallardo@amcc.com)
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef __ASM_POWERPC_PPC4XX_OCM_H__
+#define __ASM_POWERPC_PPC4XX_OCM_H__
+
+#define PPC4XX_OCM_NON_CACHED 0
+#define PPC4XX_OCM_CACHED     1
+
+#if defined(CONFIG_PPC4xx_OCM)
+
+void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
+		  int flags, const char *owner);
+void ppc4xx_ocm_free(const void *virt);
+
+#else
+
+#define ppc4xx_ocm_alloc(phys, size, align, flags, owner)	NULL
+#define ppc4xx_ocm_free(addr)	((void)0)
+
+#endif /* CONFIG_PPC4xx_OCM */
+
+#endif  /* __ASM_POWERPC_PPC4XX_OCM_H__ */
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 8abf6fb8..0effe9f 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -252,6 +252,14 @@ config PPC4xx_GPIO
 	help
 	  Enable gpiolib support for ppc440 based boards
 
+config PPC4xx_OCM
+	bool "PPC4xx On Chip Memory (OCM) support"
+	depends on 4xx
+	select PPC_LIB_RHEAP
+	help
+	  Enable OCM support for PowerPC 4xx platforms with on chip memory,
+	  OCM provides the fast place for memory access to improve performance.
+
 # 44x specific CPU modules, selected based on the board above.
 config 440EP
 	bool
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 1bd7ecb..6f768e2 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_PPC_INDIRECT_PCI)	+= indirect_pci.o
 obj-$(CONFIG_PPC_I8259)		+= i8259.o
 obj-$(CONFIG_IPIC)		+= ipic.o
 obj-$(CONFIG_4xx)		+= uic.o
+obj-$(CONFIG_PPC4xx_OCM)	+= ppc4xx_ocm.o
 obj-$(CONFIG_4xx_SOC)		+= ppc4xx_soc.o
 obj-$(CONFIG_XILINX_VIRTEX)	+= xilinx_intc.o
 obj-$(CONFIG_XILINX_PCI)	+= xilinx_pci.o
diff --git a/arch/powerpc/sysdev/ppc4xx_ocm.c b/arch/powerpc/sysdev/ppc4xx_ocm.c
new file mode 100644
index 0000000..1b15f93
--- /dev/null
+++ b/arch/powerpc/sysdev/ppc4xx_ocm.c
@@ -0,0 +1,415 @@
+/*
+ * PowerPC 4xx OCM memory allocation support
+ *
+ * (C) Copyright 2009, Applied Micro Circuits Corporation
+ * Victor Gallardo (vgallardo@amcc.com)
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <linux/of.h>
+#include <asm/rheap.h>
+#include <asm/ppc4xx_ocm.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+
+#define OCM_DISABLED	0
+#define OCM_ENABLED		1
+
+struct ocm_block {
+	struct list_head	list;
+	void __iomem		*addr;
+	int					size;
+	const char			*owner;
+};
+
+/* non-cached or cached region */
+struct ocm_region {
+	phys_addr_t			phys;
+	void __iomem		*virt;
+
+	int					memtotal;
+	int					memfree;
+
+	rh_info_t			*rh;
+	struct list_head	list;
+};
+
+struct ocm_info {
+	int					index;
+	int					status;
+	int					ready;
+
+	phys_addr_t			phys;
+
+	int					alignment;
+	int					memtotal;
+	int					cache_size;
+
+	struct ocm_region	nc;	/* non-cached region */
+	struct ocm_region	c;	/* cached region */
+};
+
+static struct ocm_info *ocm_nodes;
+static int ocm_count;
+
+static struct ocm_info *ocm_get_node(unsigned int index)
+{
+	if (index >= ocm_count) {
+		printk(KERN_ERR "PPC4XX OCM: invalid index");
+		return NULL;
+	}
+
+	return &ocm_nodes[index];
+}
+
+static int ocm_free_region(struct ocm_region *ocm_reg, const void *addr)
+{
+	struct ocm_block *blk, *tmp;
+	unsigned long offset;
+
+	if (!ocm_reg->virt)
+		return 0;
+
+	list_for_each_entry_safe(blk, tmp, &ocm_reg->list, list) {
+		if (blk->addr == addr) {
+			offset = addr - ocm_reg->virt;
+			ocm_reg->memfree += blk->size;
+			rh_free(ocm_reg->rh, offset);
+			list_del(&blk->list);
+			kfree(blk);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static void __init ocm_init_node(int count, struct device_node *node)
+{
+	struct ocm_info *ocm;
+
+	const unsigned int *cell_index;
+	const unsigned int *cache_size;
+	int len;
+
+	struct resource rsrc;
+	int ioflags;
+
+	ocm = ocm_get_node(count);
+
+	cell_index = of_get_property(node, "cell-index", &len);
+	if (!cell_index) {
+		printk(KERN_ERR "PPC4XX OCM: missing cell-index property");
+		return;
+	}
+	ocm->index = *cell_index;
+
+	if (of_device_is_available(node))
+		ocm->status = OCM_ENABLED;
+
+	cache_size = of_get_property(node, "cached-region-size", &len);
+	if (cache_size)
+		ocm->cache_size = *cache_size;
+
+	if (of_address_to_resource(node, 0, &rsrc)) {
+		printk(KERN_ERR "PPC4XX OCM%d: could not get resource address\n",
+			ocm->index);
+		return;
+	}
+
+	ocm->phys = rsrc.start;
+	ocm->memtotal = (rsrc.end - rsrc.start + 1);
+
+	printk(KERN_INFO "PPC4XX OCM%d: %d Bytes (%s)\n",
+		ocm->index, ocm->memtotal,
+		(ocm->status == OCM_DISABLED) ? "disabled" : "enabled");
+
+	if (ocm->status == OCM_DISABLED)
+		return;
+
+	/* request region */
+
+	if (!request_mem_region(ocm->phys, ocm->memtotal, "ppc4xx_ocm")) {
+		printk(KERN_ERR "PPC4XX OCM%d: could not request region\n",
+			ocm->index);
+		return;
+	}
+
+	/* Configure non-cached and cached regions */
+
+	ocm->nc.phys = ocm->phys;
+	ocm->nc.memtotal = ocm->memtotal - ocm->cache_size;
+	ocm->nc.memfree = ocm->nc.memtotal;
+
+	ocm->c.phys = ocm->phys + ocm->nc.memtotal;
+	ocm->c.memtotal = ocm->cache_size;
+	ocm->c.memfree = ocm->c.memtotal;
+
+	if (ocm->nc.memtotal == 0)
+		ocm->nc.phys = 0;
+
+	if (ocm->c.memtotal == 0)
+		ocm->c.phys = 0;
+
+	printk(KERN_INFO "PPC4XX OCM%d: %d Bytes (non-cached)\n",
+		ocm->index, ocm->nc.memtotal);
+
+	printk(KERN_INFO "PPC4XX OCM%d: %d Bytes (cached)\n",
+		ocm->index, ocm->c.memtotal);
+
+	/* ioremap the non-cached region */
+	if (ocm->nc.memtotal) {
+		ioflags = _PAGE_NO_CACHE | _PAGE_GUARDED | _PAGE_EXEC;
+		ocm->nc.virt = __ioremap(ocm->nc.phys, ocm->nc.memtotal,
+					  ioflags);
+
+		if (!ocm->nc.virt) {
+			printk(KERN_ERR
+			       "PPC4XX OCM%d: failed to ioremap non-cached memory\n",
+			       ocm->index);
+			ocm->nc.memfree = 0;
+			return;
+		}
+	}
+
+	/* ioremap the cached region */
+
+	if (ocm->c.memtotal) {
+		ioflags = _PAGE_EXEC;
+		ocm->c.virt = __ioremap(ocm->c.phys, ocm->c.memtotal,
+					 ioflags);
+
+		if (!ocm->c.virt) {
+			printk(KERN_ERR
+			       "PPC4XX OCM%d: failed to ioremap cached memory\n",
+			       ocm->index);
+			ocm->c.memfree = 0;
+			return;
+		}
+	}
+
+	/* Create Remote Heaps */
+
+	ocm->alignment = 4; /* default 4 byte alignment */
+
+	if (ocm->nc.virt) {
+		ocm->nc.rh = rh_create(ocm->alignment);
+		rh_attach_region(ocm->nc.rh, 0, ocm->nc.memtotal);
+	}
+
+	if (ocm->c.virt) {
+		ocm->c.rh = rh_create(ocm->alignment);
+		rh_attach_region(ocm->c.rh, 0, ocm->c.memtotal);
+	}
+
+	INIT_LIST_HEAD(&ocm->nc.list);
+	INIT_LIST_HEAD(&ocm->c.list);
+
+	ocm->ready = 1;
+
+	return;
+}
+
+static int ocm_debugfs_show(struct seq_file *m, void *v)
+{
+	struct ocm_block *blk, *tmp;
+	unsigned int i;
+
+	for (i = 0; i < ocm_count; i++) {
+		struct ocm_info *ocm = ocm_get_node(i);
+
+		if (!ocm || !ocm->ready)
+			continue;
+
+		seq_printf(m, "PPC4XX OCM   : %d\n", ocm->index);
+		seq_printf(m, "PhysAddr     : 0x%llx\n", ocm->phys);
+		seq_printf(m, "MemTotal     : %d Bytes\n", ocm->memtotal);
+		seq_printf(m, "MemTotal(NC) : %d Bytes\n", ocm->nc.memtotal);
+		seq_printf(m, "MemTotal(C)  : %d Bytes\n", ocm->c.memtotal);
+
+		seq_printf(m, "\n");
+
+		seq_printf(m, "NC.PhysAddr  : 0x%llx\n", ocm->nc.phys);
+		seq_printf(m, "NC.VirtAddr  : 0x%p\n", ocm->nc.virt);
+		seq_printf(m, "NC.MemTotal  : %d Bytes\n", ocm->nc.memtotal);
+		seq_printf(m, "NC.MemFree   : %d Bytes\n", ocm->nc.memfree);
+
+		list_for_each_entry_safe(blk, tmp, &ocm->nc.list, list) {
+			seq_printf(m, "NC.MemUsed   : %d Bytes (%s)\n",
+							blk->size, blk->owner);
+		}
+
+		seq_printf(m, "\n");
+
+		seq_printf(m, "C.PhysAddr   : 0x%llx\n", ocm->c.phys);
+		seq_printf(m, "C.VirtAddr   : 0x%p\n", ocm->c.virt);
+		seq_printf(m, "C.MemTotal   : %d Bytes\n", ocm->c.memtotal);
+		seq_printf(m, "C.MemFree    : %d Bytes\n", ocm->c.memfree);
+
+		list_for_each_entry_safe(blk, tmp, &ocm->c.list, list) {
+			seq_printf(m, "C.MemUsed    : %d Bytes (%s)\n",
+						blk->size, blk->owner);
+		}
+
+		seq_printf(m, "\n");
+	}
+
+	return 0;
+}
+
+static int ocm_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ocm_debugfs_show, NULL);
+}
+
+static const struct file_operations ocm_debugfs_fops = {
+	.open = ocm_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int ocm_debugfs_init(void)
+{
+	struct dentry *junk;
+
+	junk = debugfs_create_dir("ppc4xx_ocm", 0);
+	if (!junk) {
+		printk(KERN_ALERT "debugfs ppc4xx ocm: failed to create dir\n");
+		return -1;
+	}
+
+	if (debugfs_create_file("info", 0644, junk, NULL, &ocm_debugfs_fops)) {
+		printk(KERN_ALERT "debugfs ppc4xx ocm: failed to create file\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
+			int flags, const char *owner)
+{
+	void __iomem *addr = NULL;
+	unsigned long offset;
+	struct ocm_info *ocm;
+	struct ocm_region *ocm_reg;
+	struct ocm_block *ocm_blk;
+	int i;
+
+	for (i = 0; i < ocm_count; i++) {
+		ocm = ocm_get_node(i);
+
+		if (!ocm || !ocm->ready)
+			continue;
+
+		if (flags == PPC4XX_OCM_NON_CACHED)
+			ocm_reg = &ocm->nc;
+		else
+			ocm_reg = &ocm->c;
+
+		if (!ocm_reg->virt)
+			continue;
+
+		if (align < ocm->alignment)
+			align = ocm->alignment;
+
+		offset = rh_alloc_align(ocm_reg->rh, size, align, NULL);
+
+		if (IS_ERR_VALUE(offset))
+			continue;
+
+		ocm_blk = kzalloc(sizeof(struct ocm_block *), GFP_KERNEL);
+		if (!ocm_blk) {
+			printk(KERN_ERR "PPC4XX OCM: could not allocate ocm block");
+			rh_free(ocm_reg->rh, offset);
+			break;
+		}
+
+		*phys = ocm_reg->phys + offset;
+		addr = ocm_reg->virt + offset;
+		size = ALIGN(size, align);
+
+		ocm_blk->addr = addr;
+		ocm_blk->size = size;
+		ocm_blk->owner = owner;
+		list_add_tail(&ocm_blk->list, &ocm_reg->list);
+
+		ocm_reg->memfree -= size;
+
+		break;
+	}
+
+	return addr;
+}
+
+void ppc4xx_ocm_free(const void *addr)
+{
+	int i;
+
+	if (!addr)
+		return;
+
+	for (i = 0; i < ocm_count; i++) {
+		struct ocm_info *ocm = ocm_get_node(i);
+
+		if (!ocm || !ocm->ready)
+			continue;
+
+		if (ocm_free_region(&ocm->nc, addr) ||
+			ocm_free_region(&ocm->c, addr))
+			return;
+	}
+}
+
+static int __init ppc4xx_ocm_init(void)
+{
+	struct device_node *np;
+	int count;
+
+	count = 0;
+	for_each_compatible_node(np, NULL, "ibm,ocm")
+		count++;
+
+	if (!count)
+		return 0;
+
+	ocm_nodes = kzalloc((count * sizeof(struct ocm_info)), GFP_KERNEL);
+	if (!ocm_nodes) {
+		printk(KERN_ERR "PPC4XX OCM: failed to allocate OCM nodes!\n");
+		return -ENOMEM;
+	}
+
+	ocm_count = count;
+	count = 0;
+
+	for_each_compatible_node(np, NULL, "ibm,ocm") {
+		ocm_init_node(count, np);
+		count++;
+	}
+
+	ocm_debugfs_init();
+
+	return 0;
+}
+
+arch_initcall(ppc4xx_ocm_init);
-- 
1.7.2.5

^ permalink raw reply related

* [PATCH v3] cpuidle: (POWER) Replace pseries_notify_cpuidle_add _cpu call with a notifier to fix lockdep problem in start_secondary
From: Deepthi Dharwar @ 2012-07-03  8:20 UTC (permalink / raw)
  To: Benjamin Herrenschmidt,
	Linux PM mailing list <linux-pm@vger.kernel.org> "linux-kernel@vger.kernel.org",
	PowerPC email list
  Cc: Paul E. McKenney, Paul Mackerras, Li Zhong, Srivatsa S. Bhat,
	Linux PM mailing list
In-Reply-To: <4FF29219.10608@linux.vnet.ibm.com>


cpuidle: (POWER) Replace pseries_notify_cpuidle_add_cpu call with a notifier to fix lockdep problem in start_secondary

From: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>

Currently the call to pseries_notify_cpuidle_add_cpu(), that takes
action on the cpuidle front when a cpu is added/removed
is being made from smp_xics_setup_cpu().
This caused lockdep issues as
reported https://lkml.org/lkml/2012/5/17/2

On addition of each cpu, resources are cleared and re-allocated 
each time, all in critical section as part of start_secondary() 
call which has interrupts disabled.
To resolve this issue, the pseries_notify_cpuidle_add_cpu() call is
is now being replaced by a hotplug notifier which
would prevent cpuidle resources from being
released and allocated each time cpu is onlined in the critical code path.

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Reported-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>

---
This patch has in-corporated review comments
received on the earlier version posted.
	v1 ->  https://lkml.org/lkml/2012/5/18/174
	v2 ->  https://lkml.org/lkml/2012/7/3/61
This applies on 3.5-rc5

 arch/powerpc/include/asm/processor.h            |    2 -
 arch/powerpc/platforms/pseries/processor_idle.c |   35 ++++++++++++++++++++---
 arch/powerpc/platforms/pseries/smp.c            |    1 -
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 413a5ea..53b6dfa 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -389,10 +389,8 @@ extern int powersave_nap;	/* set if nap mode can be used in idle loop */
 
 #ifdef CONFIG_PSERIES_IDLE
 extern void update_smt_snooze_delay(int snooze);
-extern int pseries_notify_cpuidle_add_cpu(int cpu);
 #else
 static inline void update_smt_snooze_delay(int snooze) {}
-static inline int pseries_notify_cpuidle_add_cpu(int cpu) { return 0; }
 #endif
 
 extern void flush_instruction_cache(void);
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
index e61483e..da245c0 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -186,17 +186,40 @@ static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
 		.enter = &shared_cede_loop },
 };
 
-int pseries_notify_cpuidle_add_cpu(int cpu)
+static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n,
+			unsigned long action, void *hcpu)
 {
+	int hotcpu = (unsigned long)hcpu;
 	struct cpuidle_device *dev =
-			per_cpu_ptr(pseries_cpuidle_devices, cpu);
+			per_cpu_ptr(pseries_cpuidle_devices, hotcpu);
+
 	if (dev && cpuidle_get_driver()) {
-		cpuidle_disable_device(dev);
-		cpuidle_enable_device(dev);
+		switch (action) {
+		case CPU_ONLINE:
+		case CPU_ONLINE_FROZEN:
+			cpuidle_pause_and_lock();
+			cpuidle_enable_device(dev);
+			cpuidle_resume_and_unlock();
+			break;
+
+		case CPU_DEAD:
+		case CPU_DEAD_FROZEN:
+			cpuidle_pause_and_lock();
+			cpuidle_disable_device(dev);
+			cpuidle_resume_and_unlock();
+			break;
+
+		default:
+			return NOTIFY_DONE;
+		}
 	}
-	return 0;
+	return NOTIFY_OK;
 }
 
+static struct notifier_block setup_hotplug_notifier = {
+	.notifier_call = pseries_cpuidle_add_cpu_notifier,
+};
+
 /*
  * pseries_cpuidle_driver_init()
  */
@@ -321,6 +344,7 @@ static int __init pseries_processor_idle_init(void)
 		return retval;
 	}
 
+	register_cpu_notifier(&setup_hotplug_notifier);
 	printk(KERN_DEBUG "pseries_idle_driver registered\n");
 
 	return 0;
@@ -329,6 +353,7 @@ static int __init pseries_processor_idle_init(void)
 static void __exit pseries_processor_idle_exit(void)
 {
 
+	unregister_cpu_notifier(&setup_hotplug_notifier);
 	pseries_idle_devices_uninit();
 	cpuidle_unregister_driver(&pseries_idle_driver);
 
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index e16bb8d..71706bc 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -147,7 +147,6 @@ static void __devinit smp_xics_setup_cpu(int cpu)
 	set_cpu_current_state(cpu, CPU_STATE_ONLINE);
 	set_default_offline_state(cpu);
 #endif
-	pseries_notify_cpuidle_add_cpu(cpu);
 }
 
 static int __devinit smp_pSeries_kick_cpu(int nr)

Cheers,
Deepthi

^ permalink raw reply related

* Re: [RFC PATCH v2 2/13] memory-hotplug : add physical memory hotplug code to acpi_memory_device_remove
From: Wen Congyang @ 2012-07-03  7:49 UTC (permalink / raw)
  To: Yasuaki Ishimatsu
  Cc: len.brown, linux-acpi, linux-kernel, linux-mm, paulus,
	minchan.kim, kosaki.motohiro, rientjes, cl, linuxppc-dev, akpm,
	liuj97
In-Reply-To: <4FF2A1F1.7000901@jp.fujitsu.com>

At 07/03/2012 03:40 PM, Yasuaki Ishimatsu Wrote:
> Hi Wen,
> 
> 2012/07/03 15:21, Wen Congyang wrote:
>> At 07/03/2012 01:54 PM, Yasuaki Ishimatsu Wrote:
>>> acpi_memory_device_remove() has been prepared to remove physical memory.
>>> But, the function only frees acpi_memory_device currentlry.
>>>
>>> The patch adds following functions into acpi_memory_device_remove():
>>>    - offline memory
>>>    - remove physical memory (only return -EBUSY)
>>>    - free acpi_memory_device
>>>
>>> CC: David Rientjes <rientjes@google.com>
>>> CC: Jiang Liu <liuj97@gmail.com>
>>> CC: Len Brown <len.brown@intel.com>
>>> CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>>> CC: Paul Mackerras <paulus@samba.org>
>>> CC: Christoph Lameter <cl@linux.com>
>>> Cc: Minchan Kim <minchan.kim@gmail.com>
>>> CC: Andrew Morton <akpm@linux-foundation.org>
>>> CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
>>> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
>>>
>>> ---
>>>   drivers/acpi/acpi_memhotplug.c |   26 +++++++++++++++++++++++++-
>>>   drivers/base/memory.c          |   38 ++++++++++++++++++++++++++++++++++++++
>>>   include/linux/memory.h         |    5 +++++
>>>   include/linux/memory_hotplug.h |    1 +
>>>   mm/memory_hotplug.c            |    8 ++++++++
>>>   5 files changed, 77 insertions(+), 1 deletion(-)
>>>
>>> Index: linux-3.5-rc4/drivers/acpi/acpi_memhotplug.c
>>> ===================================================================
>>> --- linux-3.5-rc4.orig/drivers/acpi/acpi_memhotplug.c	2012-07-03 14:21:49.458374960 +0900
>>> +++ linux-3.5-rc4/drivers/acpi/acpi_memhotplug.c	2012-07-03 14:21:58.329264059 +0900
>>> @@ -29,6 +29,7 @@
>>>   #include <linux/module.h>
>>>   #include <linux/init.h>
>>>   #include <linux/types.h>
>>> +#include <linux/memory.h>
>>>   #include <linux/memory_hotplug.h>
>>>   #include <linux/slab.h>
>>>   #include <acpi/acpi_drivers.h>
>>> @@ -452,12 +453,35 @@ static int acpi_memory_device_add(struct
>>>   static int acpi_memory_device_remove(struct acpi_device *device, int type)
>>>   {
>>>   	struct acpi_memory_device *mem_device = NULL;
>>> -
>>> +	struct acpi_memory_info *info, *tmp;
>>> +	int result;
>>> +	int node;
>>>
>>>   	if (!device || !acpi_driver_data(device))
>>>   		return -EINVAL;
>>>
>>>   	mem_device = acpi_driver_data(device);
>>> +
>>> +	node = acpi_get_node(mem_device->device->handle);
>>> +
>>> +	list_for_each_entry_safe(info, tmp, &mem_device->res_list, list) {
>>> +		if (!info->enabled)
>>> +			continue;
>>> +
>>> +		if (!is_memblk_offline(info->start_addr, info->length)) {
>>> +			result = offline_memory(info->start_addr, info->length);
>>> +			if (result)
>>> +				return result;
>>> +		}
>>> +
>>> +		result = remove_memory(node, info->start_addr, info->length);
>>> +		if (result)
>>> +			return result;
>>> +
>>> +		list_del(&info->list);
>>> +		kfree(info);
>>> +	}
>>> +
>>>   	kfree(mem_device);
>>
>> The caller does not care the return value, and after this function returns, the
>> memory device will be unbound from this driver, so we should free all memory
>> allocated for driver data.
> 
> We can ignore return value of remove_memory() because I think that it should
> return 0. But we cannot ignore return value of offline_memory() because
> kernel panic will occurs if kernel removes online memory. How do we deal with
> online memory?

Yes, We can not remove online memory, so just free the memory if offline_memory()
or remove_memory() fails.

> 
>>>
>>>   	return 0;
>>> Index: linux-3.5-rc4/include/linux/memory_hotplug.h
>>> ===================================================================
>>> --- linux-3.5-rc4.orig/include/linux/memory_hotplug.h	2012-07-03 14:21:49.471374796 +0900
>>> +++ linux-3.5-rc4/include/linux/memory_hotplug.h	2012-07-03 14:21:58.330264047 +0900
>>> @@ -233,6 +233,7 @@ static inline int is_mem_section_removab
>>>   extern int mem_online_node(int nid);
>>>   extern int add_memory(int nid, u64 start, u64 size);
>>>   extern int arch_add_memory(int nid, u64 start, u64 size);
>>> +extern int remove_memory(int nid, u64 start, u64 size);
>>>   extern int offline_memory(u64 start, u64 size);
>>>   extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
>>>   								int nr_pages);
>>> Index: linux-3.5-rc4/mm/memory_hotplug.c
>>> ===================================================================
>>> --- linux-3.5-rc4.orig/mm/memory_hotplug.c	2012-07-03 14:21:49.466374860 +0900
>>> +++ linux-3.5-rc4/mm/memory_hotplug.c	2012-07-03 14:21:58.332264022 +0900
>>> @@ -659,6 +659,14 @@ out:
>>>   }
>>>   EXPORT_SYMBOL_GPL(add_memory);
>>>
>>> +int remove_memory(int nid, u64 start, u64 size)
>>> +{
>>> +	return -EBUSY;
>>> +
>>> +}
>>> +EXPORT_SYMBOL_GPL(remove_memory);
>>> +
>>> +
>>>   #ifdef CONFIG_MEMORY_HOTREMOVE
>>>   /*
>>>    * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
>>> Index: linux-3.5-rc4/drivers/base/memory.c
>>> ===================================================================
>>> --- linux-3.5-rc4.orig/drivers/base/memory.c	2012-07-03 14:21:49.459374948 +0900
>>> +++ linux-3.5-rc4/drivers/base/memory.c	2012-07-03 14:21:58.335263984 +0900
>>> @@ -70,6 +70,44 @@ void unregister_memory_isolate_notifier(
>>>   }
>>>   EXPORT_SYMBOL(unregister_memory_isolate_notifier);
>>>
>>> +bool is_memblk_offline(unsigned long start, unsigned long size)
>>> +{
>>> +	struct memory_block *mem = NULL;
>>> +	struct mem_section *section;
>>> +	unsigned long start_pfn, end_pfn;
>>> +	unsigned long pfn, section_nr;
>>> +
>>> +	start_pfn = PFN_DOWN(start);
>>> +	end_pfn = start_pfn + PFN_DOWN(start);
>>> +
>>> +	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>>> +		section_nr = pfn_to_section_nr(pfn);
>>> +		if (!present_section_nr(section_nr));
>>> +			continue;
>>> +
>>> +		section = __nr_to_section(section_nr);
>>> +		/* same memblock? */
>>> +		if (mem)
>>> +			if((section_nr >= mem->start_section_nr) &&
>>> +			   (section_nr <= mem->end_section_nr))
>>> +				continue;
>>> +
>>> +		mem = find_memory_block_hinted(section, mem);
>>
>> The second parameter should be NULL. Otherwise, the mem->dev.kobj will
>> be put twice:
>> 1. we put it when mem->state is MEM_OFFLINE
>> 2. we put it in find_memory_block_hinted().
> 
> Ah, O.K.
> How about it?

This version looks fine to me.

Thanks
Wen Congyang

> 
> +bool is_memblk_offline(unsigned long start, unsigned long size)
> +{
> +	struct memory_block *mem = NULL;
> +	struct mem_section *section;
> +	unsigned long start_pfn, end_pfn;
> +	unsigned long pfn, section_nr;
> +
> +	start_pfn = PFN_DOWN(start);
> +	end_pfn = start_pfn + PFN_DOWN(start);
> +
> +	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
> +		section_nr = pfn_to_section_nr(pfn);
> +		if (!present_section_nr(section_nr));
> +			continue;
> +
> +		section = __nr_to_section(section_nr);
> +		/* same memblock? */
> +		if (mem)
> +			if((section_nr >= mem->start_section_nr) &&
> +			   (section_nr <= mem->end_section_nr))
> +				continue;
> +
> +		mem = find_memory_block_hinted(section, mem);
> +		if (!mem)
> +			continue;
> +		if (mem->state == MEM_OFFLINE)
> +			continue;
> +
> +		kobject_put(&mem->dev.kobj);
> +		return false;
> +	}
> +
> +	if (mem)
> +		kobject_put(&mem->dev.kobj);
> +
> +	return true;
> +}
> +EXPORT_SYMBOL(is_memblk_offline);
> 
> Thanks,
> Yasuaki Ishimatsu
> 
>>
>> Thanks
>> Wen Congyang
>>
>>> +		if (!mem)
>>> +			continue;
>>> +		if (mem->state == MEM_OFFLINE) {
>>> +			kobject_put(&mem->dev.kobj);
>>> +			continue;
>>> +		}
>>> +
>>> +		kobject_put(&mem->dev.kobj);
>>> +		return false;
>>> +	}
>>> +
>>> +	return true;
>>> +}
>>> +EXPORT_SYMBOL(is_memblk_offline);
>>> +
>>>   /*
>>>    * register_memory - Setup a sysfs device for a memory block
>>>    */
>>> Index: linux-3.5-rc4/include/linux/memory.h
>>> ===================================================================
>>> --- linux-3.5-rc4.orig/include/linux/memory.h	2012-07-03 14:21:45.998418215 +0900
>>> +++ linux-3.5-rc4/include/linux/memory.h	2012-07-03 14:21:58.340263922 +0900
>>> @@ -106,6 +106,10 @@ static inline int memory_isolate_notify(
>>>   {
>>>   	return 0;
>>>   }
>>> +static inline bool is_memblk_offline(unsigned long start, unsigned long size)
>>> +{
>>> +	return false;
>>> +}
>>>   #else
>>>   extern int register_memory_notifier(struct notifier_block *nb);
>>>   extern void unregister_memory_notifier(struct notifier_block *nb);
>>> @@ -120,6 +124,7 @@ extern int memory_isolate_notify(unsigne
>>>   extern struct memory_block *find_memory_block_hinted(struct mem_section *,
>>>   							struct memory_block *);
>>>   extern struct memory_block *find_memory_block(struct mem_section *);
>>> +extern bool is_memblk_offline(unsigned long start, unsigned long size);
>>>   #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
>>>   enum mem_add_context { BOOT, HOTPLUG };
>>>   #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>> Please read the FAQ at  http://www.tux.org/lkml/
>>>
>>
> 
> 
> 
> 

^ permalink raw reply

* Re: [RFC PATCH v2 2/13] memory-hotplug : add physical memory hotplug code to acpi_memory_device_remove
From: Yasuaki Ishimatsu @ 2012-07-03  7:40 UTC (permalink / raw)
  To: Wen Congyang
  Cc: len.brown, linux-acpi, linux-kernel, linux-mm, paulus,
	minchan.kim, kosaki.motohiro, rientjes, cl, linuxppc-dev, akpm,
	liuj97
In-Reply-To: <4FF28F6F.1000006@cn.fujitsu.com>

Hi Wen,

2012/07/03 15:21, Wen Congyang wrote:
> At 07/03/2012 01:54 PM, Yasuaki Ishimatsu Wrote:
>> acpi_memory_device_remove() has been prepared to remove physical memory.
>> But, the function only frees acpi_memory_device currentlry.
>>
>> The patch adds following functions into acpi_memory_device_remove():
>>    - offline memory
>>    - remove physical memory (only return -EBUSY)
>>    - free acpi_memory_device
>>
>> CC: David Rientjes <rientjes@google.com>
>> CC: Jiang Liu <liuj97@gmail.com>
>> CC: Len Brown <len.brown@intel.com>
>> CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>> CC: Paul Mackerras <paulus@samba.org>
>> CC: Christoph Lameter <cl@linux.com>
>> Cc: Minchan Kim <minchan.kim@gmail.com>
>> CC: Andrew Morton <akpm@linux-foundation.org>
>> CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
>> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
>>
>> ---
>>   drivers/acpi/acpi_memhotplug.c |   26 +++++++++++++++++++++++++-
>>   drivers/base/memory.c          |   38 ++++++++++++++++++++++++++++++++++++++
>>   include/linux/memory.h         |    5 +++++
>>   include/linux/memory_hotplug.h |    1 +
>>   mm/memory_hotplug.c            |    8 ++++++++
>>   5 files changed, 77 insertions(+), 1 deletion(-)
>>
>> Index: linux-3.5-rc4/drivers/acpi/acpi_memhotplug.c
>> ===================================================================
>> --- linux-3.5-rc4.orig/drivers/acpi/acpi_memhotplug.c	2012-07-03 14:21:49.458374960 +0900
>> +++ linux-3.5-rc4/drivers/acpi/acpi_memhotplug.c	2012-07-03 14:21:58.329264059 +0900
>> @@ -29,6 +29,7 @@
>>   #include <linux/module.h>
>>   #include <linux/init.h>
>>   #include <linux/types.h>
>> +#include <linux/memory.h>
>>   #include <linux/memory_hotplug.h>
>>   #include <linux/slab.h>
>>   #include <acpi/acpi_drivers.h>
>> @@ -452,12 +453,35 @@ static int acpi_memory_device_add(struct
>>   static int acpi_memory_device_remove(struct acpi_device *device, int type)
>>   {
>>   	struct acpi_memory_device *mem_device = NULL;
>> -
>> +	struct acpi_memory_info *info, *tmp;
>> +	int result;
>> +	int node;
>>
>>   	if (!device || !acpi_driver_data(device))
>>   		return -EINVAL;
>>
>>   	mem_device = acpi_driver_data(device);
>> +
>> +	node = acpi_get_node(mem_device->device->handle);
>> +
>> +	list_for_each_entry_safe(info, tmp, &mem_device->res_list, list) {
>> +		if (!info->enabled)
>> +			continue;
>> +
>> +		if (!is_memblk_offline(info->start_addr, info->length)) {
>> +			result = offline_memory(info->start_addr, info->length);
>> +			if (result)
>> +				return result;
>> +		}
>> +
>> +		result = remove_memory(node, info->start_addr, info->length);
>> +		if (result)
>> +			return result;
>> +
>> +		list_del(&info->list);
>> +		kfree(info);
>> +	}
>> +
>>   	kfree(mem_device);
> 
> The caller does not care the return value, and after this function returns, the
> memory device will be unbound from this driver, so we should free all memory
> allocated for driver data.

We can ignore return value of remove_memory() because I think that it should
return 0. But we cannot ignore return value of offline_memory() because
kernel panic will occurs if kernel removes online memory. How do we deal with
online memory?

>>
>>   	return 0;
>> Index: linux-3.5-rc4/include/linux/memory_hotplug.h
>> ===================================================================
>> --- linux-3.5-rc4.orig/include/linux/memory_hotplug.h	2012-07-03 14:21:49.471374796 +0900
>> +++ linux-3.5-rc4/include/linux/memory_hotplug.h	2012-07-03 14:21:58.330264047 +0900
>> @@ -233,6 +233,7 @@ static inline int is_mem_section_removab
>>   extern int mem_online_node(int nid);
>>   extern int add_memory(int nid, u64 start, u64 size);
>>   extern int arch_add_memory(int nid, u64 start, u64 size);
>> +extern int remove_memory(int nid, u64 start, u64 size);
>>   extern int offline_memory(u64 start, u64 size);
>>   extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
>>   								int nr_pages);
>> Index: linux-3.5-rc4/mm/memory_hotplug.c
>> ===================================================================
>> --- linux-3.5-rc4.orig/mm/memory_hotplug.c	2012-07-03 14:21:49.466374860 +0900
>> +++ linux-3.5-rc4/mm/memory_hotplug.c	2012-07-03 14:21:58.332264022 +0900
>> @@ -659,6 +659,14 @@ out:
>>   }
>>   EXPORT_SYMBOL_GPL(add_memory);
>>
>> +int remove_memory(int nid, u64 start, u64 size)
>> +{
>> +	return -EBUSY;
>> +
>> +}
>> +EXPORT_SYMBOL_GPL(remove_memory);
>> +
>> +
>>   #ifdef CONFIG_MEMORY_HOTREMOVE
>>   /*
>>    * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
>> Index: linux-3.5-rc4/drivers/base/memory.c
>> ===================================================================
>> --- linux-3.5-rc4.orig/drivers/base/memory.c	2012-07-03 14:21:49.459374948 +0900
>> +++ linux-3.5-rc4/drivers/base/memory.c	2012-07-03 14:21:58.335263984 +0900
>> @@ -70,6 +70,44 @@ void unregister_memory_isolate_notifier(
>>   }
>>   EXPORT_SYMBOL(unregister_memory_isolate_notifier);
>>
>> +bool is_memblk_offline(unsigned long start, unsigned long size)
>> +{
>> +	struct memory_block *mem = NULL;
>> +	struct mem_section *section;
>> +	unsigned long start_pfn, end_pfn;
>> +	unsigned long pfn, section_nr;
>> +
>> +	start_pfn = PFN_DOWN(start);
>> +	end_pfn = start_pfn + PFN_DOWN(start);
>> +
>> +	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
>> +		section_nr = pfn_to_section_nr(pfn);
>> +		if (!present_section_nr(section_nr));
>> +			continue;
>> +
>> +		section = __nr_to_section(section_nr);
>> +		/* same memblock? */
>> +		if (mem)
>> +			if((section_nr >= mem->start_section_nr) &&
>> +			   (section_nr <= mem->end_section_nr))
>> +				continue;
>> +
>> +		mem = find_memory_block_hinted(section, mem);
> 
> The second parameter should be NULL. Otherwise, the mem->dev.kobj will
> be put twice:
> 1. we put it when mem->state is MEM_OFFLINE
> 2. we put it in find_memory_block_hinted().

Ah, O.K.
How about it?

+bool is_memblk_offline(unsigned long start, unsigned long size)
+{
+	struct memory_block *mem = NULL;
+	struct mem_section *section;
+	unsigned long start_pfn, end_pfn;
+	unsigned long pfn, section_nr;
+
+	start_pfn = PFN_DOWN(start);
+	end_pfn = start_pfn + PFN_DOWN(start);
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		section_nr = pfn_to_section_nr(pfn);
+		if (!present_section_nr(section_nr));
+			continue;
+
+		section = __nr_to_section(section_nr);
+		/* same memblock? */
+		if (mem)
+			if((section_nr >= mem->start_section_nr) &&
+			   (section_nr <= mem->end_section_nr))
+				continue;
+
+		mem = find_memory_block_hinted(section, mem);
+		if (!mem)
+			continue;
+		if (mem->state == MEM_OFFLINE)
+			continue;
+
+		kobject_put(&mem->dev.kobj);
+		return false;
+	}
+
+	if (mem)
+		kobject_put(&mem->dev.kobj);
+
+	return true;
+}
+EXPORT_SYMBOL(is_memblk_offline);

Thanks,
Yasuaki Ishimatsu

> 
> Thanks
> Wen Congyang
> 
>> +		if (!mem)
>> +			continue;
>> +		if (mem->state == MEM_OFFLINE) {
>> +			kobject_put(&mem->dev.kobj);
>> +			continue;
>> +		}
>> +
>> +		kobject_put(&mem->dev.kobj);
>> +		return false;
>> +	}
>> +
>> +	return true;
>> +}
>> +EXPORT_SYMBOL(is_memblk_offline);
>> +
>>   /*
>>    * register_memory - Setup a sysfs device for a memory block
>>    */
>> Index: linux-3.5-rc4/include/linux/memory.h
>> ===================================================================
>> --- linux-3.5-rc4.orig/include/linux/memory.h	2012-07-03 14:21:45.998418215 +0900
>> +++ linux-3.5-rc4/include/linux/memory.h	2012-07-03 14:21:58.340263922 +0900
>> @@ -106,6 +106,10 @@ static inline int memory_isolate_notify(
>>   {
>>   	return 0;
>>   }
>> +static inline bool is_memblk_offline(unsigned long start, unsigned long size)
>> +{
>> +	return false;
>> +}
>>   #else
>>   extern int register_memory_notifier(struct notifier_block *nb);
>>   extern void unregister_memory_notifier(struct notifier_block *nb);
>> @@ -120,6 +124,7 @@ extern int memory_isolate_notify(unsigne
>>   extern struct memory_block *find_memory_block_hinted(struct mem_section *,
>>   							struct memory_block *);
>>   extern struct memory_block *find_memory_block(struct mem_section *);
>> +extern bool is_memblk_offline(unsigned long start, unsigned long size);
>>   #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
>>   enum mem_add_context { BOOT, HOTPLUG };
>>   #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>>
> 

^ permalink raw reply

* Re: [PATCH V5 0/7] minimal alignment for p2p bars
From: Gavin Shan @ 2012-07-03  7:01 UTC (permalink / raw)
  To: Gavin Shan; +Cc: linux-pci, linuxram, linuxppc-dev, bhelgaas, yinghai
In-Reply-To: <cover.1340949637.git.shangw@linux.vnet.ibm.com>

Hi Bjorn,

Could you please review it when you have time?

Thanks,
Gavin

>v1 -> v2:
>	* Shorten the varaible names so that they looks more short.
>	* Changelog adjustment so that they looks more meaningful.
>
>v2 -> v3:
>	* Rebase to 3.5.RC4
>
>v3 -> v4:
>	* Merge Yinghai's patches.
>
>v3 -> v4:
>	* Split patch for easy review.
>	* Add function to retrieve the minimal alignment of p2p bridge. 
>
>Lu Yinghai(3):
>  pci: change variable name for find_pci_host_bridge
>  pci: argument pci_bus for find_pci_host_bridge
>  pci: fiddle with conversion of pci and CPU address
>
>Gavin Shan(4)
>  pci: make find_pci_host_bridge global
>  pci: minimal alignment for bars of P2P bridges
>  pci: function to retrieve alignment of p2p bars
>  pci: resource assignment based on p2p alignment
>
> drivers/pci/host-bridge.c |   59 ++++++++++++++++++++++++++++++++++++---------
> drivers/pci/probe.c       |    5 ++++
> drivers/pci/setup-bus.c   |   22 +++++++++++------
> include/linux/pci.h       |   15 +++++++++++-
> 4 files changed, 81 insertions(+), 20 deletions(-)
>
>Thanks,
>Gavin
>

^ permalink raw reply

* Re: [RFC PATCH v2 2/13] memory-hotplug : add physical memory hotplug code to acpi_memory_device_remove
From: Wen Congyang @ 2012-07-03  6:21 UTC (permalink / raw)
  To: Yasuaki Ishimatsu
  Cc: len.brown, linux-acpi, linux-kernel, linux-mm, paulus,
	minchan.kim, kosaki.motohiro, rientjes, cl, linuxppc-dev, akpm,
	liuj97
In-Reply-To: <4FF288FC.8030609@jp.fujitsu.com>

At 07/03/2012 01:54 PM, Yasuaki Ishimatsu Wrote:
> acpi_memory_device_remove() has been prepared to remove physical memory.
> But, the function only frees acpi_memory_device currentlry.
> 
> The patch adds following functions into acpi_memory_device_remove():
>   - offline memory
>   - remove physical memory (only return -EBUSY)
>   - free acpi_memory_device
> 
> CC: David Rientjes <rientjes@google.com>
> CC: Jiang Liu <liuj97@gmail.com>
> CC: Len Brown <len.brown@intel.com>
> CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> CC: Paul Mackerras <paulus@samba.org>
> CC: Christoph Lameter <cl@linux.com>
> Cc: Minchan Kim <minchan.kim@gmail.com>
> CC: Andrew Morton <akpm@linux-foundation.org>
> CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> 
> ---
>  drivers/acpi/acpi_memhotplug.c |   26 +++++++++++++++++++++++++-
>  drivers/base/memory.c          |   38 ++++++++++++++++++++++++++++++++++++++
>  include/linux/memory.h         |    5 +++++
>  include/linux/memory_hotplug.h |    1 +
>  mm/memory_hotplug.c            |    8 ++++++++
>  5 files changed, 77 insertions(+), 1 deletion(-)
> 
> Index: linux-3.5-rc4/drivers/acpi/acpi_memhotplug.c
> ===================================================================
> --- linux-3.5-rc4.orig/drivers/acpi/acpi_memhotplug.c	2012-07-03 14:21:49.458374960 +0900
> +++ linux-3.5-rc4/drivers/acpi/acpi_memhotplug.c	2012-07-03 14:21:58.329264059 +0900
> @@ -29,6 +29,7 @@
>  #include <linux/module.h>
>  #include <linux/init.h>
>  #include <linux/types.h>
> +#include <linux/memory.h>
>  #include <linux/memory_hotplug.h>
>  #include <linux/slab.h>
>  #include <acpi/acpi_drivers.h>
> @@ -452,12 +453,35 @@ static int acpi_memory_device_add(struct
>  static int acpi_memory_device_remove(struct acpi_device *device, int type)
>  {
>  	struct acpi_memory_device *mem_device = NULL;
> -
> +	struct acpi_memory_info *info, *tmp;
> +	int result;
> +	int node;
> 
>  	if (!device || !acpi_driver_data(device))
>  		return -EINVAL;
> 
>  	mem_device = acpi_driver_data(device);
> +
> +	node = acpi_get_node(mem_device->device->handle);
> +
> +	list_for_each_entry_safe(info, tmp, &mem_device->res_list, list) {
> +		if (!info->enabled)
> +			continue;
> +
> +		if (!is_memblk_offline(info->start_addr, info->length)) {
> +			result = offline_memory(info->start_addr, info->length);
> +			if (result)
> +				return result;
> +		}
> +
> +		result = remove_memory(node, info->start_addr, info->length);
> +		if (result)
> +			return result;
> +
> +		list_del(&info->list);
> +		kfree(info);
> +	}
> +
>  	kfree(mem_device);

The caller does not care the return value, and after this function returns, the
memory device will be unbound from this driver, so we should free all memory
allocated for driver data.

> 
>  	return 0;
> Index: linux-3.5-rc4/include/linux/memory_hotplug.h
> ===================================================================
> --- linux-3.5-rc4.orig/include/linux/memory_hotplug.h	2012-07-03 14:21:49.471374796 +0900
> +++ linux-3.5-rc4/include/linux/memory_hotplug.h	2012-07-03 14:21:58.330264047 +0900
> @@ -233,6 +233,7 @@ static inline int is_mem_section_removab
>  extern int mem_online_node(int nid);
>  extern int add_memory(int nid, u64 start, u64 size);
>  extern int arch_add_memory(int nid, u64 start, u64 size);
> +extern int remove_memory(int nid, u64 start, u64 size);
>  extern int offline_memory(u64 start, u64 size);
>  extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
>  								int nr_pages);
> Index: linux-3.5-rc4/mm/memory_hotplug.c
> ===================================================================
> --- linux-3.5-rc4.orig/mm/memory_hotplug.c	2012-07-03 14:21:49.466374860 +0900
> +++ linux-3.5-rc4/mm/memory_hotplug.c	2012-07-03 14:21:58.332264022 +0900
> @@ -659,6 +659,14 @@ out:
>  }
>  EXPORT_SYMBOL_GPL(add_memory);
> 
> +int remove_memory(int nid, u64 start, u64 size)
> +{
> +	return -EBUSY;
> +
> +}
> +EXPORT_SYMBOL_GPL(remove_memory);
> +
> +
>  #ifdef CONFIG_MEMORY_HOTREMOVE
>  /*
>   * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy
> Index: linux-3.5-rc4/drivers/base/memory.c
> ===================================================================
> --- linux-3.5-rc4.orig/drivers/base/memory.c	2012-07-03 14:21:49.459374948 +0900
> +++ linux-3.5-rc4/drivers/base/memory.c	2012-07-03 14:21:58.335263984 +0900
> @@ -70,6 +70,44 @@ void unregister_memory_isolate_notifier(
>  }
>  EXPORT_SYMBOL(unregister_memory_isolate_notifier);
> 
> +bool is_memblk_offline(unsigned long start, unsigned long size)
> +{
> +	struct memory_block *mem = NULL;
> +	struct mem_section *section;
> +	unsigned long start_pfn, end_pfn;
> +	unsigned long pfn, section_nr;
> +
> +	start_pfn = PFN_DOWN(start);
> +	end_pfn = start_pfn + PFN_DOWN(start);
> +
> +	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
> +		section_nr = pfn_to_section_nr(pfn);
> +		if (!present_section_nr(section_nr));
> +			continue;
> +
> +		section = __nr_to_section(section_nr);
> +		/* same memblock? */
> +		if (mem)
> +			if((section_nr >= mem->start_section_nr) &&
> +			   (section_nr <= mem->end_section_nr))
> +				continue;
> +
> +		mem = find_memory_block_hinted(section, mem);

The second parameter should be NULL. Otherwise, the mem->dev.kobj will
be put twice:
1. we put it when mem->state is MEM_OFFLINE
2. we put it in find_memory_block_hinted().

Thanks
Wen Congyang

> +		if (!mem)
> +			continue;
> +		if (mem->state == MEM_OFFLINE) {
> +			kobject_put(&mem->dev.kobj);
> +			continue;
> +		}
> +
> +		kobject_put(&mem->dev.kobj);
> +		return false;
> +	}
> +
> +	return true;
> +}
> +EXPORT_SYMBOL(is_memblk_offline);
> +
>  /*
>   * register_memory - Setup a sysfs device for a memory block
>   */
> Index: linux-3.5-rc4/include/linux/memory.h
> ===================================================================
> --- linux-3.5-rc4.orig/include/linux/memory.h	2012-07-03 14:21:45.998418215 +0900
> +++ linux-3.5-rc4/include/linux/memory.h	2012-07-03 14:21:58.340263922 +0900
> @@ -106,6 +106,10 @@ static inline int memory_isolate_notify(
>  {
>  	return 0;
>  }
> +static inline bool is_memblk_offline(unsigned long start, unsigned long size)
> +{
> +	return false;
> +}
>  #else
>  extern int register_memory_notifier(struct notifier_block *nb);
>  extern void unregister_memory_notifier(struct notifier_block *nb);
> @@ -120,6 +124,7 @@ extern int memory_isolate_notify(unsigne
>  extern struct memory_block *find_memory_block_hinted(struct mem_section *,
>  							struct memory_block *);
>  extern struct memory_block *find_memory_block(struct mem_section *);
> +extern bool is_memblk_offline(unsigned long start, unsigned long size);
>  #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
>  enum mem_add_context { BOOT, HOTPLUG };
>  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

^ permalink raw reply

* [PATCH v2] cpuidle: (POWER) Replace pseries_notify_cpuidle_add call with a notifier to fix lockdep problem in start_secondary
From: Deepthi Dharwar @ 2012-07-03  6:32 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Linux PM mailing list,
	linux-kernel@vger.kernel.org, PowerPC email list
  Cc: Paul E. McKenney, Paul Mackerras, Srivatsa S. Bhat, Li Zhong

cpuidle: (POWER) Replace pseries_notify_cpuidle_add call with a notifier to fix lockdep problem in start_secondary

From: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>

Currently the call to pseries_notify_cpuidle_add(), that takes
action on the cpuidle front when a cpu is added/removed
is being made from smp_xics_setup_cpu().
This works fine when the cpus were hot added/removed once the
system is up but causes lockdep issues as
reported https://lkml.org/lkml/2012/5/17/2
during the time of boot.

During boot, on addition of each cpu,
resources were cleared and re-allocated each time, all in critical
section as part of start_secondary() which had interrupts disabled.
To resolve this issue, replacing the pseries_notify_cpuidle_add_cpu() call
with a hotplug notifier.  This would prevent cpuidle resources from being
released and allocated each time cpu is onlined during pseries bootup.

Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Reported-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>

---
This patch has in-corporated review comments
received on the earlier version posted.
	v1 ->  https://lkml.org/lkml/2012/5/18/174
This applies on 3.5-rc5
---
 arch/powerpc/include/asm/processor.h            |    2 -
 arch/powerpc/platforms/pseries/processor_idle.c |   35 ++++++++++++++++++++---
 arch/powerpc/platforms/pseries/smp.c            |    1 -
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 413a5ea..53b6dfa 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -389,10 +389,8 @@ extern int powersave_nap;	/* set if nap mode can be used in idle loop */
 
 #ifdef CONFIG_PSERIES_IDLE
 extern void update_smt_snooze_delay(int snooze);
-extern int pseries_notify_cpuidle_add_cpu(int cpu);
 #else
 static inline void update_smt_snooze_delay(int snooze) {}
-static inline int pseries_notify_cpuidle_add_cpu(int cpu) { return 0; }
 #endif
 
 extern void flush_instruction_cache(void);
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
index e61483e..da245c0 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -186,17 +186,40 @@ static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
 		.enter = &shared_cede_loop },
 };
 
-int pseries_notify_cpuidle_add_cpu(int cpu)
+static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n,
+			unsigned long action, void *hcpu)
 {
+	int hotcpu = (unsigned long)hcpu;
 	struct cpuidle_device *dev =
-			per_cpu_ptr(pseries_cpuidle_devices, cpu);
+			per_cpu_ptr(pseries_cpuidle_devices, hotcpu);
+
 	if (dev && cpuidle_get_driver()) {
-		cpuidle_disable_device(dev);
-		cpuidle_enable_device(dev);
+		switch (action) {
+		case CPU_ONLINE:
+		case CPU_ONLINE_FROZEN:
+			cpuidle_pause_and_lock();
+			cpuidle_enable_device(dev);
+			cpuidle_resume_and_unlock();
+			break;
+
+		case CPU_DEAD:
+		case CPU_DEAD_FROZEN:
+			cpuidle_pause_and_lock();
+			cpuidle_disable_device(dev);
+			cpuidle_resume_and_unlock();
+			break;
+
+		default:
+			return NOTIFY_DONE;
+		}
 	}
-	return 0;
+	return NOTIFY_OK;
 }
 
+static struct notifier_block setup_hotplug_notifier = {
+	.notifier_call = pseries_cpuidle_add_cpu_notifier,
+};
+
 /*
  * pseries_cpuidle_driver_init()
  */
@@ -321,6 +344,7 @@ static int __init pseries_processor_idle_init(void)
 		return retval;
 	}
 
+	register_cpu_notifier(&setup_hotplug_notifier);
 	printk(KERN_DEBUG "pseries_idle_driver registered\n");
 
 	return 0;
@@ -329,6 +353,7 @@ static int __init pseries_processor_idle_init(void)
 static void __exit pseries_processor_idle_exit(void)
 {
 
+	unregister_cpu_notifier(&setup_hotplug_notifier);
 	pseries_idle_devices_uninit();
 	cpuidle_unregister_driver(&pseries_idle_driver);
 
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index e16bb8d..71706bc 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -147,7 +147,6 @@ static void __devinit smp_xics_setup_cpu(int cpu)
 	set_cpu_current_state(cpu, CPU_STATE_ONLINE);
 	set_default_offline_state(cpu);
 #endif
-	pseries_notify_cpuidle_add_cpu(cpu);
 }
 
 static int __devinit smp_pSeries_kick_cpu(int nr)

^ permalink raw reply related

* Re: [RFC PATCH v2 4/13] memory-hotplug : remove /sys/firmware/memmap/X sysfs
From: Wen Congyang @ 2012-07-03  6:35 UTC (permalink / raw)
  To: Yasuaki Ishimatsu
  Cc: len.brown, linux-acpi, linux-kernel, linux-mm, paulus,
	minchan.kim, kosaki.motohiro, rientjes, cl, linuxppc-dev, akpm,
	liuj97
In-Reply-To: <4FF28996.10702@jp.fujitsu.com>

At 07/03/2012 01:56 PM, Yasuaki Ishimatsu Wrote:
> When (hot)adding memory into system, /sys/firmware/memmap/X/{end, start, type}
> sysfs files are created. But there is no code to remove these files. The patch
> implements the function to remove them.
> 
> Note : The code does not free firmware_map_entry since there is no way to free
>        memory which is allocated by bootmem.
> 
> CC: David Rientjes <rientjes@google.com>
> CC: Jiang Liu <liuj97@gmail.com>
> CC: Len Brown <len.brown@intel.com>
> CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> CC: Paul Mackerras <paulus@samba.org>
> CC: Christoph Lameter <cl@linux.com>
> Cc: Minchan Kim <minchan.kim@gmail.com>
> CC: Andrew Morton <akpm@linux-foundation.org>
> CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> 
> ---
>  drivers/firmware/memmap.c    |   70 +++++++++++++++++++++++++++++++++++++++++++
>  include/linux/firmware-map.h |    6 +++
>  mm/memory_hotplug.c          |    6 +++
>  3 files changed, 81 insertions(+), 1 deletion(-)
> 
> Index: linux-3.5-rc4/mm/memory_hotplug.c
> ===================================================================
> --- linux-3.5-rc4.orig/mm/memory_hotplug.c	2012-07-03 14:22:00.190240794 +0900
> +++ linux-3.5-rc4/mm/memory_hotplug.c	2012-07-03 14:22:03.549198802 +0900
> @@ -661,7 +661,11 @@ EXPORT_SYMBOL_GPL(add_memory);
> 
>  int remove_memory(int nid, u64 start, u64 size)
>  {
> -	return -EBUSY;
> +	lock_memory_hotplug();
> +	/* remove memmap entry */
> +	firmware_map_remove(start, start + size - 1, "System RAM");
> +	unlock_memory_hotplug();
> +	return 0;
> 
>  }
>  EXPORT_SYMBOL_GPL(remove_memory);
> Index: linux-3.5-rc4/include/linux/firmware-map.h
> ===================================================================
> --- linux-3.5-rc4.orig/include/linux/firmware-map.h	2012-07-03 14:21:45.766421116 +0900
> +++ linux-3.5-rc4/include/linux/firmware-map.h	2012-07-03 14:22:03.550198789 +0900
> @@ -25,6 +25,7 @@
> 
>  int firmware_map_add_early(u64 start, u64 end, const char *type);
>  int firmware_map_add_hotplug(u64 start, u64 end, const char *type);
> +int firmware_map_remove(u64 start, u64 end, const char *type);
> 
>  #else /* CONFIG_FIRMWARE_MEMMAP */
> 
> @@ -38,6 +39,11 @@ static inline int firmware_map_add_hotpl
>  	return 0;
>  }
> 
> +static inline int firmware_map_remove(u64 start, u64 end, const char *type)
> +{
> +	return 0;
> +}
> +
>  #endif /* CONFIG_FIRMWARE_MEMMAP */
> 
>  #endif /* _LINUX_FIRMWARE_MAP_H */
> Index: linux-3.5-rc4/drivers/firmware/memmap.c
> ===================================================================
> --- linux-3.5-rc4.orig/drivers/firmware/memmap.c	2012-07-03 14:21:45.761421180 +0900
> +++ linux-3.5-rc4/drivers/firmware/memmap.c	2012-07-03 14:22:03.569198549 +0900
> @@ -79,7 +79,16 @@ static const struct sysfs_ops memmap_att
>  	.show = memmap_attr_show,
>  };
> 
> +static void release_firmware_map_entry(struct kobject *kobj)
> +{
> +	/*
> +	 * FIXME : There is no idea.
> +	 *         How to free the entry which allocated bootmem?
> +	 */

I find a function free_bootmem(), but I am not sure whether it can work here.
Another problem: how to check whether the entry uses bootmem?

Thanks
Wen Congyang

> +}
> +
>  static struct kobj_type memmap_ktype = {
> +	.release	= release_firmware_map_entry,
>  	.sysfs_ops	= &memmap_attr_ops,
>  	.default_attrs	= def_attrs,
>  };
> @@ -123,6 +132,16 @@ static int firmware_map_add_entry(u64 st
>  	return 0;
>  }
> 
> +/**
> + * firmware_map_remove_entry() - Does the real work to remove a firmware
> + * memmap entry.
> + * @entry: removed entry.
> + **/
> +static inline void firmware_map_remove_entry(struct firmware_map_entry *entry)
> +{
> +	list_del(&entry->list);
> +}
> +
>  /*
>   * Add memmap entry on sysfs
>   */
> @@ -144,6 +163,31 @@ static int add_sysfs_fw_map_entry(struct
>  	return 0;
>  }
> 
> +/*
> + * Remove memmap entry on sysfs
> + */
> +static inline void remove_sysfs_fw_map_entry(struct firmware_map_entry *entry)
> +{
> +	kobject_put(&entry->kobj);
> +}
> +
> +/*
> + * Search memmap entry
> + */
> +
> +struct firmware_map_entry * __meminit
> +find_firmware_map_entry(u64 start, u64 end, const char *type)
> +{
> +	struct firmware_map_entry *entry;
> +
> +	list_for_each_entry(entry, &map_entries, list)
> +		if ((entry->start == start) && (entry->end == end) &&
> +		    (!strcmp(entry->type, type)))
> +			return entry;
> +
> +	return NULL;
> +}
> +
>  /**
>   * firmware_map_add_hotplug() - Adds a firmware mapping entry when we do
>   * memory hotplug.
> @@ -196,6 +240,32 @@ int __init firmware_map_add_early(u64 st
>  	return firmware_map_add_entry(start, end, type, entry);
>  }
> 
> +/**
> + * firmware_map_remove() - remove a firmware mapping entry
> + * @start: Start of the memory range.
> + * @end:   End of the memory range (inclusive).
> + * @type:  Type of the memory range.
> + *
> + * removes a firmware mapping entry.
> + *
> + * Returns 0 on success, or -EINVAL if no entry.
> + **/
> +int __meminit firmware_map_remove(u64 start, u64 end, const char *type)
> +{
> +	struct firmware_map_entry *entry;
> +
> +	entry = find_firmware_map_entry(start, end, type);
> +	if (!entry)
> +		return -EINVAL;
> +
> +	/* remove the memmap entry */
> +	remove_sysfs_fw_map_entry(entry);
> +
> +	firmware_map_remove_entry(entry);
> +
> +	return 0;
> +}
> +
>  /*
>   * Sysfs functions -------------------------------------------------------------
>   */
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

^ permalink raw reply

* [RFC PATCH v2 13/13] memory-hotplug : remove sysfs file of node
From: Yasuaki Ishimatsu @ 2012-07-03  6:06 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi
  Cc: len.brown, paulus, minchan.kim, kosaki.motohiro, rientjes, cl,
	akpm, liuj97
In-Reply-To: <4FF287C3.4030901@jp.fujitsu.com>

The patch adds node_set_offline() and unregister_one_node() to remove_memory()
for removing sysfs file of node.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 mm/memory_hotplug.c |    5 +++++
 1 file changed, 5 insertions(+)

Index: linux-3.5-rc4/mm/memory_hotplug.c
===================================================================
--- linux-3.5-rc4.orig/mm/memory_hotplug.c	2012-07-03 14:22:21.012982694 +0900
+++ linux-3.5-rc4/mm/memory_hotplug.c	2012-07-03 14:22:25.405925554 +0900
@@ -702,6 +702,11 @@ int remove_memory(int nid, u64 start, u6
 	/* remove memmap entry */
 	firmware_map_remove(start, start + size - 1, "System RAM");

+	if (!node_present_pages(nid)) {
+		node_set_offline(nid);
+		unregister_one_node(nid);
+	}
+
 	__remove_pages(start >> PAGE_SHIFT, size >> PAGE_SHIFT);
 	unlock_memory_hotplug();
 	return 0;

^ permalink raw reply

* [RFC PATCH v2 12/13] memory-hotplug : add node_device_release
From: Yasuaki Ishimatsu @ 2012-07-03  6:05 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi
  Cc: len.brown, paulus, minchan.kim, kosaki.motohiro, rientjes, cl,
	akpm, liuj97
In-Reply-To: <4FF287C3.4030901@jp.fujitsu.com>

When calling unregister_node(), the function shows following message at
device_release().

Device 'node2' does not have a release() function, it is broken and must be
fixed.

So the patch implements node_device_release()

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 drivers/base/node.c |    7 +++++++
 1 file changed, 7 insertions(+)

Index: linux-3.5-rc4/drivers/base/node.c
===================================================================
--- linux-3.5-rc4.orig/drivers/base/node.c	2012-07-03 14:21:44.882432167 +0900
+++ linux-3.5-rc4/drivers/base/node.c	2012-07-03 14:22:23.296951921 +0900
@@ -252,6 +252,12 @@ static inline void hugetlb_register_node
 static inline void hugetlb_unregister_node(struct node *node) {}
 #endif

+static void node_device_release(struct device *dev)
+{
+	struct node *node_dev = to_node(dev);
+
+	memset(node_dev, 0, sizeof(struct node));
+}

 /*
  * register_node - Setup a sysfs device for a node.
@@ -265,6 +271,7 @@ int register_node(struct node *node, int

 	node->dev.id = num;
 	node->dev.bus = &node_subsys;
+	node->dev.release = node_device_release;
 	error = device_register(&node->dev);

 	if (!error){

^ permalink raw reply

* [RFC PATCH v2 11/13] memory-hotplug : free memmap of sparse-vmemmap
From: Yasuaki Ishimatsu @ 2012-07-03  6:04 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi
  Cc: len.brown, paulus, minchan.kim, kosaki.motohiro, rientjes, cl,
	akpm, liuj97
In-Reply-To: <4FF287C3.4030901@jp.fujitsu.com>

I don't think that all pages of virtual mapping in removed memory can be
freed, since page which type is MIX_SECTION_INFO is difficult to free.
So, the patch only frees page which type is SECTION_INFO at first.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 arch/x86/mm/init_64.c |   91 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h    |    2 +
 mm/memory_hotplug.c   |    5 ++
 mm/sparse.c           |    5 +-
 4 files changed, 101 insertions(+), 2 deletions(-)

Index: linux-3.5-rc4/include/linux/mm.h
===================================================================
--- linux-3.5-rc4.orig/include/linux/mm.h	2012-07-03 14:22:18.530011567 +0900
+++ linux-3.5-rc4/include/linux/mm.h	2012-07-03 14:22:20.999983872 +0900
@@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_
 void vmemmap_populate_print_last(void);
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
 				  unsigned long size);
+void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
+void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);

 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
Index: linux-3.5-rc4/mm/sparse.c
===================================================================
--- linux-3.5-rc4.orig/mm/sparse.c	2012-07-03 14:21:45.071429805 +0900
+++ linux-3.5-rc4/mm/sparse.c	2012-07-03 14:22:21.000983767 +0900
@@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti
 	/* This will make the necessary allocations eventually. */
 	return sparse_mem_map_populate(pnum, nid);
 }
-static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+static void __kfree_section_memmap(struct page *page, unsigned long nr_pages)
 {
-	return; /* XXX: Not implemented yet */
+	vmemmap_kfree(page, nr_pages);
 }
 static void free_map_bootmem(struct page *page, unsigned long nr_pages)
 {
+	vmemmap_free_bootmem(page, nr_pages);
 }
 #else
 static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
Index: linux-3.5-rc4/arch/x86/mm/init_64.c
===================================================================
--- linux-3.5-rc4.orig/arch/x86/mm/init_64.c	2012-07-03 14:22:18.538011465 +0900
+++ linux-3.5-rc4/arch/x86/mm/init_64.c	2012-07-03 14:22:21.007983103 +0900
@@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page
 	return 0;
 }

+unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end,
+				      struct page **pp)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long next;
+
+	*pp = NULL;
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd))
+		return (addr + PAGE_SIZE) & PAGE_MASK;
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		return (addr + PAGE_SIZE) & PAGE_MASK;
+
+	if (!cpu_has_pse) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd))
+			return next;
+
+		pte = pte_offset_kernel(pmd, addr);
+		if (pte_none(*pte))
+			return next;
+
+		*pp = pte_page(*pte);
+		pte_clear(&init_mm, addr, pte);
+	} else {
+		next = pmd_addr_end(addr, end);
+
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd))
+			return next;
+
+		*pp = pmd_page(*pmd);
+		pmd_clear(pmd);
+	}
+
+	return next;
+}
+
+void __meminit
+vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+	unsigned long addr = (unsigned long)memmap;
+	unsigned long end = (unsigned long)(memmap + nr_pages);
+	unsigned long next;
+	unsigned int order;
+	struct page *page;
+
+	for (; addr < end; addr = next) {
+		page = NULL;
+		next = find_and_clear_pte_page(addr, end, &page);
+		if (!page)
+			continue;
+
+		if (is_vmalloc_addr(page_address(page)))
+			vfree(page_address(page));
+		else {
+			order = next - addr;
+			free_pages((unsigned long)page_address(page),
+				   get_order(order));
+		}
+	}
+}
+
+void __meminit
+vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+	unsigned long addr = (unsigned long)memmap;
+	unsigned long end = (unsigned long)(memmap + nr_pages);
+	unsigned long next;
+	struct page *page;
+	unsigned long magic;
+
+	for (; addr < end; addr = next) {
+		page = NULL;
+		next = find_and_clear_pte_page(addr, end, &page);
+		if (!page)
+			continue;
+
+		magic = (unsigned long) page->lru.next;
+		if (magic == SECTION_INFO)
+			put_page_bootmem(page);
+	}
+}
+
 void __meminit
 register_page_bootmem_memmap(unsigned long section_nr, struct page *start_page,
 			     unsigned long size)
Index: linux-3.5-rc4/mm/memory_hotplug.c
===================================================================
--- linux-3.5-rc4.orig/mm/memory_hotplug.c	2012-07-03 14:22:18.522011667 +0900
+++ linux-3.5-rc4/mm/memory_hotplug.c	2012-07-03 14:22:21.012982694 +0900
@@ -303,6 +303,8 @@ static int __meminit __add_section(int n
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 static int __remove_section(struct zone *zone, struct mem_section *ms)
 {
+	unsigned long flags;
+	struct pglist_data *pgdat = zone->zone_pgdat;
 	int ret;

 	if (!valid_section(ms))
@@ -310,6 +312,9 @@ static int __remove_section(struct zone

 	ret = unregister_memory_section(ms);

+	pgdat_resize_lock(pgdat, &flags);
+	sparse_remove_one_section(zone, ms);
+	pgdat_resize_unlock(pgdat, &flags);
 	return ret;
 }
 #else

^ permalink raw reply

* [RFC PATCH v2 10/13] memory-hotplug : implement register_page_bootmem_info_section of sparse-vmemmap
From: Yasuaki Ishimatsu @ 2012-07-03  6:03 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi
  Cc: len.brown, paulus, minchan.kim, kosaki.motohiro, rientjes, cl,
	akpm, liuj97
In-Reply-To: <4FF287C3.4030901@jp.fujitsu.com>

For removing memmap region of sparse-vmemmap which is allocated bootmem,
memmap region of sparse-vmemmap needs to be registered by get_page_bootmem().
So the patch searches pages of virtual mapping and registers the pages by
get_page_bootmem().

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 arch/x86/mm/init_64.c          |   53 +++++++++++++++++++++++++++++++++++++++++
 include/linux/memory_hotplug.h |    2 +
 include/linux/mm.h             |    3 +-
 mm/memory_hotplug.c            |   23 +++++++++++++++--
 4 files changed, 77 insertions(+), 4 deletions(-)

Index: linux-3.5-rc4/mm/memory_hotplug.c
===================================================================
--- linux-3.5-rc4.orig/mm/memory_hotplug.c	2012-07-03 14:22:14.419062959 +0900
+++ linux-3.5-rc4/mm/memory_hotplug.c	2012-07-03 14:22:18.522011667 +0900
@@ -91,8 +91,8 @@ static void release_memory_resource(stru
 }

 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
-static void get_page_bootmem(unsigned long info,  struct page *page,
-			     unsigned long type)
+void get_page_bootmem(unsigned long info,  struct page *page,
+		      unsigned long type)
 {
 	unsigned long page_type;

@@ -164,8 +164,25 @@ static void register_page_bootmem_info_s

 }
 #else
-static inline void register_page_bootmem_info_section(unsigned long start_pfn)
+static void register_page_bootmem_info_section(unsigned long start_pfn)
 {
+	unsigned long mapsize, section_nr;
+	struct mem_section *ms;
+	struct page *page, *memmap;
+
+	if (!pfn_valid(start_pfn))
+		return;
+
+	section_nr = pfn_to_section_nr(start_pfn);
+	ms = __nr_to_section(section_nr);
+
+	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+
+	page = virt_to_page(memmap);
+	mapsize = sizeof(struct page) * PAGES_PER_SECTION;
+	mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
+
+	register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
 }
 #endif

Index: linux-3.5-rc4/include/linux/mm.h
===================================================================
--- linux-3.5-rc4.orig/include/linux/mm.h	2012-07-03 14:21:45.223427904 +0900
+++ linux-3.5-rc4/include/linux/mm.h	2012-07-03 14:22:18.530011567 +0900
@@ -1586,7 +1586,8 @@ int vmemmap_populate_basepages(struct pa
 						unsigned long pages, int node);
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
-
+void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
+				  unsigned long size);

 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
Index: linux-3.5-rc4/arch/x86/mm/init_64.c
===================================================================
--- linux-3.5-rc4.orig/arch/x86/mm/init_64.c	2012-07-03 14:21:45.228427843 +0900
+++ linux-3.5-rc4/arch/x86/mm/init_64.c	2012-07-03 14:22:18.538011465 +0900
@@ -978,6 +978,59 @@ vmemmap_populate(struct page *start_page
 	return 0;
 }

+void __meminit
+register_page_bootmem_memmap(unsigned long section_nr, struct page *start_page,
+			     unsigned long size)
+{
+	unsigned long addr = (unsigned long)start_page;
+	unsigned long end = (unsigned long)(start_page + size);
+	unsigned long next;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	for (; addr < end; addr = next) {
+		pte_t *pte = NULL;
+
+		pgd = pgd_offset_k(addr);
+		if (pgd_none(*pgd)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
+
+		pud = pud_offset(pgd, addr);
+		if (pud_none(*pud)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
+
+		if (!cpu_has_pse) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			pmd = pmd_offset(pud, addr);
+			if (pmd_none(*pmd))
+				continue;
+			get_page_bootmem(section_nr, pmd_page(*pmd),
+					 MIX_SECTION_INFO);
+
+			pte = pte_offset_kernel(pmd, addr);
+			if (pte_none(*pte))
+				continue;
+			get_page_bootmem(section_nr, pte_page(*pte),
+					 SECTION_INFO);
+		} else {
+			next = pmd_addr_end(addr, end);
+
+			pmd = pmd_offset(pud, addr);
+			if (pmd_none(*pmd))
+				continue;
+			get_page_bootmem(section_nr, pmd_page(*pmd),
+					 SECTION_INFO);
+		}
+	}
+}
+
 void __meminit vmemmap_populate_print_last(void)
 {
 	if (p_start) {
Index: linux-3.5-rc4/include/linux/memory_hotplug.h
===================================================================
--- linux-3.5-rc4.orig/include/linux/memory_hotplug.h	2012-07-03 14:22:14.409063086 +0900
+++ linux-3.5-rc4/include/linux/memory_hotplug.h	2012-07-03 14:22:18.541011428 +0900
@@ -162,6 +162,8 @@ static inline void arch_refresh_nodedata

 extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
 extern void put_page_bootmem(struct page *page);
+extern void get_page_bootmem(unsigned long ingo, struct page *page,
+			     unsigned long type);

 /*
  * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug

^ permalink raw reply

* [RFC PATCH v2 9/13] memory-hotplug : move register_page_bootmem_info_node and put_page_bootmem for sparse-vmemmap
From: Yasuaki Ishimatsu @ 2012-07-03  6:02 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi
  Cc: len.brown, paulus, minchan.kim, kosaki.motohiro, rientjes, cl,
	akpm, liuj97
In-Reply-To: <4FF287C3.4030901@jp.fujitsu.com>

For implementing register_page_bootmem_info_node of sparse-vmemmap,
register_page_bootmem_info_node and put_page_bootmem are moved to
memory_hotplug.c

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 include/linux/memory_hotplug.h |    9 ---------
 mm/memory_hotplug.c            |    8 ++++++--
 2 files changed, 6 insertions(+), 11 deletions(-)

Index: linux-3.5-rc4/include/linux/memory_hotplug.h
===================================================================
--- linux-3.5-rc4.orig/include/linux/memory_hotplug.h	2012-07-03 14:22:10.170116406 +0900
+++ linux-3.5-rc4/include/linux/memory_hotplug.h	2012-07-03 14:22:14.409063086 +0900
@@ -160,17 +160,8 @@ static inline void arch_refresh_nodedata
 #endif /* CONFIG_NUMA */
 #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */

-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
-{
-}
-static inline void put_page_bootmem(struct page *page)
-{
-}
-#else
 extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
 extern void put_page_bootmem(struct page *page);
-#endif

 /*
  * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
Index: linux-3.5-rc4/mm/memory_hotplug.c
===================================================================
--- linux-3.5-rc4.orig/mm/memory_hotplug.c	2012-07-03 14:22:12.299089413 +0900
+++ linux-3.5-rc4/mm/memory_hotplug.c	2012-07-03 14:22:14.419062959 +0900
@@ -91,7 +91,6 @@ static void release_memory_resource(stru
 }

 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
 static void get_page_bootmem(unsigned long info,  struct page *page,
 			     unsigned long type)
 {
@@ -127,6 +126,7 @@ void __ref put_page_bootmem(struct page

 }

+#ifndef CONFIG_SPARSEMEM_VMEMMAP
 static void register_page_bootmem_info_section(unsigned long start_pfn)
 {
 	unsigned long *usemap, mapsize, section_nr, i;
@@ -163,6 +163,11 @@ static void register_page_bootmem_info_s
 		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);

 }
+#else
+static inline void register_page_bootmem_info_section(unsigned long start_pfn)
+{
+}
+#endif

 void register_page_bootmem_info_node(struct pglist_data *pgdat)
 {
@@ -198,7 +203,6 @@ void register_page_bootmem_info_node(str
 		register_page_bootmem_info_section(pfn);

 }
-#endif /* !CONFIG_SPARSEMEM_VMEMMAP */

 static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
 			   unsigned long end_pfn)

^ permalink raw reply

* [RFC PATCH v2 8/13] memory-hotplug : check page type in get_page_bootmem
From: Yasuaki Ishimatsu @ 2012-07-03  6:01 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi
  Cc: len.brown, paulus, minchan.kim, kosaki.motohiro, rientjes, cl,
	akpm, liuj97
In-Reply-To: <4FF287C3.4030901@jp.fujitsu.com>

There is a possibility that get_page_bootmem() is called to the same page many
times. So when get_page_bootmem is called to the same page, the function only
increments page->_count.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 mm/memory_hotplug.c |   15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

Index: linux-3.5-rc4/mm/memory_hotplug.c
===================================================================
--- linux-3.5-rc4.orig/mm/memory_hotplug.c	2012-07-03 14:22:10.170116406 +0900
+++ linux-3.5-rc4/mm/memory_hotplug.c	2012-07-03 14:22:12.299089413 +0900
@@ -95,10 +95,17 @@ static void release_memory_resource(stru
 static void get_page_bootmem(unsigned long info,  struct page *page,
 			     unsigned long type)
 {
-	page->lru.next = (struct list_head *) type;
-	SetPagePrivate(page);
-	set_page_private(page, info);
-	atomic_inc(&page->_count);
+	unsigned long page_type;
+
+	page_type = (unsigned long) page->lru.next;
+	if (type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
+	    type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE){
+		page->lru.next = (struct list_head *) type;
+		SetPagePrivate(page);
+		set_page_private(page, info);
+		atomic_inc(&page->_count);
+	} else
+		atomic_inc(&page->_count);
 }

 /* reference to __meminit __free_pages_bootmem is valid

^ permalink raw reply

* [RFC PATCH v2 7/13] memory-hotplug : remove_memory calls __remove_pages
From: Yasuaki Ishimatsu @ 2012-07-03  6:00 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi
  Cc: len.brown, paulus, minchan.kim, kosaki.motohiro, rientjes, cl,
	akpm, liuj97
In-Reply-To: <4FF287C3.4030901@jp.fujitsu.com>

The patch adds __remove_pages() to remove_memory(). Then the range of
phys_start_pfn argument and nr_pages argument in __remove_pagse() may
have different zone. So zone argument is removed from __remove_pages()
and __remove_pages() caluculates zone in each section.

When CONFIG_SPARSEMEM_VMEMMAP is defined, there is no way to remove a memmap.
So __remove_section only calls unregister_memory_section().

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 arch/powerpc/platforms/pseries/hotplug-memory.c |    5 +----
 include/linux/memory_hotplug.h                  |    3 +--
 mm/memory_hotplug.c                             |   20 +++++++++++++-------
 3 files changed, 15 insertions(+), 13 deletions(-)

Index: linux-3.5-rc4/mm/memory_hotplug.c
===================================================================
--- linux-3.5-rc4.orig/mm/memory_hotplug.c	2012-07-03 14:22:05.919169458 +0900
+++ linux-3.5-rc4/mm/memory_hotplug.c	2012-07-03 14:22:10.170116406 +0900
@@ -275,11 +275,14 @@ static int __meminit __add_section(int n
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 static int __remove_section(struct zone *zone, struct mem_section *ms)
 {
-	/*
-	 * XXX: Freeing memmap with vmemmap is not implement yet.
-	 *      This should be removed later.
-	 */
-	return -EBUSY;
+	int ret;
+
+	if (!valid_section(ms))
+		return ret;
+
+	ret = unregister_memory_section(ms);
+
+	return ret;
 }
 #else
 static int __remove_section(struct zone *zone, struct mem_section *ms)
@@ -346,11 +349,11 @@ EXPORT_SYMBOL_GPL(__add_pages);
  * sure that pages are marked reserved and zones are adjust properly by
  * calling offline_pages().
  */
-int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
-		 unsigned long nr_pages)
+int __remove_pages(unsigned long phys_start_pfn, unsigned long nr_pages)
 {
 	unsigned long i, ret = 0;
 	int sections_to_remove;
+	struct zone *zone;

 	/*
 	 * We can only remove entire sections
@@ -363,6 +366,7 @@ int __remove_pages(struct zone *zone, un
 	sections_to_remove = nr_pages / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+		zone = page_zone(pfn_to_page(pfn));
 		ret = __remove_section(zone, __pfn_to_section(pfn));
 		if (ret)
 			break;
@@ -664,6 +668,8 @@ int remove_memory(int nid, u64 start, u6
 	lock_memory_hotplug();
 	/* remove memmap entry */
 	firmware_map_remove(start, start + size - 1, "System RAM");
+
+	__remove_pages(start >> PAGE_SHIFT, size >> PAGE_SHIFT);
 	unlock_memory_hotplug();
 	return 0;

Index: linux-3.5-rc4/include/linux/memory_hotplug.h
===================================================================
--- linux-3.5-rc4.orig/include/linux/memory_hotplug.h	2012-07-03 14:21:58.330264047 +0900
+++ linux-3.5-rc4/include/linux/memory_hotplug.h	2012-07-03 14:22:10.170116406 +0900
@@ -89,8 +89,7 @@ extern bool is_pageblock_removable_noloc
 /* reasonably generic interface to expand the physical pages in a zone  */
 extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
-extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
-	unsigned long nr_pages);
+extern int __remove_pages(unsigned long start_pfn, unsigned long nr_pages);

 #ifdef CONFIG_NUMA
 extern int memory_add_physaddr_to_nid(u64 start);
Index: linux-3.5-rc4/arch/powerpc/platforms/pseries/hotplug-memory.c
===================================================================
--- linux-3.5-rc4.orig/arch/powerpc/platforms/pseries/hotplug-memory.c	2012-07-03 14:22:05.920169437
+0900
+++ linux-3.5-rc4/arch/powerpc/platforms/pseries/hotplug-memory.c	2012-07-03 14:22:10.172116353 +0900
@@ -76,7 +76,6 @@ unsigned long memory_block_size_bytes(vo
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
 	unsigned long start, start_pfn;
-	struct zone *zone;
 	int i, ret;
 	int sections_to_remove;

@@ -87,8 +86,6 @@ static int pseries_remove_memblock(unsig
 		return 0;
 	}

-	zone = page_zone(pfn_to_page(start_pfn));
-
 	/*
 	 * Remove section mappings and sysfs entries for the
 	 * section of the memory we are removing.
@@ -101,7 +98,7 @@ static int pseries_remove_memblock(unsig
 	sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = start_pfn + i * PAGES_PER_SECTION;
-		ret = __remove_pages(zone, start_pfn,  PAGES_PER_SECTION);
+		ret = __remove_pages(start_pfn,  PAGES_PER_SECTION);
 		if (ret)
 			return ret;
 	}

^ permalink raw reply

* [RFC PATCH v2 6/13] memory-hotplug : add memory_block_release
From: Yasuaki Ishimatsu @ 2012-07-03  5:59 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi
  Cc: len.brown, paulus, minchan.kim, kosaki.motohiro, rientjes, cl,
	akpm, liuj97
In-Reply-To: <4FF287C3.4030901@jp.fujitsu.com>

When calling remove_memory_block(), the function shows following message at
device_release().

Device 'memory528' does not have a release() function, it is broken and must
be fixed.

remove_memory_block() calls kfree(mem). I think it shouled be called from
device_release(). So the patch implements memory_block_release()

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 drivers/base/memory.c |   11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

Index: linux-3.5-rc4/drivers/base/memory.c
===================================================================
--- linux-3.5-rc4.orig/drivers/base/memory.c	2012-07-03 14:21:58.335263984 +0900
+++ linux-3.5-rc4/drivers/base/memory.c	2012-07-03 14:22:08.094141981 +0900
@@ -108,6 +108,15 @@ bool is_memblk_offline(unsigned long sta
 }
 EXPORT_SYMBOL(is_memblk_offline);

+#define to_memory_block(device) container_of(device, struct memory_block, dev)
+
+static void release_memory_block(struct device *dev)
+{
+	struct memory_block *mem = to_memory_block(dev);
+
+	kfree(mem);
+}
+
 /*
  * register_memory - Setup a sysfs device for a memory block
  */
@@ -118,6 +127,7 @@ int register_memory(struct memory_block

 	memory->dev.bus = &memory_subsys;
 	memory->dev.id = memory->start_section_nr / sections_per_block;
+	memory->dev.release = release_memory_block;

 	error = device_register(&memory->dev);
 	return error;
@@ -668,7 +678,6 @@ int remove_memory_block(unsigned long no
 		mem_remove_simple_file(mem, phys_device);
 		mem_remove_simple_file(mem, removable);
 		unregister_memory(mem);
-		kfree(mem);
 	} else
 		kobject_put(&mem->dev.kobj);

^ permalink raw reply

* [RFC PATCH v2 5/13] memory-hotplug : does not release memory region in PAGES_PER_SECTION chunks
From: Yasuaki Ishimatsu @ 2012-07-03  5:58 UTC (permalink / raw)
  To: linux-mm, linux-kernel, linuxppc-dev, linux-acpi
  Cc: len.brown, paulus, minchan.kim, kosaki.motohiro, rientjes, cl,
	akpm, liuj97
In-Reply-To: <4FF287C3.4030901@jp.fujitsu.com>

Since applying a patch(de7f0cba96786c), release_mem_region() has been changed
as called in PAGES_PER_SECTION chunks because register_memory_resource() is
called in PAGES_PER_SECTION chunks by add_memory(). But it seems firmware
dependency. If CRS are written in the PAGES_PER_SECTION chunks in ACPI DSDT
Table, register_memory_resource() is called in PAGES_PER_SECTION chunks.
But if CRS are written in the DIMM unit in ACPI DSDT Table,
register_memory_resource() is called in DIMM unit. So release_mem_region()
should not be called in PAGES_PER_SECTION chunks. The patch fixes it.

CC: David Rientjes <rientjes@google.com>
CC: Jiang Liu <liuj97@gmail.com>
CC: Len Brown <len.brown@intel.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Christoph Lameter <cl@linux.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 arch/powerpc/platforms/pseries/hotplug-memory.c |   13 +++++++++----
 mm/memory_hotplug.c                             |    4 ++--
 2 files changed, 11 insertions(+), 6 deletions(-)

Index: linux-3.5-rc4/mm/memory_hotplug.c
===================================================================
--- linux-3.5-rc4.orig/mm/memory_hotplug.c	2012-07-03 14:22:03.549198802 +0900
+++ linux-3.5-rc4/mm/memory_hotplug.c	2012-07-03 14:22:05.919169458 +0900
@@ -358,11 +358,11 @@ int __remove_pages(struct zone *zone, un
 	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
 	BUG_ON(nr_pages % PAGES_PER_SECTION);

+	release_mem_region(phys_start_pfn << PAGE_SHIFT,  nr_pages * PAGE_SIZE);
+
 	sections_to_remove = nr_pages / PAGES_PER_SECTION;
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
-		release_mem_region(pfn << PAGE_SHIFT,
-				   PAGES_PER_SECTION << PAGE_SHIFT);
 		ret = __remove_section(zone, __pfn_to_section(pfn));
 		if (ret)
 			break;
Index: linux-3.5-rc4/arch/powerpc/platforms/pseries/hotplug-memory.c
===================================================================
--- linux-3.5-rc4.orig/arch/powerpc/platforms/pseries/hotplug-memory.c	2012-07-03 14:21:45.641422678
+0900
+++ linux-3.5-rc4/arch/powerpc/platforms/pseries/hotplug-memory.c	2012-07-03 14:22:05.920169437 +0900
@@ -77,7 +77,8 @@ static int pseries_remove_memblock(unsig
 {
 	unsigned long start, start_pfn;
 	struct zone *zone;
-	int ret;
+	int i, ret;
+	int sections_to_remove;

 	start_pfn = base >> PAGE_SHIFT;

@@ -97,9 +98,13 @@ static int pseries_remove_memblock(unsig
 	 * to sysfs "state" file and we can't remove sysfs entries
 	 * while writing to it. So we have to defer it to here.
 	 */
-	ret = __remove_pages(zone, start_pfn, memblock_size >> PAGE_SHIFT);
-	if (ret)
-		return ret;
+	sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION;
+	for (i = 0; i < sections_to_remove; i++) {
+		unsigned long pfn = start_pfn + i * PAGES_PER_SECTION;
+		ret = __remove_pages(zone, start_pfn,  PAGES_PER_SECTION);
+		if (ret)
+			return ret;
+	}

 	/*
 	 * Update memory regions for memory remove

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox