LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: Linux 3.0 boot failure on the Powerbook G4
From: Michael Büsch @ 2011-07-24 12:37 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev
In-Reply-To: <1311509614.25044.585.camel@pasglop>

On Sun, 24 Jul 2011 22:13:34 +1000
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> > I'm booting zImage.pmac.
> 
> Ah that might make it easier... I don't remember where it links, can you
> show me the program headers out of readelf -a of the zImage ?

As I recompiled stuff, here's the current failure log:
http://bues.ch/misc/linux-3.0-pbook-2.jpg

And this is the corresponding readelf output:

mb@maggie:~$ readelf -a /boot/linux.a
ELF Header:
  Magic:   7f 45 4c 46 01 02 01 00 00 00 00 00 00 00 00 00 
  Class:                             ELF32
  Data:                              2's complement, big endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              EXEC (Executable file)
  Machine:                           PowerPC
  Version:                           0x1
  Entry point address:               0x400230
  Start of program headers:          52 (bytes into file)
  Start of section headers:          5769716 (bytes into file)
  Flags:                             0x8000, relocatable-lib
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         2
  Size of section headers:           40 (bytes)
  Number of section headers:         12
  Section header string table index: 9

Section Headers:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .text             PROGBITS        00400000 010000 0048b0 00  AX  0   0  4
  [ 2] .data             PROGBITS        00405000 015000 0012f8 00  WA  0   0  4
  [ 3] .got              PROGBITS        004062f8 0162f8 00000c 04  WA  0   0  4
  [ 4] __builtin_cmdline PROGBITS        00406304 016304 000200 00  WA  0   0  4
  [ 5] .kernel:vmlinux.s PROGBITS        00407000 017000 569952 00   A  0   0  1
  [ 6] .bss              NOBITS          00971000 580952 00bc70 00  WA  0   0  4
  [ 7] .comment          PROGBITS        00000000 580952 00001c 01  MS  0   0  1
  [ 8] .gnu.attributes   LOOS+ffffff5    00000000 58096e 000014 00      0   0  1
  [ 9] .shstrtab         STRTAB          00000000 580982 000072 00      0   0  1
  [10] .symtab           SYMTAB          00000000 580bd4 000780 10     11  55  4
  [11] .strtab           STRTAB          00000000 581354 0004f3 00      0   0  1
Key to Flags:
  W (write), A (alloc), X (execute), M (merge), S (strings)
  I (info), L (link order), G (group), x (unknown)
  O (extra OS processing required) o (OS specific), p (processor specific)

There are no section groups in this file.

Program Headers:
  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  LOAD           0x010000 0x00400000 0x00400000 0x570952 0x57cc70 RWE 0x10000
  GNU_STACK      0x000000 0x00000000 0x00000000 0x00000 0x00000 RWE 0x4

 Section to Segment mapping:
  Segment Sections...
   00     .text .data .got __builtin_cmdline .kernel:vmlinux.strip .bss 
   01     

There is no dynamic section in this file.

There are no relocations in this file.

There are no unwind sections in this file.

Symbol table '.symtab' contains 120 entries:
   Num:    Value  Size Type    Bind   Vis      Ndx Name
     0: 00000000     0 NOTYPE  LOCAL  DEFAULT  UND 
     1: 00400000     0 SECTION LOCAL  DEFAULT    1 
     2: 00405000     0 SECTION LOCAL  DEFAULT    2 
     3: 004062f8     0 SECTION LOCAL  DEFAULT    3 
     4: 00406304     0 SECTION LOCAL  DEFAULT    4 
     5: 00407000     0 SECTION LOCAL  DEFAULT    5 
     6: 00971000     0 SECTION LOCAL  DEFAULT    6 
     7: 00000000     0 SECTION LOCAL  DEFAULT    7 
     8: 00000000     0 SECTION LOCAL  DEFAULT    8 
     9: 00000000     0 FILE    LOCAL  DEFAULT  ABS of.c
    10: 00400000    96 FUNC    LOCAL  DEFAULT    1 of_image_hdr
    11: 00400130   220 FUNC    LOCAL  DEFAULT    1 of_try_claim
    12: 00971000     4 OBJECT  LOCAL  DEFAULT    6 claim_base
    13: 00000000     0 FILE    LOCAL  DEFAULT  ABS empty.c
    14: 0040021c     0 NOTYPE  LOCAL  DEFAULT    1 p_start
    15: 00400220     0 NOTYPE  LOCAL  DEFAULT    1 p_etext
    16: 00400224     0 NOTYPE  LOCAL  DEFAULT    1 p_bss_start
    17: 00400228     0 NOTYPE  LOCAL  DEFAULT    1 p_end
    18: 0040022c     0 NOTYPE  LOCAL  DEFAULT    1 p_pstack
    19: 00400234     0 NOTYPE  LOCAL  DEFAULT    1 p_base
    20: 00000007     0 NOTYPE  LOCAL  DEFAULT  ABS RELA
    21: 6ffffff9     0 NOTYPE  LOCAL  DEFAULT  ABS RELACOUNT
    22: 00000000     0 FILE    LOCAL  DEFAULT  ABS main.c
    23: 0040032c   536 FUNC    LOCAL  DEFAULT    1 prep_kernel
    24: 00971004 46960 OBJECT  LOCAL  DEFAULT    6 gzstate
    25: 00406304   512 OBJECT  LOCAL  DEFAULT    4 cmdline
    26: 00000000     0 FILE    LOCAL  DEFAULT  ABS gunzip_util.c
    27: 0097c774   128 OBJECT  LOCAL  DEFAULT    6 discard_buf.1439
    28: 00000000     0 FILE    LOCAL  DEFAULT  ABS elf_util.c
    29: 00000000     0 FILE    LOCAL  DEFAULT  ABS inflate.c
    30: 00400ed4   424 FUNC    LOCAL  DEFAULT    1 zlib_adler32
    31: 004011c4   292 FUNC    LOCAL  DEFAULT    1 zlib_updatewindow
    32: 00405484  2048 OBJECT  LOCAL  DEFAULT    2 lenfix.1147
    33: 00405c84   128 OBJECT  LOCAL  DEFAULT    2 distfix.1148
    34: 00405d04    38 OBJECT  LOCAL  DEFAULT    2 order.1216
    35: 00000000     0 FILE    LOCAL  DEFAULT  ABS inftrees.c
    36: 00405e8e    62 OBJECT  LOCAL  DEFAULT    2 lext.1062
    37: 00405ecc    62 OBJECT  LOCAL  DEFAULT    2 lbase.1061
    38: 00405f0a    64 OBJECT  LOCAL  DEFAULT    2 dext.1064
    39: 00405f4a    64 OBJECT  LOCAL  DEFAULT    2 dbase.1063
    40: 00000000     0 FILE    LOCAL  DEFAULT  ABS oflib.c
    41: 00402a4c   432 FUNC    LOCAL  DEFAULT    1 of_call_prom_ret
    42: 0040611c     4 OBJECT  LOCAL  DEFAULT    2 need_map
    43: 0097c7f4     4 OBJECT  LOCAL  DEFAULT    6 prom
    44: 0097c7f8     4 OBJECT  LOCAL  DEFAULT    6 chosen_mmu
    45: 0097c7fc     4 OBJECT  LOCAL  DEFAULT    6 memory
    46: 00000000     0 FILE    LOCAL  DEFAULT  ABS ofconsole.c
    47: 004032b0   104 FUNC    LOCAL  DEFAULT    1 of_console_open
    48: 0040325c    84 FUNC    LOCAL  DEFAULT    1 of_console_write
    49: 0097c800     4 OBJECT  LOCAL  DEFAULT    6 of_stdout_handle
    50: 00000000     0 FILE    LOCAL  DEFAULT  ABS stdio.c
    51: 0040369c   848 FUNC    LOCAL  DEFAULT    1 number
    52: 0097c804  1024 OBJECT  LOCAL  DEFAULT    6 sprint_buf
    53: 00000000     0 FILE    LOCAL  DEFAULT  ABS inffast.c
    54: 004062f8     0 OBJECT  LOCAL  HIDDEN    3 _GLOBAL_OFFSET_TABLE_
    55: 00400060   208 FUNC    GLOBAL DEFAULT    1 platform_init
    56: 00403318     0 NOTYPE  GLOBAL DEFAULT    1 strcpy
    57: 00000000     0 NOTYPE  WEAK   DEFAULT  UND _platform_stack_top
    58: 00400924   240 FUNC    GLOBAL DEFAULT    1 gunzip_partial
    59: 0040413c   188 FUNC    GLOBAL DEFAULT    1 printf
    60: 004039ec  1872 FUNC    GLOBAL DEFAULT    1 vsprintf
    61: 0040426c     0 NOTYPE  GLOBAL DEFAULT    1 __div64_32
    62: 00403468     0 NOTYPE  GLOBAL DEFAULT    1 memmove
    63: 00402a10    60 FUNC    GLOBAL DEFAULT    1 of_init
    64: 00406508     0 NOTYPE  GLOBAL DEFAULT    4 _dtb_start
    65: 0040020c     0 NOTYPE  GLOBAL DEFAULT    1 _zimage_start_opd
    66: 004048b0     0 NOTYPE  GLOBAL DEFAULT    1 _etext
    67: 00402e04    72 FUNC    GLOBAL DEFAULT    1 of_finddevice
    68: 00401088   132 FUNC    GLOBAL DEFAULT    1 zlib_inflateReset
    69: 00403470     0 NOTYPE  GLOBAL DEFAULT    1 memcpy
    70: 00403624     0 NOTYPE  GLOBAL DEFAULT    1 flush_cache
    71: 0040430c  1444 FUNC    GLOBAL DEFAULT    1 inflate_fast
    72: 00407000     0 NOTYPE  GLOBAL DEFAULT    5 _vmlinux_start
    73: 0040110c   152 FUNC    GLOBAL DEFAULT    1 zlib_inflateInit2
    74: 00402dac    88 FUNC    GLOBAL DEFAULT    1 of_getprop
    75: 00400b80   484 FUNC    GLOBAL DEFAULT    1 gunzip_start
    76: 0097cc04    20 OBJECT  GLOBAL DEFAULT    6 loader_info
    77: 0097cc18    28 OBJECT  GLOBAL DEFAULT    6 platform_ops
    78: 00403140   212 FUNC    GLOBAL DEFAULT    1 of_vmlinux_alloc
    79: 00400a7c   120 FUNC    GLOBAL DEFAULT    1 gunzip_exactly
    80: 004012e8   240 FUNC    GLOBAL DEFAULT    1 zlib_inflateIncomp
    81: 00400d64   200 FUNC    GLOBAL DEFAULT    1 parse_elf64
    82: 0097cc34    20 OBJECT  GLOBAL DEFAULT    6 console_ops
    83: 00403650    76 FUNC    GLOBAL DEFAULT    1 strnlen
    84: 00400a14   104 FUNC    GLOBAL DEFAULT    1 gunzip_finish
    85: 00402e90   688 FUNC    GLOBAL DEFAULT    1 of_claim
    86: 00402480  1424 FUNC    GLOBAL DEFAULT    1 zlib_inflate_table
    87: 00400af4   140 FUNC    GLOBAL DEFAULT    1 gunzip_discard
    88: 004013d8  4264 FUNC    GLOBAL DEFAULT    1 zlib_inflate
    89: 00400e2c   168 FUNC    GLOBAL DEFAULT    1 parse_elf32
    90: 0040335c     0 NOTYPE  GLOBAL DEFAULT    1 strcat
    91: 00402e4c    68 FUNC    GLOBAL DEFAULT    1 of_exit
    92: 004035cc     0 NOTYPE  GLOBAL DEFAULT    1 memchr
    93: 00400000     0 NOTYPE  GLOBAL DEFAULT    1 _start
    94: 004033cc     0 NOTYPE  GLOBAL DEFAULT    1 strncmp
    95: 00403214    72 FUNC    GLOBAL DEFAULT    1 of_console_init
    96: 0040107c    12 FUNC    GLOBAL DEFAULT    1 zlib_inflate_workspacesiz
    97: 00403334     0 NOTYPE  GLOBAL DEFAULT    1 strncpy
    98: 004035f4     0 NOTYPE  GLOBAL DEFAULT    1 memcmp
    99: 00971000     0 NOTYPE  GLOBAL DEFAULT    5 _initrd_start
   100: 00400230     0 NOTYPE  WEAK   DEFAULT    1 _zimage_start
   101: 00403528     0 NOTYPE  GLOBAL DEFAULT    1 backwards_memcpy
   102: 00971000     0 NOTYPE  GLOBAL DEFAULT    6 __bss_start
   103: 0040340c     0 NOTYPE  GLOBAL DEFAULT    1 memset
   104: 00406508     0 NOTYPE  GLOBAL DEFAULT    4 _dtb_end
   105: 00971000     0 NOTYPE  GLOBAL DEFAULT    5 _initrd_end
   106: 0097cc48    40 OBJECT  GLOBAL DEFAULT    6 dt_ops
   107: 004033a8     0 NOTYPE  GLOBAL DEFAULT    1 strcmp
   108: 004041f8   116 FUNC    GLOBAL DEFAULT    1 sprintf
   109: 00971000     0 NOTYPE  GLOBAL DEFAULT    6 _edata
   110: 0097cc70     0 NOTYPE  GLOBAL DEFAULT    6 _end
   111: 00400544   992 FUNC    GLOBAL DEFAULT    1 start
   112: 00970952     0 NOTYPE  GLOBAL DEFAULT    5 _vmlinux_end
   113: 004033f4     0 NOTYPE  GLOBAL DEFAULT    1 strlen
   114: 00403388     0 NOTYPE  GLOBAL DEFAULT    1 strchr
   115: 00400230     0 NOTYPE  GLOBAL DEFAULT    1 _zimage_start_lib
   116: 00406504     0 NOTYPE  GLOBAL DEFAULT    4 __dynamic_start
   117: 004011a4    32 FUNC    GLOBAL DEFAULT    1 zlib_inflateEnd
   118: 00402d54    88 FUNC    GLOBAL DEFAULT    1 of_setprop
   119: 00402bfc   344 FUNC    GLOBAL DEFAULT    1 of_call_prom

No version information found in this file.
Attribute Section: gnu
File Attributes
  Tag_GNU_Power_ABI_FP: Soft float
  Tag_GNU_Power_ABI_Vector: Generic
  Tag_GNU_Power_ABI_Struct_Return: Memory


-- 
Greetings, Michael.

^ permalink raw reply

* Re: Linux 3.0 boot failure on the Powerbook G4
From: Benjamin Herrenschmidt @ 2011-07-24 12:13 UTC (permalink / raw)
  To: Michael Büsch; +Cc: linuxppc-dev
In-Reply-To: <20110724141051.771d6492@maggie>

On Sun, 2011-07-24 at 14:10 +0200, Michael Büsch wrote:
> On Sun, 24 Jul 2011 22:07:30 +1000
> Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> 
> > On Sat, 2011-07-23 at 22:20 +0200, Michael Büsch wrote:
> > > Linux 3.0 fails to boot _very_ early on my Powerbook G4. See the
> > > yaboot/OF screenshot:
> > > http://bues.ch/misc/linux-3.0-pbook.jpg
> > > 
> > > Linux 2.6.39.2 boots fine.
> > > Does somebody have an idea?
> > 
> > Interesting, that's before it even kills OF. Are you booting a zImage or
> > a vmlinux ?
> 
> I'm booting zImage.pmac.

Ah that might make it easier... I don't remember where it links, can you
show me the program headers out of readelf -a of the zImage ?

> > It might be also useful to compile yaboot with debug output enabled to
> > figure out where the kernel is loaded so we can try calculating where
> > exactly it dies if it's a vmlinux...
> 
> Hm, I guess I could probably try that.

Cheers,
Ben.

^ permalink raw reply

* Re: Linux 3.0 boot failure on the Powerbook G4
From: Michael Büsch @ 2011-07-24 12:10 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev
In-Reply-To: <1311509250.25044.583.camel@pasglop>

On Sun, 24 Jul 2011 22:07:30 +1000
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:

> On Sat, 2011-07-23 at 22:20 +0200, Michael B=C3=BCsch wrote:
> > Linux 3.0 fails to boot _very_ early on my Powerbook G4. See the
> > yaboot/OF screenshot:
> > http://bues.ch/misc/linux-3.0-pbook.jpg
> >=20
> > Linux 2.6.39.2 boots fine.
> > Does somebody have an idea?
>=20
> Interesting, that's before it even kills OF. Are you booting a zImage or
> a vmlinux ?

I'm booting zImage.pmac.

> It might be also useful to compile yaboot with debug output enabled to
> figure out where the kernel is loaded so we can try calculating where
> exactly it dies if it's a vmlinux...

Hm, I guess I could probably try that.

--=20
Greetings, Michael.

^ permalink raw reply

* Re: Linux 3.0 boot failure on the Powerbook G4
From: Benjamin Herrenschmidt @ 2011-07-24 12:07 UTC (permalink / raw)
  To: Michael Büsch; +Cc: linuxppc-dev
In-Reply-To: <20110723222034.6757604b@maggie>

On Sat, 2011-07-23 at 22:20 +0200, Michael Büsch wrote:
> Linux 3.0 fails to boot _very_ early on my Powerbook G4. See the
> yaboot/OF screenshot:
> http://bues.ch/misc/linux-3.0-pbook.jpg
> 
> Linux 2.6.39.2 boots fine.
> Does somebody have an idea?

Interesting, that's before it even kills OF. Are you booting a zImage or
a vmlinux ?

It might be also useful to compile yaboot with debug output enabled to
figure out where the kernel is loaded so we can try calculating where
exactly it dies if it's a vmlinux...

Cheers,
Ben.

> The config can be found here:
> http://bues.ch/misc/linux-3.0-ppc-config
> It's mostly equivalent to the working 2.6.39 config. I enabled
> early OF console, but that didn't help to show better error messages.
> 
> The machine is a:
> 
> cpu             : 7447A, altivec supported
> clock           : 1499.999000MHz
> revision        : 1.2 (pvr 8003 0102)
> platform        : PowerMac
> motherboard     : PowerBook5,6 MacRISC3 Power Macintosh 
> detected as     : 287 (PowerBook G4 15")
> pmac-generation : NewWorld
> 

^ permalink raw reply

* Re: Linux 3.0 boot failure on the Powerbook G4
From: Michael Büsch @ 2011-07-24 10:37 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: linuxppc-dev
In-Reply-To: <m2tyacdrx0.fsf@igel.home>

On Sun, 24 Jul 2011 10:06:03 +0200
Andreas Schwab <schwab@linux-m68k.org> wrote:

> Michael B=C3=BCsch <m@bues.ch> writes:
>=20
> > Linux 3.0 fails to boot _very_ early on my Powerbook G4. See the
> > yaboot/OF screenshot:
> > http://bues.ch/misc/linux-3.0-pbook.jpg
> >
> > Linux 2.6.39.2 boots fine.
> > Does somebody have an idea?
>=20
> Perhaps your image is getting too large?

I reduced the image size, so that it's way less than the
2.6.39 kernel size (old is 2.6.39. a is 3.0)
-rwxr-xr-x 1 root root 5.6M Jul 24 12:16 /boot/linux.a
-rwxr-xr-x 1 root root 6.3M Jul 23 20:50 /boot/linux.old

But that didn't help. :(

I'm currently trying to bisect it, but that turns out to be hard
due to various compile issues and stuff like that...

--=20
Greetings, Michael.

^ permalink raw reply

* Re: Linux 3.0 boot failure on the Powerbook G4
From: Andreas Schwab @ 2011-07-24  8:06 UTC (permalink / raw)
  To: Michael Büsch; +Cc: linuxppc-dev
In-Reply-To: <20110723222034.6757604b__32574.6897528463$1311473425$gmane$org@maggie>

Michael Büsch <m@bues.ch> writes:

> Linux 3.0 fails to boot _very_ early on my Powerbook G4. See the
> yaboot/OF screenshot:
> http://bues.ch/misc/linux-3.0-pbook.jpg
>
> Linux 2.6.39.2 boots fine.
> Does somebody have an idea?

Perhaps your image is getting too large?

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply

* Linux 3.0 boot failure on the Powerbook G4
From: Michael Büsch @ 2011-07-23 20:20 UTC (permalink / raw)
  To: linuxppc-dev

Linux 3.0 fails to boot _very_ early on my Powerbook G4. See the
yaboot/OF screenshot:
http://bues.ch/misc/linux-3.0-pbook.jpg

Linux 2.6.39.2 boots fine.
Does somebody have an idea?

The config can be found here:
http://bues.ch/misc/linux-3.0-ppc-config
It's mostly equivalent to the working 2.6.39 config. I enabled
early OF console, but that didn't help to show better error messages.

The machine is a:

cpu             : 7447A, altivec supported
clock           : 1499.999000MHz
revision        : 1.2 (pvr 8003 0102)
platform        : PowerMac
motherboard     : PowerBook5,6 MacRISC3 Power Macintosh 
detected as     : 287 (PowerBook G4 15")
pmac-generation : NewWorld

-- 
Greetings, Michael.

^ permalink raw reply

* [PATCH tip:x86/memblock RFC] powerpc: Use HAVE_MEMBLOCK_NODE_MAP
From: Tejun Heo @ 2011-07-23 13:32 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, H. Peter Anvin, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel

powerpc doesn't access early_node_map[] directly and enabling
HAVE_MEMBLOCK_NODE_MAP is trivial - replacing add_active_range() calls
with memblock_set_node() and selecting HAVE_MEMBLOCK_NODE_MAP is
enough.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
benh, the proposed memblock changes are now in tip:x86/memblock, which
is probably scheduled for the next merge window.  This patch converts
powerpc to drop early_node_map[] and put NUMA information in memblock.
I'm planning on converting all early_node_map[] users and dropping the
code.  I don't have access to ppc NUMA machine so the patch is tested
only on a non-NUMA SMP machine.  It builds fine for all valid
combinations of of 32/64 [!]SMP [!]NUMA.

If this change is okay, how should this patch be routed?  ppc tree
pulls from x86/memblock and puts this patch on top of it?

Thanks.

 arch/powerpc/Kconfig   |    1 +
 arch/powerpc/mm/mem.c  |    2 +-
 arch/powerpc/mm/numa.c |   10 ++++------
 3 files changed, 6 insertions(+), 7 deletions(-)

Index: work/arch/powerpc/Kconfig
===================================================================
--- work.orig/arch/powerpc/Kconfig
+++ work/arch/powerpc/Kconfig
@@ -117,6 +117,7 @@ config PPC
 	select HAVE_KRETPROBES
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
 	select USE_GENERIC_SMP_HELPERS if SMP
Index: work/arch/powerpc/mm/mem.c
===================================================================
--- work.orig/arch/powerpc/mm/mem.c
+++ work/arch/powerpc/mm/mem.c
@@ -197,7 +197,7 @@ void __init do_init_bootmem(void)
 		unsigned long start_pfn, end_pfn;
 		start_pfn = memblock_region_memory_base_pfn(reg);
 		end_pfn = memblock_region_memory_end_pfn(reg);
-		add_active_range(0, start_pfn, end_pfn);
+		memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
 	}
 
 	/* Add all physical memory to the bootmem map, mark each area
Index: work/arch/powerpc/mm/numa.c
===================================================================
--- work.orig/arch/powerpc/mm/numa.c
+++ work/arch/powerpc/mm/numa.c
@@ -680,9 +680,7 @@ static void __init parse_drconf_memory(s
 			node_set_online(nid);
 			sz = numa_enforce_memory_limit(base, size);
 			if (sz)
-				add_active_range(nid, base >> PAGE_SHIFT,
-						 (base >> PAGE_SHIFT)
-						 + (sz >> PAGE_SHIFT));
+				memblock_set_node(base, sz, nid);
 		} while (--ranges);
 	}
 }
@@ -772,8 +770,7 @@ new_range:
 				continue;
 		}
 
-		add_active_range(nid, start >> PAGE_SHIFT,
-				(start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
+		memblock_set_node(start, size, nid);
 
 		if (--ranges)
 			goto new_range;
@@ -808,7 +805,8 @@ static void __init setup_nonnuma(void)
 		end_pfn = memblock_region_memory_end_pfn(reg);
 
 		fake_numa_create_new_node(end_pfn, &nid);
-		add_active_range(nid, start_pfn, end_pfn);
+		memblock_set_node(PFN_PHYS(start_pfn),
+				  PFN_PHYS(end_pfn - start_pfn), nid);
 		node_set_online(nid);
 	}
 }

^ permalink raw reply

* [PATCH v3 6/6] fault-injection: add notifier error injection testing scripts
From: Akinobu Mita @ 2011-07-23  8:51 UTC (permalink / raw)
  To: linux-kernel, akpm
  Cc: Greg KH, Akinobu Mita, Rafael J. Wysocki, linux-mm,
	Paul Mackerras, Pavel Machek, Américo Wang, linux-pm,
	linuxppc-dev
In-Reply-To: <1311411060-30124-1-git-send-email-akinobu.mita@gmail.com>

* tools/testing/fault-injection/cpu-notifier.sh is testing script for
CPU notifier error handling by using cpu-notifier-error-inject.ko.

1. Offline all hot-pluggable CPUs in preparation for testing
2. Test CPU hot-add error handling by injecting notifier errors
3. Online all hot-pluggable CPUs in preparation for testing
4. Test CPU hot-remove error handling by injecting notifier errors

* tools/testing/fault-injection/memory-notifier.sh is doing the same thing
for memory hotplug notifier.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: linux-pm@lists.linux-foundation.org
Cc: Greg KH <greg@kroah.com>
Cc: linux-mm@kvack.org
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Américo Wang <xiyou.wangcong@gmail.com>
---
* v3
- new patch

 tools/testing/fault-injection/cpu-notifier.sh    |  162 +++++++++++++++++++++
 tools/testing/fault-injection/memory-notifier.sh |  163 ++++++++++++++++++++++
 2 files changed, 325 insertions(+), 0 deletions(-)
 create mode 100755 tools/testing/fault-injection/cpu-notifier.sh
 create mode 100755 tools/testing/fault-injection/memory-notifier.sh

diff --git a/tools/testing/fault-injection/cpu-notifier.sh b/tools/testing/fault-injection/cpu-notifier.sh
new file mode 100755
index 0000000..be02a85
--- /dev/null
+++ b/tools/testing/fault-injection/cpu-notifier.sh
@@ -0,0 +1,162 @@
+#!/bin/bash
+
+#
+# list all hot-pluggable CPUs
+#
+hotpluggable_cpus()
+{
+	local state=${1:-.\*}
+
+	for cpu in /sys/devices/system/cpu/cpu*; do
+		if [ -f $cpu/online ] && grep -q $state $cpu/online; then
+			echo ${cpu##/*/cpu}
+		fi
+	done
+}
+
+hotplaggable_offline_cpus()
+{
+	hotpluggable_cpus 0
+}
+
+hotpluggable_online_cpus()
+{
+	hotpluggable_cpus 1
+}
+
+cpu_is_online()
+{
+	grep -q 1 /sys/devices/system/cpu/cpu$1/online
+}
+
+cpu_is_offline()
+{
+	grep -q 0 /sys/devices/system/cpu/cpu$1/online
+}
+
+add_cpu()
+{
+	echo 1 > /sys/devices/system/cpu/cpu$1/online
+}
+
+remove_cpu()
+{
+	echo 0 > /sys/devices/system/cpu/cpu$1/online
+}
+
+add_cpu_expect_success()
+{
+	local cpu=$1
+
+	if ! add_cpu $cpu; then
+		echo $FUNCNAME $cpu: unexpected fail >&2
+	elif ! cpu_is_online $cpu; then
+		echo $FUNCNAME $cpu: unexpected offline >&2
+	fi
+}
+
+add_cpu_expect_fail()
+{
+	local cpu=$1
+
+	if add_cpu $cpu 2> /dev/null; then
+		echo $FUNCNAME $cpu: unexpected success >&2
+	elif ! cpu_is_offline $cpu; then
+		echo $FUNCNAME $cpu: unexpected online >&2
+	fi
+}
+
+remove_cpu_expect_success()
+{
+	local cpu=$1
+
+	if ! remove_cpu $cpu; then
+		echo $FUNCNAME $cpu: unexpected fail >&2
+	elif ! cpu_is_offline $cpu; then
+		echo $FUNCNAME $cpu: unexpected offline >&2
+	fi
+}
+
+remove_cpu_expect_fail()
+{
+	local cpu=$1
+
+	if remove_cpu $cpu 2> /dev/null; then
+		echo $FUNCNAME $cpu: unexpected success >&2
+	elif ! cpu_is_online $cpu; then
+		echo $FUNCNAME $cpu: unexpected offline >&2
+	fi
+}
+
+if [ $UID != 0 ]; then
+	echo must be run as root >&2
+	exit 1
+fi
+
+error=-12
+priority=0
+
+while getopts e:p: opt; do
+	case $opt in
+	e)
+		error=$OPTARG
+		;;
+	p)
+		priority=$OPTARG
+		;;
+	esac
+done
+
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+	echo "error code must be -4095 <= errno < 0" >&2
+	exit 1
+fi
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+
+if [ ! -d "$DEBUGFS" ]; then
+	echo debugfs is not mounted >&2
+	exit 1
+fi
+
+/sbin/modprobe -r cpu-notifier-error-inject
+/sbin/modprobe -q cpu-notifier-error-inject priority=$priority
+
+if [ ! -d $DEBUGFS/cpu-notifier-error-inject ]; then
+	echo cpu-notifier-error-inject module is not available >&2
+	exit 1
+fi
+
+#
+# Offline all hot-pluggable CPUs
+#
+echo 0 > $DEBUGFS/cpu-notifier-error-inject/CPU_DOWN_PREPARE
+for cpu in `hotpluggable_online_cpus`; do
+	remove_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-add error handling (offline => online)
+#
+echo $error > $DEBUGFS/cpu-notifier-error-inject/CPU_UP_PREPARE
+for cpu in `hotplaggable_offline_cpus`; do
+	add_cpu_expect_fail $cpu
+done
+
+#
+# Online all hot-pluggable CPUs
+#
+echo 0 > $DEBUGFS/cpu-notifier-error-inject/CPU_UP_PREPARE
+for cpu in `hotplaggable_offline_cpus`; do
+	add_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-remove error handling (online => offline)
+#
+echo $error > $DEBUGFS/cpu-notifier-error-inject/CPU_DOWN_PREPARE
+for cpu in `hotpluggable_online_cpus`; do
+	remove_cpu_expect_fail $cpu
+done
+
+/sbin/modprobe -r cpu-notifier-error-inject
diff --git a/tools/testing/fault-injection/memory-notifier.sh b/tools/testing/fault-injection/memory-notifier.sh
new file mode 100755
index 0000000..b7e7fa5
--- /dev/null
+++ b/tools/testing/fault-injection/memory-notifier.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+
+#
+# list all hot-pluggable memory
+#
+hotpluggable_memory()
+{
+	local state=${1:-.\*}
+
+	for memory in /sys/devices/system/memory/memory*; do
+		if grep -q 1 $memory/removable &&
+		   grep -q $state $memory/state; then
+			echo ${memory##/*/memory}
+		fi
+	done
+}
+
+hotplaggable_offline_memory()
+{
+	hotpluggable_memory offline
+}
+
+hotpluggable_online_memory()
+{
+	hotpluggable_memory online
+}
+
+memory_is_online()
+{
+	grep -q online /sys/devices/system/memory/memory$1/state
+}
+
+memory_is_offline()
+{
+	grep -q offline /sys/devices/system/memory/memory$1/state
+}
+
+add_memory()
+{
+	echo online > /sys/devices/system/memory/memory$1/state
+}
+
+remove_memory()
+{
+	echo offline > /sys/devices/system/memory/memory$1/state
+}
+
+add_memory_expect_success()
+{
+	local memory=$1
+
+	if ! add_memory $memory; then
+		echo $FUNCNAME $memory: unexpected fail >&2
+	elif ! memory_is_online $memory; then
+		echo $FUNCNAME $memory: unexpected offline >&2
+	fi
+}
+
+add_memory_expect_fail()
+{
+	local memory=$1
+
+	if add_memory $memory 2> /dev/null; then
+		echo $FUNCNAME $memory: unexpected success >&2
+	elif ! memory_is_offline $memory; then
+		echo $FUNCNAME $memory: unexpected online >&2
+	fi
+}
+
+remove_memory_expect_success()
+{
+	local memory=$1
+
+	if ! remove_memory $memory; then
+		echo $FUNCNAME $memory: unexpected fail >&2
+	elif ! memory_is_offline $memory; then
+		echo $FUNCNAME $memory: unexpected offline >&2
+	fi
+}
+
+remove_memory_expect_fail()
+{
+	local memory=$1
+
+	if remove_memory $memory 2> /dev/null; then
+		echo $FUNCNAME $memory: unexpected success >&2
+	elif ! memory_is_online $memory; then
+		echo $FUNCNAME $memory: unexpected offline >&2
+	fi
+}
+
+if [ $UID != 0 ]; then
+	echo must be run as root >&2
+	exit 1
+fi
+
+error=-12
+priority=0
+
+while getopts e:p: opt; do
+	case $opt in
+	e)
+		error=$OPTARG
+		;;
+	p)
+		priority=$OPTARG
+		;;
+	esac
+done
+
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+	echo "error code must be -4095 <= errno < 0" >&2
+	exit 1
+fi
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+
+if [ ! -d "$DEBUGFS" ]; then
+	echo debugfs is not mounted >&2
+	exit 1
+fi
+
+/sbin/modprobe -r memory-notifier-error-inject
+/sbin/modprobe -q memory-notifier-error-inject priority=$priority
+
+if [ ! -d $DEBUGFS/memory-notifier-error-inject ]; then
+	echo memory-notifier-error-inject module is not available >&2
+	exit 1
+fi
+
+#
+# Offline all hot-pluggable memory
+#
+echo 0 > $DEBUGFS/memory-notifier-error-inject/MEM_GOING_OFFLINE
+for memory in `hotpluggable_online_memory`; do
+	remove_memory_expect_success $memory
+done
+
+#
+# Test memory hot-add error handling (offline => online)
+#
+echo $error > $DEBUGFS/memory-notifier-error-inject/MEM_GOING_ONLINE
+for memory in `hotplaggable_offline_memory`; do
+	add_memory_expect_fail $memory
+done
+
+#
+# Online all hot-pluggable memory
+#
+echo 0 > $DEBUGFS/memory-notifier-error-inject/MEM_GOING_ONLINE
+for memory in `hotplaggable_offline_memory`; do
+	add_memory_expect_success $memory
+done
+
+#
+# Test memory hot-remove error handling (online => offline)
+#
+echo $error > $DEBUGFS/memory-notifier-error-inject/MEM_GOING_OFFLINE
+for memory in `hotpluggable_online_memory`; do
+	remove_memory_expect_fail $memory
+done
+
+/sbin/modprobe -r memory-notifier-error-inject
-- 
1.7.4.4

^ permalink raw reply related

* [PATCH v3 5/6] powerpc: pSeries reconfig notifier error injection module
From: Akinobu Mita @ 2011-07-23  8:50 UTC (permalink / raw)
  To: linux-kernel, akpm; +Cc: linuxppc-dev, Paul Mackerras, Akinobu Mita
In-Reply-To: <1311411060-30124-1-git-send-email-akinobu.mita@gmail.com>

This provides the ability to inject artifical errors to pSeries reconfig
notifier chain callbacks.  It is controlled through debugfs interface
under /sys/kernel/debug/pSeries-reconfig-notifier-error-inject/

Each of the files in the directory represents an event which can be failed
and contains the error code.  If the notifier call chain should be failed
with some events notified, write the error code to the files.

This module needs pSeries_reconfig_notifier_{,un}register symbols to be
exported.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
---
* v3
- rewrite to be kernel modules instead of initializing at late_initcall()s
- export pSeries_reconfig_notifier_{,un}register symbols for this module
- notifier priority can be specified as a module parameter

 arch/powerpc/platforms/pseries/reconfig.c    |    2 +
 lib/Kconfig.debug                            |   14 +++++++
 lib/Makefile                                 |    2 +
 lib/pSeries-reconfig-notifier-error-inject.c |   48 ++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 0 deletions(-)
 create mode 100644 lib/pSeries-reconfig-notifier-error-inject.c

diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 168651a..b9808e9 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -103,11 +103,13 @@ int pSeries_reconfig_notifier_register(struct notifier_block *nb)
 {
 	return blocking_notifier_chain_register(&pSeries_reconfig_chain, nb);
 }
+EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_register);
 
 void pSeries_reconfig_notifier_unregister(struct notifier_block *nb)
 {
 	blocking_notifier_chain_unregister(&pSeries_reconfig_chain, nb);
 }
+EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_unregister);
 
 int pSeries_reconfig_notify(unsigned long action, void *p)
 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a2b0856..46298d7 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1099,6 +1099,20 @@ config MEMORY_NOTIFIER_ERROR_INJECT
 
 	  If unsure, say N.
 
+config PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT
+	tristate "pSeries reconfig notifier error injection module"
+	depends on PPC_PSERIES && NOTIFIER_ERROR_INJECTION
+	help
+	  This option provides the ability to inject artifical errors to
+	  pSeries reconfig notifier chain callbacks.  It is controlled
+	  through debugfs interface under
+	  /sys/kernel/debug/pSeries-reconfig-notifier-error-inject/
+
+	  To compile this code as a module, choose M here: the module will
+	  be called memory-notifier-error-inject.
+
+	  If unsure, say N.
+
 config FAULT_INJECTION
 	bool "Fault-injection framework"
 	depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index f28914b..e68cea7 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -92,6 +92,8 @@ obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
 obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
+obj-$(CONFIG_PSERIES_RECONFIG_NOTIFIER_ERROR_INJECT) += \
+	pSeries-reconfig-notifier-error-inject.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
diff --git a/lib/pSeries-reconfig-notifier-error-inject.c b/lib/pSeries-reconfig-notifier-error-inject.c
new file mode 100644
index 0000000..f4ed2b3
--- /dev/null
+++ b/lib/pSeries-reconfig-notifier-error-inject.c
@@ -0,0 +1,48 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+
+#include <asm/pSeries_reconfig.h>
+
+static int priority;
+module_param(priority, int, 0);
+MODULE_PARM_DESC(priority, "specify pSeries reconfig notifier priority");
+
+static struct err_inject_notifier_block err_inject_reconfig_nb = {
+	.actions = {
+		{ ERR_INJECT_NOTIFIER_ACTION(PSERIES_RECONFIG_ADD) },
+		{ ERR_INJECT_NOTIFIER_ACTION(PSERIES_RECONFIG_REMOVE) },
+		{ ERR_INJECT_NOTIFIER_ACTION(PSERIES_DRCONF_MEM_ADD) },
+		{ ERR_INJECT_NOTIFIER_ACTION(PSERIES_DRCONF_MEM_REMOVE) },
+		{}
+	}
+};
+
+static int err_inject_init(void)
+{
+	int err;
+
+	err = err_inject_notifier_block_init(&err_inject_reconfig_nb,
+			"pSeries-reconfig-notifier-error-inject", priority);
+	if (err)
+		return err;
+
+	err = pSeries_reconfig_notifier_register(&err_inject_reconfig_nb.nb);
+	if (err)
+		err_inject_notifier_block_cleanup(&err_inject_reconfig_nb);
+
+	return err;
+}
+
+static void err_inject_exit(void)
+{
+	pSeries_reconfig_notifier_unregister(&err_inject_reconfig_nb.nb);
+	err_inject_notifier_block_cleanup(&err_inject_reconfig_nb);
+}
+
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+
+MODULE_DESCRIPTION("pSeries reconfig notifier error injection module");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
-- 
1.7.4.4

^ permalink raw reply related

* [PATCH v3 1/6] fault-injection: notifier error injection
From: Akinobu Mita @ 2011-07-23  8:50 UTC (permalink / raw)
  To: linux-kernel, akpm
  Cc: Greg KH, Akinobu Mita, Rafael J. Wysocki, linux-mm,
	Paul Mackerras, Pavel Machek, linux-pm, linuxppc-dev
In-Reply-To: <1311411060-30124-1-git-send-email-akinobu.mita@gmail.com>

The notifier error injection provides the ability to inject artifical
errors to specified notifier chain callbacks.  It is useful to test the
error handling of notifier call chain failures.

This adds common basic functions to define which type of events can be
fail and to initialize the debugfs interface to control what error code
should be returned and which event should be failed.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: linux-pm@lists.linux-foundation.org
Cc: Greg KH <greg@kroah.com>
Cc: linux-mm@kvack.org
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
---
* v3
- export err_inject_notifier_block_{init,cleanup} for modules

 include/linux/notifier.h |   25 ++++++++++++++
 kernel/notifier.c        |   83 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/Kconfig.debug        |   11 ++++++
 3 files changed, 119 insertions(+), 0 deletions(-)

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index c0688b0..51882d6 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -278,5 +278,30 @@ extern struct blocking_notifier_head reboot_notifier_list;
 #define VT_UPDATE		0x0004 /* A bigger update occurred */
 #define VT_PREWRITE		0x0005 /* A char is about to be written to the console */
 
+#ifdef CONFIG_NOTIFIER_ERROR_INJECTION
+
+struct err_inject_notifier_action {
+	unsigned long val;
+	int error;
+	const char *name;
+};
+
+#define ERR_INJECT_NOTIFIER_ACTION(action)	\
+	.name = #action, .val = (action),
+
+struct err_inject_notifier_block {
+	struct notifier_block nb;
+	struct dentry *dir;
+	struct err_inject_notifier_action actions[];
+	/* The last slot must be terminated with zero sentinel */
+};
+
+extern int err_inject_notifier_block_init(struct err_inject_notifier_block *enb,
+				const char *name, int priority);
+extern void err_inject_notifier_block_cleanup(
+				struct err_inject_notifier_block *enb);
+
+#endif /* CONFIG_NOTIFIER_ERROR_INJECTION */
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_NOTIFIER_H */
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 2488ba7..8dcb2cc 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -5,6 +5,7 @@
 #include <linux/rcupdate.h>
 #include <linux/vmalloc.h>
 #include <linux/reboot.h>
+#include <linux/debugfs.h>
 
 /*
  *	Notifier list for kernel code which wants to be called
@@ -584,3 +585,85 @@ int unregister_die_notifier(struct notifier_block *nb)
 	return atomic_notifier_chain_unregister(&die_chain, nb);
 }
 EXPORT_SYMBOL_GPL(unregister_die_notifier);
+
+#ifdef CONFIG_NOTIFIER_ERROR_INJECTION
+
+static int debugfs_errno_set(void *data, u64 val)
+{
+	*(int *)data = clamp_t(int, val, -MAX_ERRNO, 0);
+	return 0;
+}
+
+static int debugfs_errno_get(void *data, u64 *val)
+{
+	*val = *(int *)data;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_errno, debugfs_errno_get, debugfs_errno_set,
+			"%lld\n");
+
+static struct dentry *debugfs_create_errno(const char *name, mode_t mode,
+				struct dentry *parent, int *value)
+{
+	return debugfs_create_file(name, mode, parent, value, &fops_errno);
+}
+
+static int err_inject_notifier_callback(struct notifier_block *nb,
+				unsigned long val, void *p)
+{
+	int err = 0;
+	struct err_inject_notifier_block *enb =
+		container_of(nb, struct err_inject_notifier_block, nb);
+	struct err_inject_notifier_action *action;
+
+	for (action = enb->actions; action->name; action++) {
+		if (action->val == val) {
+			err = action->error;
+			break;
+		}
+	}
+	if (err) {
+		printk(KERN_INFO "Injecting error (%d) to %s\n",
+			err, action->name);
+	}
+
+	return notifier_from_errno(err);
+}
+
+int err_inject_notifier_block_init(struct err_inject_notifier_block *enb,
+				const char *name, int priority)
+{
+	struct err_inject_notifier_action *action;
+	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+
+	enb->nb.notifier_call = err_inject_notifier_callback;
+	enb->nb.priority = priority;
+
+	enb->dir = debugfs_create_dir(name, NULL);
+	if (!enb->dir)
+		return -ENOMEM;
+
+	for (action = enb->actions; action->name; action++) {
+		/*
+		 * Create debugfs r/w file containing action->error. If
+		 * notifier call chain is called with action->val, it will
+		 * fail with the error code
+		 */
+		if (!debugfs_create_errno(action->name, mode, enb->dir,
+					&action->error)) {
+			debugfs_remove_recursive(enb->dir);
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(err_inject_notifier_block_init);
+
+void err_inject_notifier_block_cleanup(struct err_inject_notifier_block *enb)
+{
+	debugfs_remove_recursive(enb->dir);
+}
+EXPORT_SYMBOL_GPL(err_inject_notifier_block_cleanup);
+
+#endif /* CONFIG_NOTIFIER_ERROR_INJECTION */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c0cb9c4..2a62c5a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1021,6 +1021,17 @@ config LKDTM
 	Documentation on how to use the module can be found in
 	Documentation/fault-injection/provoke-crashes.txt
 
+config NOTIFIER_ERROR_INJECTION
+	bool "Notifier error injection"
+	depends on DEBUG_KERNEL
+	select DEBUG_FS
+	help
+	  This option provides the ability to inject artifical errors to
+	  specified notifier chain callbacks. It is useful to test the error
+	  handling of notifier call chain failures.
+
+	  Say N if unsure.
+
 config CPU_NOTIFIER_ERROR_INJECT
 	tristate "CPU notifier error injection module"
 	depends on HOTPLUG_CPU && DEBUG_KERNEL
-- 
1.7.4.4

^ permalink raw reply related

* [PATCH 3/3] KVM: PPC: Implement H_CEDE hcall for book3s_hv in real-mode code
From: Paul Mackerras @ 2011-07-23  7:42 UTC (permalink / raw)
  To: linuxppc-dev, kvm-ppc, Alexander Graf
In-Reply-To: <20110723074111.GA17927@bloggs.ozlabs.ibm.com>

With a KVM guest operating in SMT4 mode (i.e. 4 hardware threads per
core), whenever a CPU goes idle, we have to pull all the other
hardware threads in the core out of the guest, because the H_CEDE
hcall is handled in the kernel.  This is inefficient.

This adds code to book3s_hv_rmhandlers.S to handle the H_CEDE hcall
in real mode.  When a guest vcpu does an H_CEDE hcall, we now only
exit to the kernel if all the other vcpus in the same core are also
idle.  Otherwise we mark this vcpu as napping, save state that could
be lost in nap mode (mainly GPRs and FPRs), and execute the nap
instruction.  When the thread wakes up, because of a decrementer or
external interrupt, we come back in at kvm_start_guest (from the
system reset interrupt vector), find the `napping' flag set in the
paca, and go to the resume path.

This has some other ramifications.  First, when starting a core, we
now start all the threads, both those that are immediately runnable and
those that are idle.  This is so that we don't have to pull all the
threads out of the guest when an idle thread gets a decrementer interrupt
and wants to start running.  In fact the idle threads will all start
with the H_CEDE hcall returning; being idle they will just do another
H_CEDE immediately and go to nap mode.

This required some changes to kvmppc_run_core() and kvmppc_run_vcpu().
These functions have been restructured to make them simpler and clearer.
We introduce a level of indirection in the wait queue that gets woken
when external and decrementer interrupts get generated for a vcpu, so
that we can have the 4 vcpus in a vcore using the same wait queue.
We need this because the 4 vcpus are being handled by one thread.

Secondly, when we need to exit from the guest to the kernel, we now
have to generate an IPI for any napping threads, because an HDEC
interrupt doesn't wake up a napping thread.

Thirdly, we now need to be able to handle virtual external interrupts
and decrementer interrupts becoming pending while a thread is napping,
and deliver those interrupts to the guest when the thread wakes.
This is done in kvmppc_cede_reentry, just before fast_guest_return.

Finally, since we are not using the generic kvm_vcpu_block for book3s_hv,
and hence not calling kvm_arch_vcpu_runnable, we can remove the #ifdef
from kvm_arch_vcpu_runnable.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_book3s_asm.h |    1 +
 arch/powerpc/include/asm/kvm_host.h       |   19 ++-
 arch/powerpc/kernel/asm-offsets.c         |    6 +
 arch/powerpc/kvm/book3s_hv.c              |  335 ++++++++++++++++-------------
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  297 ++++++++++++++++++++++---
 arch/powerpc/kvm/powerpc.c                |   21 +-
 6 files changed, 483 insertions(+), 196 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index af73469..1f2f5b6 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -76,6 +76,7 @@ struct kvmppc_host_state {
 	ulong scratch1;
 	u8 in_guest;
 	u8 restore_hid5;
+	u8 napping;
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	struct kvm_vcpu *kvm_vcpu;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index db15384..652b05f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -198,21 +198,29 @@ struct kvm_arch {
  */
 struct kvmppc_vcore {
 	int n_runnable;
-	int n_blocked;
+	int n_busy;
 	int num_threads;
 	int entry_exit_count;
 	int n_woken;
 	int nap_count;
+	int napping_threads;
 	u16 pcpu;
-	u8 vcore_running;
+	u8 vcore_state;
 	u8 in_guest;
 	struct list_head runnable_threads;
 	spinlock_t lock;
+	wait_queue_head_t wq;
 };
 
 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
 #define VCORE_EXIT_COUNT(vc)	((vc)->entry_exit_count >> 8)
 
+/* Values for vcore_state */
+#define VCORE_INACTIVE	0
+#define VCORE_RUNNING	1
+#define VCORE_EXITING	2
+#define VCORE_SLEEPING	3
+
 struct kvmppc_pte {
 	ulong eaddr;
 	u64 vpage;
@@ -400,11 +408,13 @@ struct kvm_vcpu_arch {
 	struct dtl *dtl;
 	struct dtl *dtl_end;
 
+	wait_queue_head_t *wqp;
 	struct kvmppc_vcore *vcore;
 	int ret;
 	int trap;
 	int state;
 	int ptid;
+	bool timer_running;
 	wait_queue_head_t cpu_run;
 
 	struct kvm_vcpu_arch_shared *shared;
@@ -420,8 +430,9 @@ struct kvm_vcpu_arch {
 #endif
 };
 
-#define KVMPPC_VCPU_BUSY_IN_HOST	0
-#define KVMPPC_VCPU_BLOCKED		1
+/* Values for vcpu->arch.state */
+#define KVMPPC_VCPU_STOPPED		0
+#define KVMPPC_VCPU_BUSY_IN_HOST	1
 #define KVMPPC_VCPU_RUNNABLE		2
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d34cd32..833021c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -44,6 +44,7 @@
 #include <asm/compat.h>
 #include <asm/mmu.h>
 #include <asm/hvcall.h>
+#include <asm/xics.h>
 #endif
 #ifdef CONFIG_PPC_ISERIES
 #include <asm/iseries/alpaca.h>
@@ -457,6 +458,8 @@ int main(void)
 	DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
 	DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
 	DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
+	DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
+	DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa));
 	DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
 	DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
@@ -472,6 +475,7 @@ int main(void)
 	DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
 	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
+	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
 	DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
 			   offsetof(struct kvmppc_vcpu_book3s, vcpu));
 	DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
@@ -529,6 +533,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
 	HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
 	HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
+	HSTATE_FIELD(HSTATE_NAPPING, napping);
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
@@ -541,6 +546,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_DSCR, host_dscr);
 	HSTATE_FIELD(HSTATE_DABR, dabr);
 	HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+	DEFINE(IPI_PRIORITY, IPI_PRIORITY);
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 
 #else /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cc0d7f1..81c384d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -62,6 +62,8 @@
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define EXIT_DEBUG_INT */
 
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
+
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	local_paca->kvm_hstate.kvm_vcpu = vcpu;
@@ -72,40 +74,10 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
 }
 
-static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu);
-static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu);
-
-void kvmppc_vcpu_block(struct kvm_vcpu *vcpu)
-{
-	u64 now;
-	unsigned long dec_nsec;
-
-	now = get_tb();
-	if (now >= vcpu->arch.dec_expires && !kvmppc_core_pending_dec(vcpu))
-		kvmppc_core_queue_dec(vcpu);
-	if (vcpu->arch.pending_exceptions)
-		return;
-	if (vcpu->arch.dec_expires != ~(u64)0) {
-		dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC /
-			tb_ticks_per_sec;
-		hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
-			      HRTIMER_MODE_REL);
-	}
-
-	kvmppc_vcpu_blocked(vcpu);
-
-	kvm_vcpu_block(vcpu);
-	vcpu->stat.halt_wakeup++;
-
-	if (vcpu->arch.dec_expires != ~(u64)0)
-		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
-
-	kvmppc_vcpu_unblocked(vcpu);
-}
-
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 {
 	vcpu->arch.shregs.msr = msr;
+	kvmppc_end_cede(vcpu);
 }
 
 void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
@@ -257,15 +229,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 
 	switch (req) {
 	case H_CEDE:
-		vcpu->arch.shregs.msr |= MSR_EE;
-		vcpu->arch.ceded = 1;
-		smp_mb();
-		if (!vcpu->arch.prodded)
-			kvmppc_vcpu_block(vcpu);
-		else
-			vcpu->arch.prodded = 0;
-		smp_mb();
-		vcpu->arch.ceded = 0;
 		break;
 	case H_PROD:
 		target = kvmppc_get_gpr(vcpu, 4);
@@ -388,20 +351,6 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 	}
 
-
-	if (!(r & RESUME_HOST)) {
-		/* To avoid clobbering exit_reason, only check for signals if
-		 * we aren't already exiting to userspace for some other
-		 * reason. */
-		if (signal_pending(tsk)) {
-			vcpu->stat.signal_exits++;
-			run->exit_reason = KVM_EXIT_INTR;
-			r = -EINTR;
-		} else {
-			kvmppc_core_deliver_interrupts(vcpu);
-		}
-	}
-
 	return r;
 }
 
@@ -479,13 +428,9 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
 	/*
-	 * Some vcpus may start out in stopped state.  If we initialize
-	 * them to busy-in-host state they will stop other vcpus in the
-	 * vcore from running.  Instead we initialize them to blocked
-	 * state, effectively considering them to be stopped until we
-	 * see the first run ioctl for them.
+	 * We consider the vcpu stopped until we see the first run ioctl for it.
 	 */
-	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
+	vcpu->arch.state = KVMPPC_VCPU_STOPPED;
 
 	init_waitqueue_head(&vcpu->arch.cpu_run);
 
@@ -496,6 +441,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 		if (vcore) {
 			INIT_LIST_HEAD(&vcore->runnable_threads);
 			spin_lock_init(&vcore->lock);
+			init_waitqueue_head(&vcore->wq);
 		}
 		kvm->arch.vcores[core] = vcore;
 	}
@@ -506,7 +452,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 
 	spin_lock(&vcore->lock);
 	++vcore->num_threads;
-	++vcore->n_blocked;
 	spin_unlock(&vcore->lock);
 	vcpu->arch.vcore = vcore;
 
@@ -524,30 +469,31 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 	kfree(vcpu);
 }
 
-static void kvmppc_vcpu_blocked(struct kvm_vcpu *vcpu)
+static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	unsigned long dec_nsec, now;
 
-	spin_lock(&vc->lock);
-	vcpu->arch.state = KVMPPC_VCPU_BLOCKED;
-	++vc->n_blocked;
-	if (vc->n_runnable > 0 &&
-	    vc->n_runnable + vc->n_blocked == vc->num_threads) {
-		vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
-					arch.run_list);
-		wake_up(&vcpu->arch.cpu_run);
+	now = get_tb();
+	if (now > vcpu->arch.dec_expires) {
+		/* decrementer has already gone negative */
+		kvmppc_core_queue_dec(vcpu);
+		kvmppc_core_deliver_interrupts(vcpu);
+		return;
 	}
-	spin_unlock(&vc->lock);
+	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
+		   / tb_ticks_per_sec;
+	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
+		      HRTIMER_MODE_REL);
+	vcpu->arch.timer_running = 1;
 }
 
-static void kvmppc_vcpu_unblocked(struct kvm_vcpu *vcpu)
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
-
-	spin_lock(&vc->lock);
-	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
-	--vc->n_blocked;
-	spin_unlock(&vc->lock);
+	vcpu->arch.ceded = 0;
+	if (vcpu->arch.timer_running) {
+		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+		vcpu->arch.timer_running = 0;
+	}
 }
 
 extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
@@ -562,6 +508,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 		return;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 	--vc->n_runnable;
+	++vc->n_busy;
 	/* decrement the physical thread id of each following vcpu */
 	v = vcpu;
 	list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
@@ -575,15 +522,20 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
 	struct paca_struct *tpaca;
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
+	if (vcpu->arch.timer_running) {
+		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+		vcpu->arch.timer_running = 0;
+	}
 	cpu = vc->pcpu + vcpu->arch.ptid;
 	tpaca = &paca[cpu];
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
 	tpaca->kvm_hstate.kvm_vcore = vc;
+	tpaca->kvm_hstate.napping = 0;
+	vcpu->cpu = vc->pcpu;
 	smp_wmb();
 #ifdef CONFIG_PPC_ICP_NATIVE
 	if (vcpu->arch.ptid) {
 		tpaca->cpu_start = 0x80;
-		tpaca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST;
 		wmb();
 		xics_wake_cpu(cpu);
 		++vc->n_woken;
@@ -631,9 +583,10 @@ static int on_primary_thread(void)
  */
 static int kvmppc_run_core(struct kvmppc_vcore *vc)
 {
-	struct kvm_vcpu *vcpu, *vnext;
+	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
 	long ret;
 	u64 now;
+	int ptid;
 
 	/* don't start if any threads have a signal pending */
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
@@ -652,29 +605,50 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 		goto out;
 	}
 
+	/*
+	 * Assign physical thread IDs, first to non-ceded vcpus
+	 * and then to ceded ones.
+	 */
+	ptid = 0;
+	vcpu0 = NULL;
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+		if (!vcpu->arch.ceded) {
+			if (!ptid)
+				vcpu0 = vcpu;
+			vcpu->arch.ptid = ptid++;
+		}
+	}
+	if (!vcpu0)
+		return 0;		/* nothing to run */
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+		if (vcpu->arch.ceded)
+			vcpu->arch.ptid = ptid++;
+
 	vc->n_woken = 0;
 	vc->nap_count = 0;
 	vc->entry_exit_count = 0;
-	vc->vcore_running = 1;
+	vc->vcore_state = VCORE_RUNNING;
 	vc->in_guest = 0;
 	vc->pcpu = smp_processor_id();
+	vc->napping_threads = 0;
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
 		kvmppc_start_thread(vcpu);
-	vcpu = list_first_entry(&vc->runnable_threads, struct kvm_vcpu,
-				arch.run_list);
 
+	preempt_disable();
 	spin_unlock(&vc->lock);
 
-	preempt_disable();
 	kvm_guest_enter();
-	__kvmppc_vcore_entry(NULL, vcpu);
+	__kvmppc_vcore_entry(NULL, vcpu0);
 
-	/* wait for secondary threads to finish writing their state to memory */
 	spin_lock(&vc->lock);
+	/* disable sending of IPIs on virtual external irqs */
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+		vcpu->cpu = -1;
+	/* wait for secondary threads to finish writing their state to memory */
 	if (vc->nap_count < vc->n_woken)
 		kvmppc_wait_for_nap(vc);
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
-	vc->vcore_running = 2;
+	vc->vcore_state = VCORE_EXITING;
 	spin_unlock(&vc->lock);
 
 	/* make sure updates to secondary vcpu structs are visible now */
@@ -690,22 +664,26 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 		if (now < vcpu->arch.dec_expires &&
 		    kvmppc_core_pending_dec(vcpu))
 			kvmppc_core_dequeue_dec(vcpu);
-		if (!vcpu->arch.trap) {
-			if (signal_pending(vcpu->arch.run_task)) {
-				vcpu->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
-				vcpu->arch.ret = -EINTR;
-			}
-			continue;		/* didn't get to run */
-		}
-		ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
-					 vcpu->arch.run_task);
+
+		ret = RESUME_GUEST;
+		if (vcpu->arch.trap)
+			ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
+						 vcpu->arch.run_task);
+
 		vcpu->arch.ret = ret;
 		vcpu->arch.trap = 0;
+
+		if (vcpu->arch.ceded) {
+			if (ret != RESUME_GUEST)
+				kvmppc_end_cede(vcpu);
+			else
+				kvmppc_set_timer(vcpu);
+		}
 	}
 
 	spin_lock(&vc->lock);
  out:
-	vc->vcore_running = 0;
+	vc->vcore_state = VCORE_INACTIVE;
 	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
 				 arch.run_list) {
 		if (vcpu->arch.ret != RESUME_GUEST) {
@@ -717,82 +695,130 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 	return 1;
 }
 
-static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+/*
+ * Wait for some other vcpu thread to execute us, and
+ * wake us up when we need to handle something in the host.
+ */
+static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
 {
-	int ptid;
-	int wait_state;
-	struct kvmppc_vcore *vc;
 	DEFINE_WAIT(wait);
 
-	/* No need to go into the guest when all we do is going out */
-	if (signal_pending(current)) {
-		kvm_run->exit_reason = KVM_EXIT_INTR;
-		return -EINTR;
+	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+		schedule();
+	finish_wait(&vcpu->arch.cpu_run, &wait);
+}
+
+/*
+ * All the vcpus in this vcore are idle, so wait for a decrementer
+ * or external interrupt to one of the vcpus.  vc->lock is held.
+ */
+static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
+{
+	DEFINE_WAIT(wait);
+	struct kvm_vcpu *v;
+	int all_idle = 1;
+
+	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+	vc->vcore_state = VCORE_SLEEPING;
+	spin_unlock(&vc->lock);
+	list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
+		if (!v->arch.ceded || v->arch.pending_exceptions) {
+			all_idle = 0;
+			break;
+		}
 	}
+	if (all_idle)
+		schedule();
+	finish_wait(&vc->wq, &wait);
+	spin_lock(&vc->lock);
+	vc->vcore_state = VCORE_INACTIVE;
+}
 
-	/* On PPC970, check that we have an RMA region */
-	if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
-		return -EPERM;
+static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	int n_ceded;
+	int prev_state;
+	struct kvmppc_vcore *vc;
+	struct kvm_vcpu *v, *vn;
 
 	kvm_run->exit_reason = 0;
 	vcpu->arch.ret = RESUME_GUEST;
 	vcpu->arch.trap = 0;
 
-	flush_fp_to_thread(current);
-	flush_altivec_to_thread(current);
-	flush_vsx_to_thread(current);
-
 	/*
 	 * Synchronize with other threads in this virtual core
 	 */
 	vc = vcpu->arch.vcore;
 	spin_lock(&vc->lock);
-	/* This happens the first time this is called for a vcpu */
-	if (vcpu->arch.state == KVMPPC_VCPU_BLOCKED)
-		--vc->n_blocked;
-	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
-	ptid = vc->n_runnable;
+	vcpu->arch.ceded = 0;
 	vcpu->arch.run_task = current;
 	vcpu->arch.kvm_run = kvm_run;
-	vcpu->arch.ptid = ptid;
+	prev_state = vcpu->arch.state;
+	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
 	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
 	++vc->n_runnable;
 
-	wait_state = TASK_INTERRUPTIBLE;
-	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
-		if (signal_pending(current)) {
-			if (!vc->vcore_running) {
-				kvm_run->exit_reason = KVM_EXIT_INTR;
-				vcpu->arch.ret = -EINTR;
-				break;
-			}
-			/* have to wait for vcore to stop executing guest */
-			wait_state = TASK_UNINTERRUPTIBLE;
-			smp_send_reschedule(vc->pcpu);
+	/*
+	 * This happens the first time this is called for a vcpu.
+	 * If the vcore is already running, we may be able to start
+	 * this thread straight away and have it join in.
+	 */
+	if (prev_state == KVMPPC_VCPU_STOPPED) {
+		if (vc->vcore_state == VCORE_RUNNING &&
+		    VCORE_EXIT_COUNT(vc) == 0) {
+			vcpu->arch.ptid = vc->n_runnable - 1;
+			kvmppc_start_thread(vcpu);
 		}
 
-		if (!vc->vcore_running &&
-		    vc->n_runnable + vc->n_blocked == vc->num_threads) {
-			/* we can run now */
-			if (kvmppc_run_core(vc))
-				continue;
-		}
+	} else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
+		--vc->n_busy;
 
-		if (vc->vcore_running == 1 && VCORE_EXIT_COUNT(vc) == 0)
-			kvmppc_start_thread(vcpu);
+	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
+	       !signal_pending(current)) {
+		if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
+			spin_unlock(&vc->lock);
+			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
+			spin_lock(&vc->lock);
+			continue;
+		}
+		n_ceded = 0;
+		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+			n_ceded += v->arch.ceded;
+		if (n_ceded == vc->n_runnable)
+			kvmppc_vcore_blocked(vc);
+		else
+			kvmppc_run_core(vc);
+
+		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
+					 arch.run_list) {
+			kvmppc_core_deliver_interrupts(v);
+			if (signal_pending(v->arch.run_task)) {
+				kvmppc_remove_runnable(vc, v);
+				v->stat.signal_exits++;
+				v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+				v->arch.ret = -EINTR;
+				wake_up(&v->arch.cpu_run);
+			}
+		}
+	}
 
-		/* wait for other threads to come in, or wait for vcore */
-		prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
-		spin_unlock(&vc->lock);
-		schedule();
-		finish_wait(&vcpu->arch.cpu_run, &wait);
-		spin_lock(&vc->lock);
+	if (signal_pending(current)) {
+		if (vc->vcore_state == VCORE_RUNNING ||
+		    vc->vcore_state == VCORE_EXITING) {
+			spin_unlock(&vc->lock);
+			kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
+			spin_lock(&vc->lock);
+		}
+		if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+			kvmppc_remove_runnable(vc, vcpu);
+			vcpu->stat.signal_exits++;
+			kvm_run->exit_reason = KVM_EXIT_INTR;
+			vcpu->arch.ret = -EINTR;
+		}
 	}
 
-	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
-		kvmppc_remove_runnable(vc, vcpu);
 	spin_unlock(&vc->lock);
-
 	return vcpu->arch.ret;
 }
 
@@ -800,6 +826,21 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	int r;
 
+	/* No need to go into the guest when all we'll do is come back out */
+	if (signal_pending(current)) {
+		run->exit_reason = KVM_EXIT_INTR;
+		return -EINTR;
+	}
+
+	/* On PPC970, check that we have an RMA region */
+	if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201))
+		return -EPERM;
+
+	flush_fp_to_thread(current);
+	flush_altivec_to_thread(current);
+	flush_vsx_to_thread(current);
+	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+
 	do {
 		r = kvmppc_run_vcpu(run, vcpu);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 543ee50..0e2ea04 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -52,7 +52,7 @@ kvmppc_skip_Hinterrupt:
 	b	.
 
 /*
- * Call kvmppc_handler_trampoline_enter in real mode.
+ * Call kvmppc_hv_entry in real mode.
  * Must be called with interrupts hard-disabled.
  *
  * Input Registers:
@@ -92,6 +92,12 @@ _GLOBAL(kvmppc_hv_entry_trampoline)
 kvm_start_guest:
 	ld	r1,PACAEMERGSP(r13)
 	subi	r1,r1,STACK_FRAME_OVERHEAD
+	ld	r2,PACATOC(r13)
+
+	/* were we napping due to cede? */
+	lbz	r0,HSTATE_NAPPING(r13)
+	cmpwi	r0,0
+	bne	kvm_end_cede
 
 	/* get vcpu pointer */
 	ld	r4, HSTATE_KVM_VCPU(r13)
@@ -279,15 +285,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	cmpwi	r0,0
 	beq	20b
 
-	/* Set LPCR.  Set the MER bit if there is a pending external irq. */
+	/* Set LPCR and RMOR. */
 10:	ld	r8,KVM_LPCR(r9)
-	ld	r0,VCPU_PENDING_EXC(r4)
-	li	r7,(1 << BOOK3S_IRQPRIO_EXTERNAL)
-	oris	r7,r7,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
-	and.	r0,r0,r7
-	beq	11f
-	ori	r8,r8,LPCR_MER
-11:	mtspr	SPRN_LPCR,r8
+	mtspr	SPRN_LPCR,r8
 	ld	r8,KVM_RMOR(r9)
 	mtspr	SPRN_RMOR,r8
 	isync
@@ -451,19 +451,50 @@ toc_tlbie_lock:
 	mtctr	r6
 	mtxer	r7
 
-	/* Move SRR0 and SRR1 into the respective regs */
+kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
 	ld	r6, VCPU_SRR0(r4)
 	ld	r7, VCPU_SRR1(r4)
-	mtspr	SPRN_SRR0, r6
-	mtspr	SPRN_SRR1, r7
-
 	ld	r10, VCPU_PC(r4)
+	ld	r11, VCPU_MSR(r4)	/* r11 = vcpu->arch.msr & ~MSR_HV */
 
-	ld	r11, VCPU_MSR(r4)	/* r10 = vcpu->arch.msr & ~MSR_HV */
 	rldicl	r11, r11, 63 - MSR_HV_LG, 1
 	rotldi	r11, r11, 1 + MSR_HV_LG
 	ori	r11, r11, MSR_ME
 
+	/* Check if we can deliver an external or decrementer interrupt now */
+	ld	r0,VCPU_PENDING_EXC(r4)
+	li	r8,(1 << BOOK3S_IRQPRIO_EXTERNAL)
+	oris	r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
+	and	r0,r0,r8
+	cmpdi	cr1,r0,0
+	andi.	r0,r11,MSR_EE
+	beq	cr1,11f
+BEGIN_FTR_SECTION
+	mfspr	r8,SPRN_LPCR
+	ori	r8,r8,LPCR_MER
+	mtspr	SPRN_LPCR,r8
+	isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+	beq	5f
+	li	r0,BOOK3S_INTERRUPT_EXTERNAL
+12:	mr	r6,r10
+	mr	r10,r0
+	mr	r7,r11
+	li	r11,(MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
+	rotldi	r11,r11,63
+	b	5f
+11:	beq	5f
+	mfspr	r0,SPRN_DEC
+	cmpwi	r0,0
+	li	r0,BOOK3S_INTERRUPT_DECREMENTER
+	blt	12b
+
+	/* Move SRR0 and SRR1 into the respective regs */
+5:	mtspr	SPRN_SRR0, r6
+	mtspr	SPRN_SRR1, r7
+	li	r0,0
+	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
+
 fast_guest_return:
 	mtspr	SPRN_HSRR0,r10
 	mtspr	SPRN_HSRR1,r11
@@ -577,21 +608,20 @@ kvmppc_interrupt:
 	/* See if this is something we can handle in real mode */
 	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
 	beq	hcall_try_real_mode
-hcall_real_cont:
 
 	/* Check for mediated interrupts (could be done earlier really ...) */
 BEGIN_FTR_SECTION
 	cmpwi	r12,BOOK3S_INTERRUPT_EXTERNAL
 	bne+	1f
-	ld	r5,VCPU_KVM(r9)
-	ld	r5,KVM_LPCR(r5)
 	andi.	r0,r11,MSR_EE
 	beq	1f
+	mfspr	r5,SPRN_LPCR
 	andi.	r0,r5,LPCR_MER
 	bne	bounce_ext_interrupt
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
+hcall_real_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	/* Save DEC */
 	mfspr	r5,SPRN_DEC
 	mftb	r6
@@ -685,7 +715,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
 	slbia
 	ptesync
 
-hdec_soon:
+hdec_soon:			/* r9 = vcpu, r12 = trap, r13 = paca */
 BEGIN_FTR_SECTION
 	b	32f
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
@@ -703,6 +733,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	addi	r0,r3,0x100
 	stwcx.	r0,0,r6
 	bne	41b
+	lwsync
 
 	/*
 	 * At this point we have an interrupt that we have to pass
@@ -716,18 +747,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	 * interrupt, since the other threads will already be on their
 	 * way here in that case.
 	 */
+	cmpwi	r3,0x100	/* Are we the first here? */
+	bge	43f
+	cmpwi	r3,1		/* Are any other threads in the guest? */
+	ble	43f
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	beq	40f
-	cmpwi	r3,0x100	/* Are we the first here? */
-	bge	40f
-	cmpwi	r3,1
-	ble	40f
 	li	r0,0
 	mtspr	SPRN_HDEC,r0
 40:
+	/*
+	 * Send an IPI to any napping threads, since an HDEC interrupt
+	 * doesn't wake CPUs up from nap.
+	 */
+	lwz	r3,VCORE_NAPPING_THREADS(r5)
+	lwz	r4,VCPU_PTID(r9)
+	li	r0,1
+	sldi	r0,r0,r4
+	andc.	r3,r3,r0		/* no sense IPI'ing ourselves */
+	beq	43f
+	mulli	r4,r4,PACA_SIZE		/* get paca for thread 0 */
+	subf	r6,r4,r13
+42:	andi.	r0,r3,1
+	beq	44f
+	ld	r8,HSTATE_XICS_PHYS(r6)	/* get thread's XICS reg addr */
+	li	r0,IPI_PRIORITY
+	li	r7,XICS_QIRR
+	stbcix	r0,r7,r8		/* trigger the IPI */
+44:	srdi.	r3,r3,1
+	addi	r6,r6,PACA_SIZE
+	bne	42b
 
 	/* Secondary threads wait for primary to do partition switch */
-	ld	r4,VCPU_KVM(r9)		/* pointer to struct kvm */
+43:	ld	r4,VCPU_KVM(r9)		/* pointer to struct kvm */
 	ld	r5,HSTATE_KVM_VCORE(r13)
 	lwz	r3,VCPU_PTID(r9)
 	cmpwi	r3,0
@@ -1080,7 +1132,6 @@ hcall_try_real_mode:
 hcall_real_fallback:
 	li	r12,BOOK3S_INTERRUPT_SYSCALL
 	ld	r9, HSTATE_KVM_VCPU(r13)
-	ld	r11, VCPU_MSR(r9)
 
 	b	hcall_real_cont
 
@@ -1142,7 +1193,7 @@ hcall_real_table:
 	.long	0		/* 0xd4 */
 	.long	0		/* 0xd8 */
 	.long	0		/* 0xdc */
-	.long	0		/* 0xe0 */
+	.long	.kvmppc_h_cede - hcall_real_table
 	.long	0		/* 0xe4 */
 	.long	0		/* 0xe8 */
 	.long	0		/* 0xec */
@@ -1171,7 +1222,8 @@ bounce_ext_interrupt:
 	mtspr	SPRN_SRR0,r10
 	mtspr	SPRN_SRR1,r11
 	li	r10,BOOK3S_INTERRUPT_EXTERNAL
-	LOAD_REG_IMMEDIATE(r11,MSR_SF | MSR_ME);
+	li	r11,(MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
+	rotldi	r11,r11,63
 	b	fast_guest_return
 
 _GLOBAL(kvmppc_h_set_dabr)
@@ -1180,6 +1232,178 @@ _GLOBAL(kvmppc_h_set_dabr)
 	li	r3,0
 	blr
 
+_GLOBAL(kvmppc_h_cede)
+	ori	r11,r11,MSR_EE
+	std	r11,VCPU_MSR(r3)
+	li	r0,1
+	stb	r0,VCPU_CEDED(r3)
+	sync			/* order setting ceded vs. testing prodded */
+	lbz	r5,VCPU_PRODDED(r3)
+	cmpwi	r5,0
+	bne	1f
+	li	r0,0		/* set trap to 0 to say hcall is handled */
+	stw	r0,VCPU_TRAP(r3)
+	li	r0,H_SUCCESS
+	std	r0,VCPU_GPR(r3)(r3)
+BEGIN_FTR_SECTION
+	b	2f		/* just send it up to host on 970 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
+
+	/*
+	 * Set our bit in the bitmask of napping threads unless all the
+	 * other threads are already napping, in which case we send this
+	 * up to the host.
+	 */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	lwz	r6,VCPU_PTID(r3)
+	lwz	r8,VCORE_ENTRY_EXIT(r5)
+	clrldi	r8,r8,56
+	li	r0,1
+	sld	r0,r0,r6
+	addi	r6,r5,VCORE_NAPPING_THREADS
+31:	lwarx	r4,0,r6
+	or	r4,r4,r0
+	popcntw	r7,r4
+	cmpw	r7,r8
+	bge	2f
+	stwcx.	r4,0,r6
+	bne	31b
+	li	r0,1
+	stb	r0,HSTATE_NAPPING(r13)
+	/* order napping_threads update vs testing entry_exit_count */
+	lwsync
+	mr	r4,r3
+	lwz	r7,VCORE_ENTRY_EXIT(r5)
+	cmpwi	r7,0x100
+	bge	33f		/* another thread already exiting */
+
+/*
+ * Although not specifically required by the architecture, POWER7
+ * preserves the following registers in nap mode, even if an SMT mode
+ * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
+ * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
+ */
+	/* Save non-volatile GPRs */
+	std	r14, VCPU_GPR(r14)(r3)
+	std	r15, VCPU_GPR(r15)(r3)
+	std	r16, VCPU_GPR(r16)(r3)
+	std	r17, VCPU_GPR(r17)(r3)
+	std	r18, VCPU_GPR(r18)(r3)
+	std	r19, VCPU_GPR(r19)(r3)
+	std	r20, VCPU_GPR(r20)(r3)
+	std	r21, VCPU_GPR(r21)(r3)
+	std	r22, VCPU_GPR(r22)(r3)
+	std	r23, VCPU_GPR(r23)(r3)
+	std	r24, VCPU_GPR(r24)(r3)
+	std	r25, VCPU_GPR(r25)(r3)
+	std	r26, VCPU_GPR(r26)(r3)
+	std	r27, VCPU_GPR(r27)(r3)
+	std	r28, VCPU_GPR(r28)(r3)
+	std	r29, VCPU_GPR(r29)(r3)
+	std	r30, VCPU_GPR(r30)(r3)
+	std	r31, VCPU_GPR(r31)(r3)
+
+	/* save FP state */
+	bl	.kvmppc_save_fp
+
+	/*
+	 * Take a nap until a decrementer or external interrupt occurs,
+	 * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR
+	 */
+	li	r0,0x80
+	stb	r0,PACAPROCSTART(r13)
+	mfspr	r5,SPRN_LPCR
+	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
+	mtspr	SPRN_LPCR,r5
+	isync
+	li	r0, 0
+	std	r0, HSTATE_SCRATCH0(r13)
+	ptesync
+	ld	r0, HSTATE_SCRATCH0(r13)
+1:	cmpd	r0, r0
+	bne	1b
+	nap
+	b	.
+
+kvm_end_cede:
+	/* Woken by external or decrementer interrupt */
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r2, PACATOC(r13)
+
+	/* If we're a secondary thread and we got here by an IPI, ack it */
+	ld	r4,HSTATE_KVM_VCPU(r13)
+	lwz	r3,VCPU_PTID(r4)
+	cmpwi	r3,0
+	beq	27f
+	mfspr	r3,SPRN_SRR1
+	rlwinm	r3,r3,44-31,0x7		/* extract wake reason field */
+	cmpwi	r3,4			/* was it an external interrupt? */
+	bne	27f
+	ld	r5, HSTATE_XICS_PHYS(r13)
+	li	r0,0xff
+	li	r6,XICS_QIRR
+	li	r7,XICS_XIRR
+	lwzcix	r8,r5,r7		/* ack the interrupt */
+	sync
+	stbcix	r0,r5,r6		/* clear it */
+	stwcix	r8,r5,r7		/* EOI it */
+27:
+	/* load up FP state */
+	bl	kvmppc_load_fp
+
+	/* Load NV GPRS */
+	ld	r14, VCPU_GPR(r14)(r4)
+	ld	r15, VCPU_GPR(r15)(r4)
+	ld	r16, VCPU_GPR(r16)(r4)
+	ld	r17, VCPU_GPR(r17)(r4)
+	ld	r18, VCPU_GPR(r18)(r4)
+	ld	r19, VCPU_GPR(r19)(r4)
+	ld	r20, VCPU_GPR(r20)(r4)
+	ld	r21, VCPU_GPR(r21)(r4)
+	ld	r22, VCPU_GPR(r22)(r4)
+	ld	r23, VCPU_GPR(r23)(r4)
+	ld	r24, VCPU_GPR(r24)(r4)
+	ld	r25, VCPU_GPR(r25)(r4)
+	ld	r26, VCPU_GPR(r26)(r4)
+	ld	r27, VCPU_GPR(r27)(r4)
+	ld	r28, VCPU_GPR(r28)(r4)
+	ld	r29, VCPU_GPR(r29)(r4)
+	ld	r30, VCPU_GPR(r30)(r4)
+	ld	r31, VCPU_GPR(r31)(r4)
+
+	/* clear our bit in vcore->napping_threads */
+33:	ld	r5,HSTATE_KVM_VCORE(r13)
+	lwz	r3,VCPU_PTID(r4)
+	li	r0,1
+	sld	r0,r0,r3
+	addi	r6,r5,VCORE_NAPPING_THREADS
+32:	lwarx	r7,0,r6
+	andc	r7,r7,r0
+	stwcx.	r7,0,r6
+	bne	32b
+	li	r0,0
+	stb	r0,HSTATE_NAPPING(r13)
+
+	/* see if any other thread is already exiting */
+	lwz	r0,VCORE_ENTRY_EXIT(r5)
+	cmpwi	r0,0x100
+	blt	kvmppc_cede_reentry	/* if not go back to guest */
+
+	/* some threads are exiting, so go to the guest exit path */
+	b	hcall_real_fallback
+
+	/* cede when already previously prodded case */
+1:	li	r0,0
+	stb	r0,VCPU_PRODDED(r3)
+	sync			/* order testing prodded vs. clearing ceded */
+	stb	r0,VCPU_CEDED(r3)
+	li	r3,H_SUCCESS
+	blr
+
+	/* we've ceded but we want to give control to the host */
+2:	li	r3,H_TOO_HARD
+	blr
+
 secondary_too_late:
 	ld	r5,HSTATE_KVM_VCORE(r13)
 	HMT_LOW
@@ -1197,14 +1421,20 @@ secondary_too_late:
 	slbmte	r6,r5
 1:	addi	r11,r11,16
 	.endr
-	b	50f
 
 secondary_nap:
-	/* Clear any pending IPI */
-50:	ld	r5, HSTATE_XICS_PHYS(r13)
+	/* Clear any pending IPI - assume we're a secondary thread */
+	ld	r5, HSTATE_XICS_PHYS(r13)
+	li	r7, XICS_XIRR
+	lwzcix	r3, r5, r7		/* ack any pending interrupt */
+	rlwinm.	r0, r3, 0, 0xffffff	/* any pending? */
+	beq	37f
+	sync
 	li	r0, 0xff
 	li	r6, XICS_QIRR
-	stbcix	r0, r5, r6
+	stbcix	r0, r5, r6		/* clear the IPI */
+	stwcix	r3, r5, r7		/* EOI it */
+37:	sync
 
 	/* increment the nap count and then go to nap mode */
 	ld	r4, HSTATE_KVM_VCORE(r13)
@@ -1214,13 +1444,12 @@ secondary_nap:
 	addi	r3, r3, 1
 	stwcx.	r3, 0, r4
 	bne	51b
-	isync
 
+	li	r3, LPCR_PECE0
 	mfspr	r4, SPRN_LPCR
-	li	r0, LPCR_PECE
-	andc	r4, r4, r0
-	ori	r4, r4, LPCR_PECE0	/* exit nap on interrupt */
+	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
 	mtspr	SPRN_LPCR, r4
+	isync
 	li	r0, 0
 	std	r0, HSTATE_SCRATCH0(r13)
 	ptesync
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a107c9b..cd0e3e5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -39,12 +39,8 @@
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-#ifndef CONFIG_KVM_BOOK3S_64_HV
 	return !(v->arch.shared->msr & MSR_WE) ||
 	       !!(v->arch.pending_exceptions);
-#else
-	return !(v->arch.ceded) || !!(v->arch.pending_exceptions);
-#endif
 }
 
 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
@@ -258,6 +254,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
 	struct kvm_vcpu *vcpu;
 	vcpu = kvmppc_core_vcpu_create(kvm, id);
+	vcpu->arch.wqp = &vcpu->wq;
 	if (!IS_ERR(vcpu))
 		kvmppc_create_vcpu_debugfs(vcpu, id);
 	return vcpu;
@@ -289,8 +286,8 @@ static void kvmppc_decrementer_func(unsigned long data)
 
 	kvmppc_core_queue_dec(vcpu);
 
-	if (waitqueue_active(&vcpu->wq)) {
-		wake_up_interruptible(&vcpu->wq);
+	if (waitqueue_active(vcpu->arch.wqp)) {
+		wake_up_interruptible(vcpu->arch.wqp);
 		vcpu->stat.halt_wakeup++;
 	}
 }
@@ -543,13 +540,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
-	if (irq->irq == KVM_INTERRUPT_UNSET)
+	if (irq->irq == KVM_INTERRUPT_UNSET) {
 		kvmppc_core_dequeue_external(vcpu, irq);
-	else
-		kvmppc_core_queue_external(vcpu, irq);
+		return 0;
+	}
+
+	kvmppc_core_queue_external(vcpu, irq);
 
-	if (waitqueue_active(&vcpu->wq)) {
-		wake_up_interruptible(&vcpu->wq);
+	if (waitqueue_active(vcpu->arch.wqp)) {
+		wake_up_interruptible(vcpu->arch.wqp);
 		vcpu->stat.halt_wakeup++;
 	} else if (vcpu->cpu != -1) {
 		smp_send_reschedule(vcpu->cpu);
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 2/3] KVM: PPC: book3s_pr: Simplify transitions between virtual and real mode
From: Paul Mackerras @ 2011-07-23  7:41 UTC (permalink / raw)
  To: linuxppc-dev, kvm-ppc, Alexander Graf
In-Reply-To: <20110723074111.GA17927@bloggs.ozlabs.ibm.com>

This simplifies the way that the book3s_pr makes the transition to
real mode when entering the guest.  We now call kvmppc_entry_trampoline
(renamed from kvmppc_rmcall) in the base kernel using a normal function
call instead of doing an indirect call through a pointer in the vcpu.
If kvm is a module, the module loader takes care of generating a
trampoline as it does for other calls to functions outside the module.

kvmppc_entry_trampoline then disables interrupts and jumps to
kvmppc_handler_trampoline_enter in real mode using an rfi[d].
That then uses the link register as the address to return to
(potentially in module space) when the guest exits.

This also simplifies the way that we call the Linux interrupt handler
when we exit the guest due to an external, decrementer or performance
monitor interrupt.  Instead of turning on the MMU, then deciding that
we need to call the Linux handler and turning the MMU back off again,
we now go straight to the handler at the point where we would turn the
MMU on.  The handler will then return to the virtual-mode code
(potentially in the module).

Along the way, this moves the setting and clearing of the HID5 DCBZ32
bit into real-mode interrupts-off code, and also makes sure that
we clear the MSR[RI] bit before loading values into SRR0/1.

The net result is that we no longer need any code addresses to be
stored in vcpu->arch.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_book3s.h     |    4 +-
 arch/powerpc/include/asm/kvm_book3s_asm.h |    1 +
 arch/powerpc/include/asm/kvm_host.h       |    8 --
 arch/powerpc/kernel/asm-offsets.c         |    7 +--
 arch/powerpc/kvm/book3s_32_sr.S           |    2 +-
 arch/powerpc/kvm/book3s_64_slb.S          |    2 +-
 arch/powerpc/kvm/book3s_exports.c         |    4 +-
 arch/powerpc/kvm/book3s_interrupts.S      |  129 +---------------------------
 arch/powerpc/kvm/book3s_pr.c              |   12 ---
 arch/powerpc/kvm/book3s_rmhandlers.S      |   51 ++++--------
 arch/powerpc/kvm/book3s_segment.S         |  112 ++++++++++++++++++++-----
 11 files changed, 120 insertions(+), 212 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 98da010..a70c0e6 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -139,9 +139,7 @@ extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
-extern void kvmppc_handler_lowmem_trampoline(void);
-extern void kvmppc_handler_trampoline_enter(void);
-extern void kvmppc_rmcall(ulong srr0, ulong srr1);
+extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
 extern void kvmppc_load_up_fpu(void);
 extern void kvmppc_load_up_altivec(void);
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index ef7b368..af73469 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -75,6 +75,7 @@ struct kvmppc_host_state {
 	ulong scratch0;
 	ulong scratch1;
 	u8 in_guest;
+	u8 restore_hid5;
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	struct kvm_vcpu *kvm_vcpu;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index cc22b28..db15384 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -258,14 +258,6 @@ struct kvm_vcpu_arch {
 	ulong host_stack;
 	u32 host_pid;
 #ifdef CONFIG_PPC_BOOK3S
-	ulong host_msr;
-	ulong host_r2;
-	void *host_retip;
-	ulong trampoline_lowmem;
-	ulong trampoline_enter;
-	ulong highmem_handler;
-	ulong rmcall;
-	ulong host_paca_phys;
 	struct kvmppc_slb slb[64];
 	int slb_max;		/* 1 + index of last valid entry in slb[] */
 	int slb_nr;		/* total number of entries in SLB */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 54b935f..d34cd32 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -446,8 +446,6 @@ int main(void)
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
-	DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
-	DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
 	DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
 	DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
 	DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
@@ -455,10 +453,6 @@ int main(void)
 	DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
 	DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));
 	DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr));
-	DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
-	DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
-	DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
-	DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
 	DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
 	DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
 	DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
@@ -534,6 +528,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
 	HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
 	HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
+	HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
index 3608471..7e06a6f 100644
--- a/arch/powerpc/kvm/book3s_32_sr.S
+++ b/arch/powerpc/kvm/book3s_32_sr.S
@@ -31,7 +31,7 @@
 	 * R1 = host R1
 	 * R2 = host R2
 	 * R3 = shadow vcpu
-	 * all other volatile GPRS = free
+	 * all other volatile GPRS = free except R4, R6
 	 * SVCPU[CR]  = guest CR
 	 * SVCPU[XER] = guest XER
 	 * SVCPU[CTR] = guest CTR
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 04e7d3b..f2e6e48 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -53,7 +53,7 @@ slb_exit_skip_ ## num:
 	 * R1 = host R1
 	 * R2 = host R2
 	 * R3 = shadow vcpu
-	 * all other volatile GPRS = free
+	 * all other volatile GPRS = free except R4, R6
 	 * SVCPU[CR]  = guest CR
 	 * SVCPU[XER] = guest XER
 	 * SVCPU[CTR] = guest CTR
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 88c8f26..f7f63a0 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -23,9 +23,7 @@
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 EXPORT_SYMBOL_GPL(kvmppc_hv_entry_trampoline);
 #else
-EXPORT_SYMBOL_GPL(kvmppc_handler_trampoline_enter);
-EXPORT_SYMBOL_GPL(kvmppc_handler_lowmem_trampoline);
-EXPORT_SYMBOL_GPL(kvmppc_rmcall);
+EXPORT_SYMBOL_GPL(kvmppc_entry_trampoline);
 EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
 #ifdef CONFIG_ALTIVEC
 EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index c54b0e3..0a8515a 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -29,27 +29,11 @@
 #define ULONG_SIZE 		8
 #define FUNC(name) 		GLUE(.,name)
 
-#define GET_SHADOW_VCPU_R13
-
-#define DISABLE_INTERRUPTS	\
-	mfmsr   r0;		\
-	rldicl  r0,r0,48,1;	\
-	rotldi  r0,r0,16;	\
-	mtmsrd  r0,1;		\
-
 #elif defined(CONFIG_PPC_BOOK3S_32)
 
 #define ULONG_SIZE              4
 #define FUNC(name)		name
 
-#define GET_SHADOW_VCPU_R13	\
-	lwz	r13, (THREAD + THREAD_KVM_SVCPU)(r2)
-
-#define DISABLE_INTERRUPTS	\
-	mfmsr   r0;		\
-	rlwinm  r0,r0,0,17,15;	\
-	mtmsr   r0;		\
-
 #endif /* CONFIG_PPC_BOOK3S_XX */
 
 
@@ -108,44 +92,17 @@ kvm_start_entry:
 
 kvm_start_lightweight:
 
-	GET_SHADOW_VCPU_R13
-	PPC_LL	r3, VCPU_HIGHMEM_HANDLER(r4)
-	PPC_STL	r3, HSTATE_VMHANDLER(r13)
-
-	PPC_LL	r10, VCPU_SHADOW_MSR(r4)	/* r10 = vcpu->arch.shadow_msr */
-
-	DISABLE_INTERRUPTS
-
 #ifdef CONFIG_PPC_BOOK3S_64
-	/* Some guests may need to have dcbz set to 32 byte length.
-	 *
-	 * Usually we ensure that by patching the guest's instructions
-	 * to trap on dcbz and emulate it in the hypervisor.
-	 *
-	 * If we can, we should tell the CPU to use 32 byte dcbz though,
-	 * because that's a lot faster.
-	 */
-
 	PPC_LL	r3, VCPU_HFLAGS(r4)
-	rldicl.	r3, r3, 0, 63		/* CR = ((r3 & 1) == 0) */
-	beq	no_dcbz32_on
-
-	mfspr   r3,SPRN_HID5
-	ori     r3, r3, 0x80		/* XXX HID5_dcbz32 = 0x80 */
-	mtspr   SPRN_HID5,r3
-
-no_dcbz32_on:
-
+	rldicl	r3, r3, 0, 63		/* r3 &= 1 */
+	stb	r3, HSTATE_RESTORE_HID5(r13)
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
-	PPC_LL	r6, VCPU_RMCALL(r4)
-	mtctr	r6
-
-	PPC_LL	r3, VCPU_TRAMPOLINE_ENTER(r4)
-	LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
+	PPC_LL	r4, VCPU_SHADOW_MSR(r4)	/* get shadow_msr */
 
 	/* Jump to segment patching handler and into our guest */
-	bctr
+	bl	FUNC(kvmppc_entry_trampoline)
+	nop
 
 /*
  * This is the handler in module memory. It gets jumped at from the
@@ -170,21 +127,6 @@ kvmppc_handler_highmem:
 	/* R7 = vcpu */
 	PPC_LL	r7, GPR4(r1)
 
-#ifdef CONFIG_PPC_BOOK3S_64
-
-	PPC_LL	r5, VCPU_HFLAGS(r7)
-	rldicl.	r5, r5, 0, 63		/* CR = ((r5 & 1) == 0) */
-	beq	no_dcbz32_off
-
-	li	r4, 0
-	mfspr   r5,SPRN_HID5
-	rldimi  r5,r4,6,56
-	mtspr   SPRN_HID5,r5
-
-no_dcbz32_off:
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
 	PPC_STL	r14, VCPU_GPR(r14)(r7)
 	PPC_STL	r15, VCPU_GPR(r15)(r7)
 	PPC_STL	r16, VCPU_GPR(r16)(r7)
@@ -204,67 +146,6 @@ no_dcbz32_off:
 	PPC_STL	r30, VCPU_GPR(r30)(r7)
 	PPC_STL	r31, VCPU_GPR(r31)(r7)
 
-	/* Restore host msr -> SRR1 */
-	PPC_LL	r6, VCPU_HOST_MSR(r7)
-
-	/*
-	 * For some interrupts, we need to call the real Linux
-	 * handler, so it can do work for us. This has to happen
-	 * as if the interrupt arrived from the kernel though,
-	 * so let's fake it here where most state is restored.
-	 *
-	 * Call Linux for hardware interrupts/decrementer
-	 * r3 = address of interrupt handler (exit reason)
-	 */
-
-	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-	beq	call_linux_handler
-	cmpwi	r12, BOOK3S_INTERRUPT_DECREMENTER
-	beq	call_linux_handler
-	cmpwi	r12, BOOK3S_INTERRUPT_PERFMON
-	beq	call_linux_handler
-
-	/* Back to EE=1 */
-	mtmsr	r6
-	sync
-	b	kvm_return_point
-
-call_linux_handler:
-
-	/*
-	 * If we land here we need to jump back to the handler we
-	 * came from.
-	 *
-	 * We have a page that we can access from real mode, so let's
-	 * jump back to that and use it as a trampoline to get back into the
-	 * interrupt handler!
-	 *
-	 * R3 still contains the exit code,
-	 * R5 VCPU_HOST_RETIP and
-	 * R6 VCPU_HOST_MSR
-	 */
-
-	/* Restore host IP -> SRR0 */
-	PPC_LL	r5, VCPU_HOST_RETIP(r7)
-
-	/* XXX Better move to a safe function?
-	 *     What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
-
-	mtlr	r12
-
-	PPC_LL	r4, VCPU_TRAMPOLINE_LOWMEM(r7)
-	mtsrr0	r4
-	LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
-	mtsrr1	r3
-
-	RFI
-
-.global kvm_return_point
-kvm_return_point:
-
-	/* Jump back to lightweight entry if we're supposed to */
-	/* go back into the guest */
-
 	/* Pass the exit number as 3rd argument to kvmppc_handle_exit */
 	mr	r5, r12
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 0c0d3f2..d7ab552 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -841,8 +841,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (!p)
 		goto uninit_vcpu;
 
-	vcpu->arch.host_retip = kvm_return_point;
-	vcpu->arch.host_msr = mfmsr();
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* default to book3s_64 (970fx) */
 	vcpu->arch.pvr = 0x3C0301;
@@ -853,16 +851,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
 	vcpu->arch.slb_nr = 64;
 
-	/* remember where some real-mode handlers are */
-	vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline);
-	vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter);
-	vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
-#ifdef CONFIG_PPC_BOOK3S_64
-	vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
-#else
-	vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
-#endif
-
 	vcpu->arch.shadow_msr = MSR_USER64;
 
 	err = kvmppc_mmu_init(vcpu);
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 5ee66ed..3418758 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -36,9 +36,8 @@
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 
-#define LOAD_SHADOW_VCPU(reg)	GET_PACA(reg)					
-#define MSR_NOIRQ		MSR_KERNEL & ~(MSR_IR | MSR_DR)
 #define FUNC(name) 		GLUE(.,name)
+#define MTMSR_EERI(reg)		mtmsrd	(reg),1
 
 	.globl	kvmppc_skip_interrupt
 kvmppc_skip_interrupt:
@@ -68,8 +67,8 @@ kvmppc_skip_Hinterrupt:
 
 #elif defined(CONFIG_PPC_BOOK3S_32)
 
-#define MSR_NOIRQ		MSR_KERNEL
 #define FUNC(name)		name
+#define MTMSR_EERI(reg)		mtmsr	(reg)
 
 .macro INTERRUPT_TRAMPOLINE intno
 
@@ -170,40 +169,24 @@ kvmppc_handler_skip_ins:
 #endif
 
 /*
- * This trampoline brings us back to a real mode handler
- *
- * Input Registers:
- *
- * R5 = SRR0
- * R6 = SRR1
- * LR = real-mode IP
+ * Call kvmppc_handler_trampoline_enter in real mode
  *
+ * On entry, r4 contains the guest shadow MSR
  */
-.global kvmppc_handler_lowmem_trampoline
-kvmppc_handler_lowmem_trampoline:
-
-	mtsrr0	r5
+_GLOBAL(kvmppc_entry_trampoline)
+	mfmsr	r5
+	LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
+	toreal(r7)
+
+	li	r9, MSR_RI
+	ori	r9, r9, MSR_EE
+	andc	r9, r5, r9	/* Clear EE and RI in MSR value */
+	li	r6, MSR_IR | MSR_DR
+	ori	r6, r6, MSR_EE
+	andc	r6, r5, r6	/* Clear EE, DR and IR in MSR value */
+	MTMSR_EERI(r9)		/* Clear EE and RI in MSR */
+	mtsrr0	r7		/* before we set srr0/1 */
 	mtsrr1	r6
-	blr
-kvmppc_handler_lowmem_trampoline_end:
-
-/*
- * Call a function in real mode
- *
- * Input Registers:
- *
- * R3 = function
- * R4 = MSR
- * R5 = scratch register
- *
- */
-_GLOBAL(kvmppc_rmcall)
-	LOAD_REG_IMMEDIATE(r5, MSR_NOIRQ)
-	mtmsr	r5		/* Disable relocation and interrupts, so mtsrr
-				   doesn't get interrupted */
-	sync
-	mtsrr0	r3
-	mtsrr1	r4
 	RFI
 
 #if defined(CONFIG_PPC_BOOK3S_32)
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index aed32e5..3663564 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -23,6 +23,7 @@
 
 #define GET_SHADOW_VCPU(reg)    \
 	mr	reg, r13
+#define MTMSR_EERI(reg)		mtmsrd	(reg),1
 
 #elif defined(CONFIG_PPC_BOOK3S_32)
 
@@ -30,6 +31,7 @@
 	tophys(reg, r2);       			\
 	lwz     reg, (THREAD + THREAD_KVM_SVCPU)(reg);	\
 	tophys(reg, reg)
+#define MTMSR_EERI(reg)		mtmsr	(reg)
 
 #endif
 
@@ -57,10 +59,12 @@ kvmppc_handler_trampoline_enter:
 	/* Required state:
 	 *
 	 * MSR = ~IR|DR
-	 * R13 = PACA
 	 * R1 = host R1
 	 * R2 = host R2
-	 * R10 = guest MSR
+	 * R4 = guest shadow MSR
+	 * R5 = normal host MSR
+	 * R6 = current host MSR (EE, IR, DR off)
+	 * LR = highmem guest exit code
 	 * all other volatile GPRS = free
 	 * SVCPU[CR] = guest CR
 	 * SVCPU[XER] = guest XER
@@ -71,15 +75,15 @@ kvmppc_handler_trampoline_enter:
 	/* r3 = shadow vcpu */
 	GET_SHADOW_VCPU(r3)
 
+	/* Save guest exit handler address and MSR */
+	mflr	r0
+	PPC_STL	r0, HSTATE_VMHANDLER(r3)
+	PPC_STL	r5, HSTATE_HOST_MSR(r3)
+
 	/* Save R1/R2 in the PACA (64-bit) or shadow_vcpu (32-bit) */
 	PPC_STL	r1, HSTATE_HOST_R1(r3)
 	PPC_STL	r2, HSTATE_HOST_R2(r3)
 
-	/* Move SRR0 and SRR1 into the respective regs */
-	PPC_LL  r9, SVCPU_PC(r3)
-	mtsrr0	r9
-	mtsrr1	r10
-
 	/* Activate guest mode, so faults get handled by KVM */
 	li	r11, KVM_GUEST_MODE_GUEST
 	stb	r11, HSTATE_IN_GUEST(r3)
@@ -87,17 +91,46 @@ kvmppc_handler_trampoline_enter:
 	/* Switch to guest segment. This is subarch specific. */
 	LOAD_GUEST_SEGMENTS
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Some guests may need to have dcbz set to 32 byte length.
+	 *
+	 * Usually we ensure that by patching the guest's instructions
+	 * to trap on dcbz and emulate it in the hypervisor.
+	 *
+	 * If we can, we should tell the CPU to use 32 byte dcbz though,
+	 * because that's a lot faster.
+	 */
+	lbz	r0, HSTATE_RESTORE_HID5(r3)
+	cmpwi	r0, 0
+	beq	no_dcbz32_on
+
+	mfspr   r0,SPRN_HID5
+	ori     r0, r0, 0x80		/* XXX HID5_dcbz32 = 0x80 */
+	mtspr   SPRN_HID5,r0
+no_dcbz32_on:
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 	/* Enter guest */
 
-	PPC_LL	r4, SVCPU_CTR(r3)
-	PPC_LL	r5, SVCPU_LR(r3)
-	lwz	r6, SVCPU_CR(r3)
-	lwz	r7, SVCPU_XER(r3)
+	PPC_LL	r8, SVCPU_CTR(r3)
+	PPC_LL	r9, SVCPU_LR(r3)
+	lwz	r10, SVCPU_CR(r3)
+	lwz	r11, SVCPU_XER(r3)
+
+	mtctr	r8
+	mtlr	r9
+	mtcr	r10
+	mtxer	r11
 
-	mtctr	r4
-	mtlr	r5
-	mtcr	r6
-	mtxer	r7
+	/* Move SRR0 and SRR1 into the respective regs */
+	PPC_LL  r9, SVCPU_PC(r3)
+	/* First clear RI in our current MSR value */
+	li	r0, MSR_RI
+	andc	r6, r6, r0
+	MTMSR_EERI(r6)
+	mtsrr0	r9
+	mtsrr1	r4
 
 	PPC_LL	r0, SVCPU_R0(r3)
 	PPC_LL	r1, SVCPU_R1(r3)
@@ -254,6 +287,43 @@ no_ld_last_inst:
 	/* Switch back to host MMU */
 	LOAD_HOST_SEGMENTS
 
+#ifdef CONFIG_PPC_BOOK3S_64
+
+	lbz	r5, HSTATE_RESTORE_HID5(r13)
+	cmpwi	r5, 0
+	beq	no_dcbz32_off
+
+	li	r4, 0
+	mfspr   r5,SPRN_HID5
+	rldimi  r5,r4,6,56
+	mtspr   SPRN_HID5,r5
+
+no_dcbz32_off:
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+	/*
+	 * For some interrupts, we need to call the real Linux
+	 * handler, so it can do work for us. This has to happen
+	 * as if the interrupt arrived from the kernel though,
+	 * so let's fake it here where most state is restored.
+	 *
+	 * Having set up SRR0/1 with the address where we want
+	 * to continue with relocation on (potentially in module
+	 * space), we either just go straight there with rfi[d],
+	 * or we jump to an interrupt handler with bctr if there
+	 * is an interrupt to be handled first.  In the latter
+	 * case, the rfi[d] at the end of the interrupt handler
+	 * will get us back to where we want to continue.
+	 */
+
+	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
+	beq	1f
+	cmpwi	r12, BOOK3S_INTERRUPT_DECREMENTER
+	beq	1f
+	cmpwi	r12, BOOK3S_INTERRUPT_PERFMON
+1:	mtctr	r12
+
 	/* Register usage at this point:
 	 *
 	 * R1       = host R1
@@ -264,13 +334,15 @@ no_ld_last_inst:
 	 *
 	 */
 
-	/* RFI into the highmem handler */
-	mfmsr	r7
-	ori	r7, r7, MSR_IR|MSR_DR|MSR_RI|MSR_ME	/* Enable paging */
-	mtsrr1	r7
-	/* Load highmem handler address */
+	PPC_LL	r6, HSTATE_HOST_MSR(r13)
 	PPC_LL	r8, HSTATE_VMHANDLER(r13)
+
+	/* Restore host msr -> SRR1 */
+	mtsrr1	r6
+	/* Load highmem handler address */
 	mtsrr0	r8
 
+	/* RFI into the highmem handler, or jump to interrupt handler */
+	beqctr
 	RFI
 kvmppc_handler_trampoline_exit_end:
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 1/3] KVM: PPC: Assemble book3s{,_hv}_rmhandlers.S separately
From: Paul Mackerras @ 2011-07-23  7:41 UTC (permalink / raw)
  To: linuxppc-dev, kvm-ppc, Alexander Graf

This makes arch/powerpc/kvm/book3s_rmhandlers.S and
arch/powerpc/kvm/book3s_hv_rmhandlers.S be assembled as
separate compilation units rather than having them #included in
arch/powerpc/kernel/exceptions-64s.S.  We no longer have any
conditional branches between the exception prologs in
exceptions-64s.S and the KVM handlers, so there is no need to
keep their contents close together in the vmlinux image.

In their current location, they are using up part of the limited
space between the first-level interrupt handlers and the firmware
NMI data area at offset 0x7000, and with some kernel configurations
this area will overflow (e.g. allyesconfig), leading to an
"attempt to .org backwards" error when compiling exceptions-64s.S.

Moving them out requires that we add some #includes that the
book3s_{,hv_}rmhandlers.S code was previously getting implicitly
via exceptions-64s.S.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/exceptions-64s.S    |   10 ----------
 arch/powerpc/kvm/Makefile               |    3 +++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |    3 +++
 arch/powerpc/kvm/book3s_rmhandlers.S    |    3 +++
 4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 41b02c7..29ddd8b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -427,16 +427,6 @@ slb_miss_user_pseries:
 	b	.				/* prevent spec. execution */
 #endif /* __DISABLED__ */
 
-/* KVM's trampoline code needs to be close to the interrupt handlers */
-
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_PR
-#include "../kvm/book3s_rmhandlers.S"
-#else
-#include "../kvm/book3s_hv_rmhandlers.S"
-#endif
-#endif
-
 	.align	7
 	.globl	__end_interrupts
 __end_interrupts:
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 08428e2..e161680 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -49,12 +49,15 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
 	book3s_64_mmu_host.o \
 	book3s_64_mmu.o \
 	book3s_32_mmu.o
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
+	book3s_rmhandlers.o
 
 kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_hv.o \
 	book3s_hv_interrupts.o \
 	book3s_64_mmu_hv.o
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
+	book3s_hv_rmhandlers.o \
 	book3s_hv_rm_mmu.o \
 	book3s_64_vio_hv.o \
 	book3s_hv_builtin.o
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6dd3358..543ee50 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -20,7 +20,10 @@
 #include <asm/ppc_asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/reg.h>
+#include <asm/mmu.h>
 #include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/hvcall.h>
 #include <asm/asm-offsets.h>
 #include <asm/exception-64s.h>
 
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index c1f877c..5ee66ed 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -20,6 +20,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/reg.h>
+#include <asm/mmu.h>
 #include <asm/page.h>
 #include <asm/asm-offsets.h>
 
@@ -39,6 +40,7 @@
 #define MSR_NOIRQ		MSR_KERNEL & ~(MSR_IR | MSR_DR)
 #define FUNC(name) 		GLUE(.,name)
 
+	.globl	kvmppc_skip_interrupt
 kvmppc_skip_interrupt:
 	/*
 	 * Here all GPRs are unchanged from when the interrupt happened
@@ -51,6 +53,7 @@ kvmppc_skip_interrupt:
 	rfid
 	b	.
 
+	.globl	kvmppc_skip_Hinterrupt
 kvmppc_skip_Hinterrupt:
 	/*
 	 * Here all GPRs are unchanged from when the interrupt happened
-- 
1.7.5.4

^ permalink raw reply related

* Re: [PATCH V3 2/2] cpc925_edac: support single-processor configurations
From: Dmitry Eremin-Solenikov @ 2011-07-22 22:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Harry Ciao, Paul Mackerras, linuxppc-dev, Doug Thompson
In-Reply-To: <1311372381.25044.578.camel@pasglop>

On 7/23/11, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> On Sat, 2011-07-23 at 01:56 +0400, Dmitry Eremin-Solenikov wrote:
>> On 6/29/11, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
>> > On Fri, 2011-06-17 at 16:51 +0400, Dmitry Eremin-Solenikov wrote:
>> >> If second CPU is not enabled, CPC925 EDAC driver will spill out
>> >> warnings
>> >> about errors on second Processor Interface. Support masking that out,
>> >> by detecting at runtime which CPUs are present in device tree.
>> >
>> > Doug ? Are you going to carry this or should I via powerpc.git ? There's
>> > a dependency on another patch that's going into powerpc-next ...
>>
>> I'm sorry. It's been a month ago. Is there any consensus regarding these
>> two
>> patches? Are they going in in the 3.1 merge window?
>
> There have been no response from the Doug, but I just realized we
> haven't CCing their mailing list... oh well, I'll probably send them to
> Linux myself some time next week.

Thank you very much!

-- 
With best wishes
Dmitry

^ permalink raw reply

* Re: [PATCH V3 2/2] cpc925_edac: support single-processor configurations
From: Benjamin Herrenschmidt @ 2011-07-22 22:06 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov
  Cc: Harry Ciao, Paul Mackerras, linuxppc-dev, Doug Thompson
In-Reply-To: <CALT56yNrCwvBuu+rLgryf4XwA22Qz5-ntQ=rBv056ayT8F-oVw@mail.gmail.com>

On Sat, 2011-07-23 at 01:56 +0400, Dmitry Eremin-Solenikov wrote:
> On 6/29/11, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> > On Fri, 2011-06-17 at 16:51 +0400, Dmitry Eremin-Solenikov wrote:
> >> If second CPU is not enabled, CPC925 EDAC driver will spill out warnings
> >> about errors on second Processor Interface. Support masking that out,
> >> by detecting at runtime which CPUs are present in device tree.
> >
> > Doug ? Are you going to carry this or should I via powerpc.git ? There's
> > a dependency on another patch that's going into powerpc-next ...
> 
> I'm sorry. It's been a month ago. Is there any consensus regarding these two
> patches? Are they going in in the 3.1 merge window?

There have been no response from the Doug, but I just realized we
haven't CCing their mailing list... oh well, I'll probably send them to
Linux myself some time next week.

Ben.

> > Cheers,
> > Ben.
> >
> >> Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
> >> Cc: Harry Ciao <qingtao.cao@windriver.com>
> >> Cc: Doug Thompson <dougthompson@xmission.com>
> >> Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
> >> ---
> >>  drivers/edac/cpc925_edac.c |   67
> >> ++++++++++++++++++++++++++++++++++++++++++--
> >>  1 files changed, 64 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
> >> index a687a0d..a774c0d 100644
> >> --- a/drivers/edac/cpc925_edac.c
> >> +++ b/drivers/edac/cpc925_edac.c
> >> @@ -90,6 +90,7 @@ enum apimask_bits {
> >>  	ECC_MASK_ENABLE = (APIMASK_ECC_UE_H | APIMASK_ECC_CE_H |
> >>  			   APIMASK_ECC_UE_L | APIMASK_ECC_CE_L),
> >>  };
> >> +#define APIMASK_ADI(n)		CPC925_BIT(((n)+1))
> >>
> >>  /************************************************************
> >>   *	Processor Interface Exception Register (APIEXCP)
> >> @@ -581,16 +582,73 @@ static void cpc925_mc_check(struct mem_ctl_info
> >> *mci)
> >>  }
> >>
> >>  /******************** CPU err device********************************/
> >> +static u32 cpc925_cpu_mask_disabled(void)
> >> +{
> >> +	struct device_node *cpus;
> >> +	struct device_node *cpunode = NULL;
> >> +	static u32 mask = 0;
> >> +
> >> +	/* use cached value if available */
> >> +	if (mask != 0)
> >> +		return mask;
> >> +
> >> +	mask = APIMASK_ADI0 | APIMASK_ADI1;
> >> +
> >> +	cpus = of_find_node_by_path("/cpus");
> >> +	if (cpus == NULL) {
> >> +		cpc925_printk(KERN_DEBUG, "No /cpus node !\n");
> >> +		return 0;
> >> +	}
> >> +
> >> +	while ((cpunode = of_get_next_child(cpus, cpunode)) != NULL) {
> >> +		const u32 *reg = of_get_property(cpunode, "reg", NULL);
> >> +
> >> +		if (strcmp(cpunode->type, "cpu")) {
> >> +			cpc925_printk(KERN_ERR, "Not a cpu node in /cpus: %s\n",
> >> cpunode->name);
> >> +			continue;
> >> +		}
> >> +
> >> +		if (reg == NULL || *reg > 2) {
> >> +			cpc925_printk(KERN_ERR, "Bad reg value at %s\n", cpunode->full_name);
> >> +			continue;
> >> +		}
> >> +
> >> +		mask &= ~APIMASK_ADI(*reg);
> >> +	}
> >> +
> >> +	if (mask != (APIMASK_ADI0 | APIMASK_ADI1)) {
> >> +		/* We assume that each CPU sits on it's own PI and that
> >> +		 * for present CPUs the reg property equals to the PI
> >> +		 * interface id */
> >> +		cpc925_printk(KERN_WARNING,
> >> +				"Assuming PI id is equal to CPU MPIC id!\n");
> >> +	}
> >> +
> >> +	of_node_put(cpunode);
> >> +	of_node_put(cpus);
> >> +
> >> +	return mask;
> >> +}
> >> +
> >>  /* Enable CPU Errors detection */
> >>  static void cpc925_cpu_init(struct cpc925_dev_info *dev_info)
> >>  {
> >>  	u32 apimask;
> >> +	u32 cpumask;
> >>
> >>  	apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET);
> >> -	if ((apimask & CPU_MASK_ENABLE) == 0) {
> >> -		apimask |= CPU_MASK_ENABLE;
> >> -		__raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET);
> >> +
> >> +	cpumask = cpc925_cpu_mask_disabled();
> >> +	if (apimask & cpumask) {
> >> +		cpc925_printk(KERN_WARNING, "CPU(s) not present, "
> >> +				"but enabled in APIMASK, disabling\n");
> >> +		apimask &= ~cpumask;
> >>  	}
> >> +
> >> +	if ((apimask & CPU_MASK_ENABLE) == 0)
> >> +		apimask |= CPU_MASK_ENABLE;
> >> +
> >> +	__raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET);
> >>  }
> >>
> >>  /* Disable CPU Errors detection */
> >> @@ -622,6 +680,9 @@ static void cpc925_cpu_check(struct
> >> edac_device_ctl_info *edac_dev)
> >>  	if ((apiexcp & CPU_EXCP_DETECTED) == 0)
> >>  		return;
> >>
> >> +	if ((apiexcp & ~cpc925_cpu_mask_disabled()) == 0)
> >> +		return;
> >> +
> >>  	apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET);
> >>  	cpc925_printk(KERN_INFO, "Processor Interface Fault\n"
> >>  				 "Processor Interface register dump:\n");
> >
> >
> >
> 
> 

^ permalink raw reply

* Re: [PATCH V3 2/2] cpc925_edac: support single-processor configurations
From: Dmitry Eremin-Solenikov @ 2011-07-22 21:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Harry Ciao, Paul Mackerras, linuxppc-dev, Doug Thompson
In-Reply-To: <1309318559.32158.521.camel@pasglop>

On 6/29/11, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> On Fri, 2011-06-17 at 16:51 +0400, Dmitry Eremin-Solenikov wrote:
>> If second CPU is not enabled, CPC925 EDAC driver will spill out warnings
>> about errors on second Processor Interface. Support masking that out,
>> by detecting at runtime which CPUs are present in device tree.
>
> Doug ? Are you going to carry this or should I via powerpc.git ? There's
> a dependency on another patch that's going into powerpc-next ...

I'm sorry. It's been a month ago. Is there any consensus regarding these two
patches? Are they going in in the 3.1 merge window?

>
> Cheers,
> Ben.
>
>> Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
>> Cc: Harry Ciao <qingtao.cao@windriver.com>
>> Cc: Doug Thompson <dougthompson@xmission.com>
>> Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
>> ---
>>  drivers/edac/cpc925_edac.c |   67
>> ++++++++++++++++++++++++++++++++++++++++++--
>>  1 files changed, 64 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
>> index a687a0d..a774c0d 100644
>> --- a/drivers/edac/cpc925_edac.c
>> +++ b/drivers/edac/cpc925_edac.c
>> @@ -90,6 +90,7 @@ enum apimask_bits {
>>  	ECC_MASK_ENABLE = (APIMASK_ECC_UE_H | APIMASK_ECC_CE_H |
>>  			   APIMASK_ECC_UE_L | APIMASK_ECC_CE_L),
>>  };
>> +#define APIMASK_ADI(n)		CPC925_BIT(((n)+1))
>>
>>  /************************************************************
>>   *	Processor Interface Exception Register (APIEXCP)
>> @@ -581,16 +582,73 @@ static void cpc925_mc_check(struct mem_ctl_info
>> *mci)
>>  }
>>
>>  /******************** CPU err device********************************/
>> +static u32 cpc925_cpu_mask_disabled(void)
>> +{
>> +	struct device_node *cpus;
>> +	struct device_node *cpunode = NULL;
>> +	static u32 mask = 0;
>> +
>> +	/* use cached value if available */
>> +	if (mask != 0)
>> +		return mask;
>> +
>> +	mask = APIMASK_ADI0 | APIMASK_ADI1;
>> +
>> +	cpus = of_find_node_by_path("/cpus");
>> +	if (cpus == NULL) {
>> +		cpc925_printk(KERN_DEBUG, "No /cpus node !\n");
>> +		return 0;
>> +	}
>> +
>> +	while ((cpunode = of_get_next_child(cpus, cpunode)) != NULL) {
>> +		const u32 *reg = of_get_property(cpunode, "reg", NULL);
>> +
>> +		if (strcmp(cpunode->type, "cpu")) {
>> +			cpc925_printk(KERN_ERR, "Not a cpu node in /cpus: %s\n",
>> cpunode->name);
>> +			continue;
>> +		}
>> +
>> +		if (reg == NULL || *reg > 2) {
>> +			cpc925_printk(KERN_ERR, "Bad reg value at %s\n", cpunode->full_name);
>> +			continue;
>> +		}
>> +
>> +		mask &= ~APIMASK_ADI(*reg);
>> +	}
>> +
>> +	if (mask != (APIMASK_ADI0 | APIMASK_ADI1)) {
>> +		/* We assume that each CPU sits on it's own PI and that
>> +		 * for present CPUs the reg property equals to the PI
>> +		 * interface id */
>> +		cpc925_printk(KERN_WARNING,
>> +				"Assuming PI id is equal to CPU MPIC id!\n");
>> +	}
>> +
>> +	of_node_put(cpunode);
>> +	of_node_put(cpus);
>> +
>> +	return mask;
>> +}
>> +
>>  /* Enable CPU Errors detection */
>>  static void cpc925_cpu_init(struct cpc925_dev_info *dev_info)
>>  {
>>  	u32 apimask;
>> +	u32 cpumask;
>>
>>  	apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET);
>> -	if ((apimask & CPU_MASK_ENABLE) == 0) {
>> -		apimask |= CPU_MASK_ENABLE;
>> -		__raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET);
>> +
>> +	cpumask = cpc925_cpu_mask_disabled();
>> +	if (apimask & cpumask) {
>> +		cpc925_printk(KERN_WARNING, "CPU(s) not present, "
>> +				"but enabled in APIMASK, disabling\n");
>> +		apimask &= ~cpumask;
>>  	}
>> +
>> +	if ((apimask & CPU_MASK_ENABLE) == 0)
>> +		apimask |= CPU_MASK_ENABLE;
>> +
>> +	__raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET);
>>  }
>>
>>  /* Disable CPU Errors detection */
>> @@ -622,6 +680,9 @@ static void cpc925_cpu_check(struct
>> edac_device_ctl_info *edac_dev)
>>  	if ((apiexcp & CPU_EXCP_DETECTED) == 0)
>>  		return;
>>
>> +	if ((apiexcp & ~cpc925_cpu_mask_disabled()) == 0)
>> +		return;
>> +
>>  	apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET);
>>  	cpc925_printk(KERN_INFO, "Processor Interface Fault\n"
>>  				 "Processor Interface register dump:\n");
>
>
>


-- 
With best wishes
Dmitry

^ permalink raw reply

* Re: [PATCH 13/14] 85xx: consolidate of_platform_bus_probe calls
From: Dmitry Eremin-Solenikov @ 2011-07-22 21:45 UTC (permalink / raw)
  To: Scott Wood; +Cc: Paul Mackerras, Linux PPC Development
In-Reply-To: <20110722152911.364d75dd@schlenkerla.am.freescale.net>

On 7/23/11, Scott Wood <scottwood@freescale.com> wrote:
> On Fri, 22 Jul 2011 23:44:01 +0400
> Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> wrote:
>
>> On 7/19/11, Scott Wood <scottwood@freescale.com> wrote:
>> > On Tue, 19 Jul 2011 12:53:50 +0400
>> > Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> wrote:
>> >
>> >> +static struct of_device_id __initdata mpc85xx_common_ids[] = {
>> >> +	{ .type = "soc", },
>> >> +	{ .compatible = "soc", },
>> >> +	{ .compatible = "simple-bus", },
>> >> +	{ .compatible = "gianfar", },
>> >> +	{ .compatible = "fsl,qe", },
>> >> +	{ .compatible = "fsl,cpm2", },
>> >> +	{},
>> >> +};
>> >
>> > Same comment as for 83xx regarding localbus and compatibility with old
>> > device trees.
>>
>> I checked for in-kernel device trees. Unless I miss something, there are
>> no
>> leftovers from this list. (83xx provided no simple-bus property for
>> localbus,
>> so your argument is valid there). If we should care about strange cases,
>> not even blessed by kernel trees, I can add some of the old probes back.
>
> I see simple-bus missing in sbc8560 and ksi8560 -- were these included in
> the consolidation?  Plus some of the others may have had simple-bus added
> after their initial version.

Patches for those are included in the patch serie. Kumar has applied them
to next-3.2.

> As for out-of-tree trees (which could include trees dynamically
> generated/augmented by firmware, as well as device trees for custom boards
> that forked off of an old reference board tree), it's still nice to not
> break them as long as they stick to the binding.

I see your point. I just wasn't thinking too much about ot-of-tree trees.
My thought was that if someone updates the kernel, he can also update the dtb.

> While the localbus binding is deficient regarding the compatible property,
> IIRC localbus preceded the introduction of simple-bus, which appears to be
> defined only in ePAPR (not in Linux or on devicetree.org).  The ePAPR
> language does not suggest that it's mandatory for all buses that don't need
> special handling -- in fact, the language could be read as suggesting that
> it's only applicable to the "internal I/O bus" on an SoC (whereas this
> is an external bus), though that wasn't the intent behind it.

Could you please update the lbc.txt suggesting the compatibility
with simple-bus for lbc? Or you thing that it would be wrong?

I think we should define compatibility list as "fsl,mpcXXXX-localbus",
"fsl,pqXXXXX-localbus", "simple-bus", noting that by default new
platforms/boards should only use "simple-bus" internally. Does this
look reasonable for you? I can then try to provide a patch.

> The notion of probing buses isn't really a part of the device tree specs;
> they're more concerned with binding the devices themselves.  In theory
> Linux should probably be probing everything that a driver will match,
> regardless of where in the tree it is, except where an ancestor node is
> diasbled, has matched a driver that wants to do things differently, or is
> on a blacklist.  Of course, that's somewhat of a philosophical question on
> whether it's better to risk probing someting that shouldn't be, or not
> probing something that should be.  The former is often nastier but more
> obvious, the latter is more likely until simple-bus is more widely used,
> and either one results in something not working.

> Leaving the localbus in may help someone, and it shouldn't hurt anything.

What do you suggest/prefer? To add .name="localbus" to generic code
or to have board-specific hooks (like one for mpc834xemitx)?

So,

>
> -Scott
>
>


-- 
With best wishes
Dmitry

^ permalink raw reply

* Re: [PATCH 2/5] hugetlb: add phys addr to struct huge_bootmem_page
From: Becky Bruce @ 2011-07-22 21:08 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linuxppc-dev, linux-kernel, david
In-Reply-To: <20110721154427.53e8fd5b.akpm@linux-foundation.org>


On Jul 21, 2011, at 5:44 PM, Andrew Morton wrote:

> On Tue, 28 Jun 2011 14:54:45 -0500
> Becky Bruce <beckyb@kernel.crashing.org> wrote:
>=20
>> From: Becky Bruce <beckyb@kernel.crashing.org>
>>=20
>> This is needed on HIGHMEM systems - we don't always have a virtual
>> address so store the physical address and map it in as needed.
>>=20
>> Signed-off-by: Becky Bruce <beckyb@kernel.crashing.org>
>> ---
>> include/linux/hugetlb.h |    3 +++
>> mm/hugetlb.c            |    8 +++++++-
>> 2 files changed, 10 insertions(+), 1 deletions(-)
>>=20
>> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
>> index 59225ef..19644e0 100644
>> --- a/include/linux/hugetlb.h
>> +++ b/include/linux/hugetlb.h
>> @@ -231,6 +231,9 @@ struct hstate {
>> struct huge_bootmem_page {
>> 	struct list_head list;
>> 	struct hstate *hstate;
>> +#ifdef CONFIG_HIGHMEM
>> +	phys_addr_t phys;
>> +#endif
>> };
>>=20
>> struct page *alloc_huge_page_node(struct hstate *h, int nid);
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index 6402458..2db81ea 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -1105,8 +1105,14 @@ static void __init =
gather_bootmem_prealloc(void)
>> 	struct huge_bootmem_page *m;
>>=20
>> 	list_for_each_entry(m, &huge_boot_pages, list) {
>> -		struct page *page =3D virt_to_page(m);
>> 		struct hstate *h =3D m->hstate;
>> +#ifdef CONFIG_HIGHMEM
>> +		struct page *page =3D pfn_to_page(m->phys >> =
PAGE_SHIFT);
>> +		free_bootmem_late((unsigned long)m,
>> +				  sizeof(struct huge_bootmem_page));
>> +#else
>> +		struct page *page =3D virt_to_page(m);
>> +#endif
>> 		__ClearPageReserved(page);
>> 		WARN_ON(page_count(page) !=3D 1);
>> 		prep_compound_huge_page(page, h->order);
>=20
> nit: wrapping both definitions and statements in an ifdef like this is
> a bit nasty from a readability and maintainability point of view - =
it's
> inviting people to later make changes which fail to compile when the
> config option is flipped.
>=20
> This is better:
>=20
> --- =
a/mm/hugetlb.c~hugetlb-add-phys-addr-to-struct-huge_bootmem_page-fix
> +++ a/mm/hugetlb.c
> @@ -1106,12 +1106,14 @@ static void __init gather_bootmem_preall
>=20
> 	list_for_each_entry(m, &huge_boot_pages, list) {
> 		struct hstate *h =3D m->hstate;
> +		struct page *page;
> +
> #ifdef CONFIG_HIGHMEM
> -		struct page *page =3D pfn_to_page(m->phys >> =
PAGE_SHIFT);
> +		page =3D pfn_to_page(m->phys >> PAGE_SHIFT);
> 		free_bootmem_late((unsigned long)m,
> 				  sizeof(struct huge_bootmem_page));
> #else
> -		struct page *page =3D virt_to_page(m);
> +		page =3D virt_to_page(m);
> #endif
> 		__ClearPageReserved(page);
> 		WARN_ON(page_count(page) !=3D 1);

Andrew,

I totally agree, this is better, thanks.   I see you've put my original =
patch and your fix into -mm;  I'd like it to percolate there for a bit =
before it goes to Linus to be sure I haven't broken anything.  It should =
be safe, as I don't think this particular function was ever expected to =
work on highmem platforms, but it's possible I'm overlooking something.

Cheers,
-Becky

^ permalink raw reply

* [PATCH v2] powerpc/85xx: enable caam crypto driver by default
From: Kim Phillips @ 2011-07-22 20:48 UTC (permalink / raw)
  To: linuxppc-dev, Kumar Gala; +Cc: Kumar Gala
In-Reply-To: <20110605173958.1073f245.kim.phillips@freescale.com>

corenet based SoCs have SEC4 h/w, so enable the SEC4 driver,
caam, and the algorithms it supports, and disable the
SEC2/3 driver, talitos.

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
---
v2: rebase from old e55xx_smp_defconfig file to new corenet & p1023
defconfigs

 arch/powerpc/configs/85xx/p1023rds_defconfig |    1 +
 arch/powerpc/configs/corenet32_smp_defconfig |    1 +
 arch/powerpc/configs/corenet64_smp_defconfig |    5 ++++-
 3 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/configs/85xx/p1023rds_defconfig b/arch/powerpc/configs/85xx/p1023rds_defconfig
index 980ff8f..3ff5a81 100644
--- a/arch/powerpc/configs/85xx/p1023rds_defconfig
+++ b/arch/powerpc/configs/85xx/p1023rds_defconfig
@@ -171,3 +171,4 @@ CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_SHA512=y
 CONFIG_CRYPTO_AES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig
index 10562a5..4311d02 100644
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ b/arch/powerpc/configs/corenet32_smp_defconfig
@@ -185,3 +185,4 @@ CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_SHA512=y
 CONFIG_CRYPTO_AES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig
index d322835..c92c204 100644
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -100,5 +100,8 @@ CONFIG_DEBUG_INFO=y
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_VIRQ_DEBUG=y
 CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_AES=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DEV_TALITOS=y
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
-- 
1.7.6

^ permalink raw reply related

* Re: [PATCH V2 2/3] powerpc/83xx: consolidate of_platform_bus_probe calls
From: Scott Wood @ 2011-07-22 20:37 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: Linux PPC Development
In-Reply-To: <1311364544-9413-3-git-send-email-dbaryshkov@gmail.com>

On Fri, 22 Jul 2011 23:55:43 +0400
Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> wrote:

> 83xx board files have a lot of duplication in
> *_declare_of_platform_devices() functions. Merge that into a single
> function common to most of the boards.
> 
> The only leftover is mpc834x_itx.c board file which explicitly asks for
> fsl,pq2pro-localbus, as corresponding bindings don't provide
> "simple-bus" compatibility in localbus node.

What if simple-bus is added to that board's device tree in the future?
Won't we end up probing it twice?

-Scott

^ permalink raw reply

* Re: [PATCH 13/14] 85xx: consolidate of_platform_bus_probe calls
From: Scott Wood @ 2011-07-22 20:29 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: Paul Mackerras, Linux PPC Development
In-Reply-To: <CALT56yMxttzC+WZMhja3-PqneMMba5rs9j25RiSAUrwEA_jyaw@mail.gmail.com>

On Fri, 22 Jul 2011 23:44:01 +0400
Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> wrote:

> On 7/19/11, Scott Wood <scottwood@freescale.com> wrote:
> > On Tue, 19 Jul 2011 12:53:50 +0400
> > Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> wrote:
> >
> >> +static struct of_device_id __initdata mpc85xx_common_ids[] = {
> >> +	{ .type = "soc", },
> >> +	{ .compatible = "soc", },
> >> +	{ .compatible = "simple-bus", },
> >> +	{ .compatible = "gianfar", },
> >> +	{ .compatible = "fsl,qe", },
> >> +	{ .compatible = "fsl,cpm2", },
> >> +	{},
> >> +};
> >
> > Same comment as for 83xx regarding localbus and compatibility with old
> > device trees.
> 
> I checked for in-kernel device trees. Unless I miss something, there are no
> leftovers from this list. (83xx provided no simple-bus property for localbus,
> so your argument is valid there). If we should care about strange cases,
> not even blessed by kernel trees, I can add some of the old probes back.

I see simple-bus missing in sbc8560 and ksi8560 -- were these included in
the consolidation?  Plus some of the others may have had simple-bus added
after their initial version.

As for out-of-tree trees (which could include trees dynamically
generated/augmented by firmware, as well as device trees for custom boards
that forked off of an old reference board tree), it's still nice to not
break them as long as they stick to the binding.

While the localbus binding is deficient regarding the compatible property,
IIRC localbus preceded the introduction of simple-bus, which appears to be
defined only in ePAPR (not in Linux or on devicetree.org).  The ePAPR
language does not suggest that it's mandatory for all buses that don't need
special handling -- in fact, the language could be read as suggesting that
it's only applicable to the "internal I/O bus" on an SoC (whereas this
is an external bus), though that wasn't the intent behind it.

The notion of probing buses isn't really a part of the device tree specs;
they're more concerned with binding the devices themselves.  In theory
Linux should probably be probing everything that a driver will match,
regardless of where in the tree it is, except where an ancestor node is
diasbled, has matched a driver that wants to do things differently, or is
on a blacklist.  Of course, that's somewhat of a philosophical question on
whether it's better to risk probing someting that shouldn't be, or not
probing something that should be.  The former is often nastier but more
obvious, the latter is more likely until simple-bus is more widely used,
and either one results in something not working.

Leaving the localbus in may help someone, and it shouldn't hurt anything.

-Scott

^ permalink raw reply

* Re: [openmcapi-dev] [PATCH] powerpc: Exporting boot_cpuid_phys
From: Kumar Gala @ 2011-07-22 20:28 UTC (permalink / raw)
  To: Hollis Blanchard; +Cc: openmcapi-dev, Andrew Gabbasov, linuxppc-dev
In-Reply-To: <4E29CB58.10103@mentor.com>


On Jul 22, 2011, at 2:11 PM, Hollis Blanchard wrote:

> On 07/16/2011 06:22 AM, Andrew Gabbasov wrote:
>> Kernel loadable module can use hard_smp_processor_id() if building =
with SMP
>> kernel. In order to make it work for UP kernels too, boot_cpuid_phys
>> symbol (which is what hard_smp_processor_id() macro resolves to
>> in non-SMP configuration) must be exported.
>>=20
>> Signed-off-by: Andrew Gabbasov<andrew_gabbasov@mentor.com>
>> ---
>>  arch/powerpc/kernel/setup_32.c |    1 +
>>  1 files changed, 1 insertions(+), 0 deletions(-)
>>=20
>> diff --git a/arch/powerpc/kernel/setup_32.c =
b/arch/powerpc/kernel/setup_32.c
>> index 1d2fbc9..3dffce6 100644
>> --- a/arch/powerpc/kernel/setup_32.c
>> +++ b/arch/powerpc/kernel/setup_32.c
>> @@ -49,6 +49,7 @@ extern void bootx_init(unsigned long r4, unsigned =
long phys);
>>  int boot_cpuid =3D -1;
>>  EXPORT_SYMBOL_GPL(boot_cpuid);
>>  int boot_cpuid_phys;
>> +EXPORT_SYMBOL_GPL(boot_cpuid_phys);
>>=20
>>  int smp_hw_index[NR_CPUS];
>=20
> Ben, ping?

Its in Benh's next branch.

- k=

^ permalink raw reply

* [PATCH V2 2/2] powerpc/85xx: separate cpm2 pic init
From: Dmitry Eremin-Solenikov @ 2011-07-22 20:23 UTC (permalink / raw)
  To: Linux PPC Development
In-Reply-To: <1311366217-15384-1-git-send-email-dbaryshkov@gmail.com>

Separate handling of CPM2 PIC initialization to mpc85xx_cpm2_pic_init()
function.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
---
 arch/powerpc/platforms/85xx/ksi8560.c        |   29 +------------------
 arch/powerpc/platforms/85xx/mpc85xx.h        |    7 ++++
 arch/powerpc/platforms/85xx/mpc85xx_ads.c    |   33 +---------------------
 arch/powerpc/platforms/85xx/mpc85xx_common.c |   39 ++++++++++++++++++++++++++
 arch/powerpc/platforms/85xx/sbc8560.c        |   33 +---------------------
 arch/powerpc/platforms/85xx/stx_gp3.c        |   35 +----------------------
 arch/powerpc/platforms/85xx/tqm85xx.c        |   35 +----------------------
 7 files changed, 51 insertions(+), 160 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c
index 7657e1a..3c325b1 100644
--- a/arch/powerpc/platforms/85xx/ksi8560.c
+++ b/arch/powerpc/platforms/85xx/ksi8560.c
@@ -33,7 +33,6 @@
 #include <sysdev/fsl_pci.h>
 
 #include <asm/cpm2.h>
-#include <sysdev/cpm2_pic.h>
 
 #include "mpc85xx.h"
 
@@ -55,25 +54,11 @@ static void machine_restart(char *cmd)
 	for (;;);
 }
 
-static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
-{
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-	int cascade_irq;
-
-	while ((cascade_irq = cpm2_get_irq()) >= 0)
-		generic_handle_irq(cascade_irq);
-
-	chip->irq_eoi(&desc->irq_data);
-}
-
 static void __init ksi8560_pic_init(void)
 {
 	struct mpic *mpic;
 	struct resource r;
 	struct device_node *np;
-#ifdef CONFIG_CPM2
-	int irq;
-#endif
 
 	np = of_find_node_by_type(NULL, "open-pic");
 
@@ -96,19 +81,7 @@ static void __init ksi8560_pic_init(void)
 
 	mpic_init(mpic);
 
-#ifdef CONFIG_CPM2
-	/* Setup CPM2 PIC */
-	np = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic");
-	if (np == NULL) {
-		printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n");
-		return;
-	}
-	irq = irq_of_parse_and_map(np, 0);
-
-	cpm2_pic_init(np);
-	of_node_put(np);
-	irq_set_chained_handler(irq, cpm2_cascade);
-#endif
+	mpc85xx_cpm2_pic_init();
 }
 
 #ifdef CONFIG_CPM2
diff --git a/arch/powerpc/platforms/85xx/mpc85xx.h b/arch/powerpc/platforms/85xx/mpc85xx.h
index 1a1b4eb..2aa7c5d 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx.h
+++ b/arch/powerpc/platforms/85xx/mpc85xx.h
@@ -1,4 +1,11 @@
 #ifndef MPC85xx_H
 #define MPC85xx_H
 extern int mpc85xx_common_publish_devices(void);
+
+#ifdef CONFIG_CPM2
+extern void mpc85xx_cpm2_pic_init(void);
+#else
+static inline void __init mpc85xx_cpm2_pic_init(void) {}
+#endif /* CONFIG_CPM2 */
+
 #endif
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
index 153d2aa..8df4ff5 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
@@ -32,7 +32,6 @@
 
 #ifdef CONFIG_CPM2
 #include <asm/cpm2.h>
-#include <sysdev/cpm2_pic.h>
 #endif
 
 #include "mpc85xx.h"
@@ -48,29 +47,11 @@ static int mpc85xx_exclude_device(struct pci_controller *hose,
 }
 #endif /* CONFIG_PCI */
 
-#ifdef CONFIG_CPM2
-
-static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
-{
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-	int cascade_irq;
-
-	while ((cascade_irq = cpm2_get_irq()) >= 0)
-		generic_handle_irq(cascade_irq);
-
-	chip->irq_eoi(&desc->irq_data);
-}
-
-#endif /* CONFIG_CPM2 */
-
 static void __init mpc85xx_ads_pic_init(void)
 {
 	struct mpic *mpic;
 	struct resource r;
 	struct device_node *np = NULL;
-#ifdef CONFIG_CPM2
-	int irq;
-#endif
 
 	np = of_find_node_by_type(np, "open-pic");
 	if (!np) {
@@ -92,19 +73,7 @@ static void __init mpc85xx_ads_pic_init(void)
 
 	mpic_init(mpic);
 
-#ifdef CONFIG_CPM2
-	/* Setup CPM2 PIC */
-	np = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic");
-	if (np == NULL) {
-		printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n");
-		return;
-	}
-	irq = irq_of_parse_and_map(np, 0);
-
-	cpm2_pic_init(np);
-	of_node_put(np);
-	irq_set_chained_handler(irq, cpm2_cascade);
-#endif
+	mpc85xx_cpm2_pic_init();
 }
 
 /*
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_common.c b/arch/powerpc/platforms/85xx/mpc85xx_common.c
index 999567a..4fdf382 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_common.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_common.c
@@ -8,6 +8,8 @@
 #include <linux/kernel.h>
 #include <linux/of_platform.h>
 
+#include <sysdev/cpm2_pic.h>
+
 #include "mpc85xx.h"
 
 static struct of_device_id __initdata mpc85xx_common_ids[] = {
@@ -24,3 +26,40 @@ int __init mpc85xx_common_publish_devices(void)
 {
 	return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL);
 }
+
+#ifdef CONFIG_CPM2
+static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	int cascade_irq;
+
+	while ((cascade_irq = cpm2_get_irq()) >= 0)
+		generic_handle_irq(cascade_irq);
+
+	chip->irq_eoi(&desc->irq_data);
+}
+
+
+void __init mpc85xx_cpm2_pic_init(void)
+{
+	struct device_node *np;
+	int irq;
+
+	/* Setup CPM2 PIC */
+	np = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic");
+	if (np == NULL) {
+		printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n");
+		return;
+	}
+	irq = irq_of_parse_and_map(np, 0);
+	if (irq == NO_IRQ) {
+		of_node_put(np);
+		printk(KERN_ERR "PIC init: got no IRQ for cpm cascade\n");
+		return;
+	}
+
+	cpm2_pic_init(np);
+	of_node_put(np);
+	irq_set_chained_handler(irq, cpm2_cascade);
+}
+#endif
diff --git a/arch/powerpc/platforms/85xx/sbc8560.c b/arch/powerpc/platforms/85xx/sbc8560.c
index bbb656f..ce22afa 100644
--- a/arch/powerpc/platforms/85xx/sbc8560.c
+++ b/arch/powerpc/platforms/85xx/sbc8560.c
@@ -34,34 +34,15 @@
 
 #ifdef CONFIG_CPM2
 #include <asm/cpm2.h>
-#include <sysdev/cpm2_pic.h>
 #endif
 
 #include "mpc85xx.h"
 
-#ifdef CONFIG_CPM2
-
-static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
-{
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-	int cascade_irq;
-
-	while ((cascade_irq = cpm2_get_irq()) >= 0)
-		generic_handle_irq(cascade_irq);
-
-	chip->irq_eoi(&desc->irq_data);
-}
-
-#endif /* CONFIG_CPM2 */
-
 static void __init sbc8560_pic_init(void)
 {
 	struct mpic *mpic;
 	struct resource r;
 	struct device_node *np = NULL;
-#ifdef CONFIG_CPM2
-	int irq;
-#endif
 
 	np = of_find_node_by_type(np, "open-pic");
 	if (!np) {
@@ -83,19 +64,7 @@ static void __init sbc8560_pic_init(void)
 
 	mpic_init(mpic);
 
-#ifdef CONFIG_CPM2
-	/* Setup CPM2 PIC */
-	np = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic");
-	if (np == NULL) {
-		printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n");
-		return;
-	}
-	irq = irq_of_parse_and_map(np, 0);
-
-	cpm2_pic_init(np);
-	of_node_put(np);
-	irq_set_chained_handler(irq, cpm2_cascade);
-#endif
+	mpc85xx_cpm2_pic_init();
 }
 
 /*
diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c
index ab80044..b44c936 100644
--- a/arch/powerpc/platforms/85xx/stx_gp3.c
+++ b/arch/powerpc/platforms/85xx/stx_gp3.c
@@ -44,18 +44,6 @@
 
 #ifdef CONFIG_CPM2
 #include <asm/cpm2.h>
-#include <sysdev/cpm2_pic.h>
-
-static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
-{
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-	int cascade_irq;
-
-	while ((cascade_irq = cpm2_get_irq()) >= 0)
-		generic_handle_irq(cascade_irq);
-
-	chip->irq_eoi(&desc->irq_data);
-}
 #endif /* CONFIG_CPM2 */
 
 static void __init stx_gp3_pic_init(void)
@@ -63,9 +51,6 @@ static void __init stx_gp3_pic_init(void)
 	struct mpic *mpic;
 	struct resource r;
 	struct device_node *np;
-#ifdef CONFIG_CPM2
-	int irq;
-#endif
 
 	np = of_find_node_by_type(NULL, "open-pic");
 	if (!np) {
@@ -87,25 +72,7 @@ static void __init stx_gp3_pic_init(void)
 
 	mpic_init(mpic);
 
-#ifdef CONFIG_CPM2
-	/* Setup CPM2 PIC */
-	np = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic");
-	if (np == NULL) {
-		printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n");
-		return;
-	}
-	irq = irq_of_parse_and_map(np, 0);
-
-	if (irq == NO_IRQ) {
-		of_node_put(np);
-		printk(KERN_ERR "PIC init: got no IRQ for cpm cascade\n");
-		return;
-	}
-
-	cpm2_pic_init(np);
-	of_node_put(np);
-	irq_set_chained_handler(irq, cpm2_cascade);
-#endif
+	mpc85xx_cpm2_pic_init();
 }
 
 /*
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
index 19e711f..2418bf8 100644
--- a/arch/powerpc/platforms/85xx/tqm85xx.c
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -42,18 +42,6 @@
 
 #ifdef CONFIG_CPM2
 #include <asm/cpm2.h>
-#include <sysdev/cpm2_pic.h>
-
-static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
-{
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-	int cascade_irq;
-
-	while ((cascade_irq = cpm2_get_irq()) >= 0)
-		generic_handle_irq(cascade_irq);
-
-	chip->irq_eoi(&desc->irq_data);
-}
 #endif /* CONFIG_CPM2 */
 
 static void __init tqm85xx_pic_init(void)
@@ -61,9 +49,6 @@ static void __init tqm85xx_pic_init(void)
 	struct mpic *mpic;
 	struct resource r;
 	struct device_node *np;
-#ifdef CONFIG_CPM2
-	int irq;
-#endif
 
 	np = of_find_node_by_type(NULL, "open-pic");
 	if (!np) {
@@ -85,25 +70,7 @@ static void __init tqm85xx_pic_init(void)
 
 	mpic_init(mpic);
 
-#ifdef CONFIG_CPM2
-	/* Setup CPM2 PIC */
-	np = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic");
-	if (np == NULL) {
-		printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n");
-		return;
-	}
-	irq = irq_of_parse_and_map(np, 0);
-
-	if (irq == NO_IRQ) {
-		of_node_put(np);
-		printk(KERN_ERR "PIC init: got no IRQ for cpm cascade\n");
-		return;
-	}
-
-	cpm2_pic_init(np);
-	of_node_put(np);
-	irq_set_chained_handler(irq, cpm2_cascade);
-#endif
+	mpc85xx_cpm2_pic_init();
 }
 
 /*
-- 
1.7.2.5

^ permalink raw reply related

* [PATCH V2 1/2] powerpc/85xx: consolidate of_platform_bus_probe calls
From: Dmitry Eremin-Solenikov @ 2011-07-22 20:23 UTC (permalink / raw)
  To: Linux PPC Development
In-Reply-To: <1311366217-15384-1-git-send-email-dbaryshkov@gmail.com>

85xx board files have a lot of duplication in *_publish_devices()/
*_declare_of_platform_devices() functions. Merge that into a single
function common to most of the boards.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
---
 arch/powerpc/platforms/85xx/Makefile         |    2 +
 arch/powerpc/platforms/85xx/ksi8560.c        |   18 +---------
 arch/powerpc/platforms/85xx/mpc8536_ds.c     |   16 ++-------
 arch/powerpc/platforms/85xx/mpc85xx.h        |    4 ++
 arch/powerpc/platforms/85xx/mpc85xx_ads.c    |   20 ++---------
 arch/powerpc/platforms/85xx/mpc85xx_cds.c    |   16 ++-------
 arch/powerpc/platforms/85xx/mpc85xx_common.c |   26 ++++++++++++++
 arch/powerpc/platforms/85xx/mpc85xx_ds.c     |   20 +++--------
 arch/powerpc/platforms/85xx/mpc85xx_mds.c    |   46 ++-----------------------
 arch/powerpc/platforms/85xx/mpc85xx_rdb.c    |   18 ++--------
 arch/powerpc/platforms/85xx/p1010rdb.c       |   15 ++-------
 arch/powerpc/platforms/85xx/p1022_ds.c       |    7 ++--
 arch/powerpc/platforms/85xx/p1023_rds.c      |   19 ++---------
 arch/powerpc/platforms/85xx/sbc8548.c        |   18 ++--------
 arch/powerpc/platforms/85xx/sbc8560.c        |   20 ++---------
 arch/powerpc/platforms/85xx/socrates.c       |   13 +------
 arch/powerpc/platforms/85xx/stx_gp3.c        |   16 ++-------
 arch/powerpc/platforms/85xx/tqm85xx.c        |   16 ++-------
 arch/powerpc/platforms/85xx/xes_mpc85xx.c    |   20 +++--------
 19 files changed, 84 insertions(+), 246 deletions(-)
 create mode 100644 arch/powerpc/platforms/85xx/mpc85xx.h
 create mode 100644 arch/powerpc/platforms/85xx/mpc85xx_common.c

diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index a971b32..172a4c8 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -3,6 +3,8 @@
 #
 obj-$(CONFIG_SMP) += smp.o
 
+obj-y += mpc85xx_common.o
+
 obj-$(CONFIG_MPC8540_ADS) += mpc85xx_ads.o
 obj-$(CONFIG_MPC8560_ADS) += mpc85xx_ads.o
 obj-$(CONFIG_MPC85xx_CDS) += mpc85xx_cds.o
diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c
index c46f935..7657e1a 100644
--- a/arch/powerpc/platforms/85xx/ksi8560.c
+++ b/arch/powerpc/platforms/85xx/ksi8560.c
@@ -35,6 +35,7 @@
 #include <asm/cpm2.h>
 #include <sysdev/cpm2_pic.h>
 
+#include "mpc85xx.h"
 
 #define KSI8560_CPLD_HVR		0x04 /* Hardware Version Register */
 #define KSI8560_CPLD_PVR		0x08 /* PLD Version Register */
@@ -215,22 +216,7 @@ static void ksi8560_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
 }
 
-static struct of_device_id __initdata of_bus_ids[] = {
-	{ .type = "soc", },
-	{ .type = "simple-bus", },
-	{ .name = "cpm", },
-	{ .name = "localbus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init declare_of_platform_devices(void)
-{
-	of_platform_bus_probe(NULL, of_bus_ids, NULL);
-
-	return 0;
-}
-machine_device_initcall(ksi8560, declare_of_platform_devices);
+machine_device_initcall(ksi8560, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index f79f2f1..9ee6455 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -32,6 +32,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include "mpc85xx.h"
+
 void __init mpc8536_ds_pic_init(void)
 {
 	struct mpic *mpic;
@@ -104,19 +106,7 @@ static void __init mpc8536_ds_setup_arch(void)
 	printk("MPC8536 DS board from Freescale Semiconductor\n");
 }
 
-static struct of_device_id __initdata mpc8536_ds_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init mpc8536_ds_publish_devices(void)
-{
-	return of_platform_bus_probe(NULL, mpc8536_ds_ids, NULL);
-}
-machine_device_initcall(mpc8536_ds, mpc8536_ds_publish_devices);
+machine_device_initcall(mpc8536_ds, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(mpc8536_ds, swiotlb_setup_bus_notifier);
 
diff --git a/arch/powerpc/platforms/85xx/mpc85xx.h b/arch/powerpc/platforms/85xx/mpc85xx.h
new file mode 100644
index 0000000..1a1b4eb
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx.h
@@ -0,0 +1,4 @@
+#ifndef MPC85xx_H
+#define MPC85xx_H
+extern int mpc85xx_common_publish_devices(void);
+#endif
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
index 3b2c9bb..153d2aa 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
@@ -35,6 +35,8 @@
 #include <sysdev/cpm2_pic.h>
 #endif
 
+#include "mpc85xx.h"
+
 #ifdef CONFIG_PCI
 static int mpc85xx_exclude_device(struct pci_controller *hose,
 				   u_char bus, u_char devfn)
@@ -221,23 +223,7 @@ static void mpc85xx_ads_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
 }
 
-static struct of_device_id __initdata of_bus_ids[] = {
-	{ .name = "soc", },
-	{ .type = "soc", },
-	{ .name = "cpm", },
-	{ .name = "localbus", },
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init declare_of_platform_devices(void)
-{
-	of_platform_bus_probe(NULL, of_bus_ids, NULL);
-
-	return 0;
-}
-machine_device_initcall(mpc85xx_ads, declare_of_platform_devices);
+machine_device_initcall(mpc85xx_ads, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 6299a2a..a978d46 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -47,6 +47,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include "mpc85xx.h"
+
 /* CADMUS info */
 /* xxx - galak, move into device tree */
 #define CADMUS_BASE (0xf8004000)
@@ -331,19 +333,7 @@ static int __init mpc85xx_cds_probe(void)
         return of_flat_dt_is_compatible(root, "MPC85xxCDS");
 }
 
-static struct of_device_id __initdata of_bus_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init declare_of_platform_devices(void)
-{
-	return of_platform_bus_probe(NULL, of_bus_ids, NULL);
-}
-machine_device_initcall(mpc85xx_cds, declare_of_platform_devices);
+machine_device_initcall(mpc85xx_cds, mpc85xx_common_publish_devices);
 
 define_machine(mpc85xx_cds) {
 	.name		= "MPC85xx CDS",
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_common.c b/arch/powerpc/platforms/85xx/mpc85xx_common.c
new file mode 100644
index 0000000..999567a
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_common.c
@@ -0,0 +1,26 @@
+/*
+ * Routines common to most mpc85xx-based boards.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+
+#include "mpc85xx.h"
+
+static struct of_device_id __initdata mpc85xx_common_ids[] = {
+	{ .type = "soc", },
+	{ .compatible = "soc", },
+	{ .compatible = "simple-bus", },
+	{ .compatible = "gianfar", },
+	{ .compatible = "fsl,qe", },
+	{ .compatible = "fsl,cpm2", },
+	{},
+};
+
+int __init mpc85xx_common_publish_devices(void)
+{
+	return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL);
+}
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index c7b97f7..4508804 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -36,6 +36,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include "mpc85xx.h"
+
 #undef DEBUG
 
 #ifdef DEBUG
@@ -218,21 +220,9 @@ static int __init mpc8544_ds_probe(void)
 	return 0;
 }
 
-static struct of_device_id __initdata mpc85xxds_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init mpc85xxds_publish_devices(void)
-{
-	return of_platform_bus_probe(NULL, mpc85xxds_ids, NULL);
-}
-machine_device_initcall(mpc8544_ds, mpc85xxds_publish_devices);
-machine_device_initcall(mpc8572_ds, mpc85xxds_publish_devices);
-machine_device_initcall(p2020_ds, mpc85xxds_publish_devices);
+machine_device_initcall(mpc8544_ds, mpc85xx_common_publish_devices);
+machine_device_initcall(mpc8572_ds, mpc85xx_common_publish_devices);
+machine_device_initcall(p2020_ds, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(mpc8544_ds, swiotlb_setup_bus_notifier);
 machine_arch_initcall(mpc8572_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index 747d1ee..fde37e5 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -53,6 +53,8 @@
 #include <asm/mpic.h>
 #include <asm/swiotlb.h>
 
+#include "mpc85xx.h"
+
 #undef DEBUG
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -159,25 +161,6 @@ extern void __init mpc85xx_smp_init(void);
 #endif
 
 #ifdef CONFIG_QUICC_ENGINE
-static struct of_device_id mpc85xx_qe_ids[] __initdata = {
-	{ .type = "qe", },
-	{ .compatible = "fsl,qe", },
-	{ },
-};
-
-static void __init mpc85xx_publish_qe_devices(void)
-{
-	struct device_node *np;
-
-	np = of_find_compatible_node(NULL, NULL, "fsl,qe");
-	if (!of_device_is_available(np)) {
-		of_node_put(np);
-		return;
-	}
-
-	of_platform_bus_probe(NULL, mpc85xx_qe_ids, NULL);
-}
-
 static void __init mpc85xx_mds_reset_ucc_phys(void)
 {
 	struct device_node *np;
@@ -348,7 +331,6 @@ static void __init mpc85xx_mds_qeic_init(void)
 	of_node_put(np);
 }
 #else
-static void __init mpc85xx_publish_qe_devices(void) { }
 static void __init mpc85xx_mds_qe_init(void) { }
 static void __init mpc85xx_mds_qeic_init(void) { }
 #endif	/* CONFIG_QUICC_ENGINE */
@@ -430,24 +412,12 @@ machine_arch_initcall(mpc8568_mds, board_fixups);
 machine_arch_initcall(mpc8569_mds, board_fixups);
 
 static struct of_device_id mpc85xx_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
 	{ .compatible = "fsl,rapidio-delta", },
 	{ .compatible = "fsl,mpc8548-guts", },
 	{ .compatible = "gpio-leds", },
 	{},
 };
 
-static struct of_device_id p1021_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
 static int __init mpc85xx_publish_devices(void)
 {
 	if (machine_is(mpc8568_mds))
@@ -455,23 +425,15 @@ static int __init mpc85xx_publish_devices(void)
 	if (machine_is(mpc8569_mds))
 		simple_gpiochip_init("fsl,mpc8569mds-bcsr-gpio");
 
+	mpc85xx_common_publish_devices();
 	of_platform_bus_probe(NULL, mpc85xx_ids, NULL);
-	mpc85xx_publish_qe_devices();
-
-	return 0;
-}
-
-static int __init p1021_publish_devices(void)
-{
-	of_platform_bus_probe(NULL, p1021_ids, NULL);
-	mpc85xx_publish_qe_devices();
 
 	return 0;
 }
 
 machine_device_initcall(mpc8568_mds, mpc85xx_publish_devices);
 machine_device_initcall(mpc8569_mds, mpc85xx_publish_devices);
-machine_device_initcall(p1021_mds, p1021_publish_devices);
+machine_device_initcall(p1021_mds, mpc85xx_common_publish_devices);
 
 machine_arch_initcall(mpc8568_mds, swiotlb_setup_bus_notifier);
 machine_arch_initcall(mpc8569_mds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index 088f30b..19d717e 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -30,6 +30,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include "mpc85xx.h"
+
 #undef DEBUG
 
 #ifdef DEBUG
@@ -108,20 +110,8 @@ static void __init mpc85xx_rdb_setup_arch(void)
 	printk(KERN_INFO "MPC85xx RDB board from Freescale Semiconductor\n");
 }
 
-static struct of_device_id __initdata mpc85xxrdb_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init mpc85xxrdb_publish_devices(void)
-{
-	return of_platform_bus_probe(NULL, mpc85xxrdb_ids, NULL);
-}
-machine_device_initcall(p2020_rdb, mpc85xxrdb_publish_devices);
-machine_device_initcall(p1020_rdb, mpc85xxrdb_publish_devices);
+machine_device_initcall(p2020_rdb, mpc85xx_common_publish_devices);
+machine_device_initcall(p1020_rdb, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
index d7387fa..2f479f8 100644
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -28,6 +28,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include "mpc85xx.h"
+
 void __init p1010_rdb_pic_init(void)
 {
 	struct mpic *mpic;
@@ -81,18 +83,7 @@ static void __init p1010_rdb_setup_arch(void)
 	printk(KERN_INFO "P1010 RDB board from Freescale Semiconductor\n");
 }
 
-static struct of_device_id __initdata p1010rdb_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .compatible = "simple-bus", },
-	{},
-};
-
-static int __init p1010rdb_publish_devices(void)
-{
-	return of_platform_bus_probe(NULL, p1010rdb_ids, NULL);
-}
-machine_device_initcall(p1010_rdb, p1010rdb_publish_devices);
+machine_device_initcall(p1010_rdb, mpc85xx_common_publish_devices);
 machine_arch_initcall(p1010_rdb, swiotlb_setup_bus_notifier);
 
 /*
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 266b3aa..26e6f75 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -27,6 +27,8 @@
 #include <sysdev/fsl_pci.h>
 #include <asm/fsl_guts.h>
 
+#include "mpc85xx.h"
+
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
 
 /*
@@ -325,10 +327,6 @@ static void __init p1022_ds_setup_arch(void)
 }
 
 static struct of_device_id __initdata p1022_ds_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
 	/* So that the DMA channel nodes can be probed individually: */
 	{ .compatible = "fsl,eloplus-dma", },
 	{},
@@ -338,6 +336,7 @@ static int __init p1022_ds_publish_devices(void)
 {
 	return of_platform_bus_probe(NULL, p1022_ds_ids, NULL);
 }
+machine_device_initcall(p1022_ds, mpc85xx_common_publish_devices);
 machine_device_initcall(p1022_ds, p1022_ds_publish_devices);
 
 machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier);
diff --git a/arch/powerpc/platforms/85xx/p1023_rds.c b/arch/powerpc/platforms/85xx/p1023_rds.c
index 835e0b3..e76c4dc 100644
--- a/arch/powerpc/platforms/85xx/p1023_rds.c
+++ b/arch/powerpc/platforms/85xx/p1023_rds.c
@@ -34,6 +34,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include "mpc85xx.h"
+
 /* ************************************************************************
  *
  * Setup the architecture
@@ -92,22 +94,7 @@ static void __init mpc85xx_rds_setup_arch(void)
 #endif
 }
 
-static struct of_device_id p1023_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .compatible = "simple-bus", },
-	{},
-};
-
-
-static int __init p1023_publish_devices(void)
-{
-	of_platform_bus_probe(NULL, p1023_ids, NULL);
-
-	return 0;
-}
-
-machine_device_initcall(p1023_rds, p1023_publish_devices);
+machine_device_initcall(p1023_rds, mpc85xx_common_publish_devices);
 
 static void __init mpc85xx_rds_pic_init(void)
 {
diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c
index ecdd8c0..a6605a2 100644
--- a/arch/powerpc/platforms/85xx/sbc8548.c
+++ b/arch/powerpc/platforms/85xx/sbc8548.c
@@ -49,6 +49,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include "mpc85xx.h"
+
 static int sbc_rev;
 
 static void __init sbc8548_pic_init(void)
@@ -150,21 +152,7 @@ static void sbc8548_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
 }
 
-static struct of_device_id __initdata of_bus_ids[] = {
-	{ .name = "soc", },
-	{ .type = "soc", },
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init declare_of_platform_devices(void)
-{
-	of_platform_bus_probe(NULL, of_bus_ids, NULL);
-
-	return 0;
-}
-machine_device_initcall(sbc8548, declare_of_platform_devices);
+machine_device_initcall(sbc8548, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/sbc8560.c b/arch/powerpc/platforms/85xx/sbc8560.c
index 678f5a8..bbb656f 100644
--- a/arch/powerpc/platforms/85xx/sbc8560.c
+++ b/arch/powerpc/platforms/85xx/sbc8560.c
@@ -37,6 +37,8 @@
 #include <sysdev/cpm2_pic.h>
 #endif
 
+#include "mpc85xx.h"
+
 #ifdef CONFIG_CPM2
 
 static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
@@ -208,23 +210,7 @@ static void sbc8560_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
 }
 
-static struct of_device_id __initdata of_bus_ids[] = {
-	{ .name = "soc", },
-	{ .type = "soc", },
-	{ .name = "cpm", },
-	{ .name = "localbus", },
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init declare_of_platform_devices(void)
-{
-	of_platform_bus_probe(NULL, of_bus_ids, NULL);
-
-	return 0;
-}
-machine_device_initcall(sbc8560, declare_of_platform_devices);
+machine_device_initcall(sbc8560, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c
index 747d8fb..fec496a 100644
--- a/arch/powerpc/platforms/85xx/socrates.c
+++ b/arch/powerpc/platforms/85xx/socrates.c
@@ -41,6 +41,7 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include "mpc85xx.h"
 #include "socrates_fpga_pic.h"
 
 static void __init socrates_pic_init(void)
@@ -96,17 +97,7 @@ static void __init socrates_setup_arch(void)
 #endif
 }
 
-static struct of_device_id __initdata socrates_of_bus_ids[] = {
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init socrates_publish_devices(void)
-{
-	return of_platform_bus_probe(NULL, socrates_of_bus_ids, NULL);
-}
-machine_device_initcall(socrates, socrates_publish_devices);
+machine_device_initcall(socrates, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c
index 5387e9f..ab80044 100644
--- a/arch/powerpc/platforms/85xx/stx_gp3.c
+++ b/arch/powerpc/platforms/85xx/stx_gp3.c
@@ -40,6 +40,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include "mpc85xx.h"
+
 #ifdef CONFIG_CPM2
 #include <asm/cpm2.h>
 #include <sysdev/cpm2_pic.h>
@@ -144,19 +146,7 @@ static void stx_gp3_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
 }
 
-static struct of_device_id __initdata of_bus_ids[] = {
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init declare_of_platform_devices(void)
-{
-	of_platform_bus_probe(NULL, of_bus_ids, NULL);
-
-	return 0;
-}
-machine_device_initcall(stx_gp3, declare_of_platform_devices);
+machine_device_initcall(stx_gp3, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
index 325de77..19e711f 100644
--- a/arch/powerpc/platforms/85xx/tqm85xx.c
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -38,6 +38,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include "mpc85xx.h"
+
 #ifdef CONFIG_CPM2
 #include <asm/cpm2.h>
 #include <sysdev/cpm2_pic.h>
@@ -173,19 +175,7 @@ static void __init tqm85xx_ti1520_fixup(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1520,
 		tqm85xx_ti1520_fixup);
 
-static struct of_device_id __initdata of_bus_ids[] = {
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init declare_of_platform_devices(void)
-{
-	of_platform_bus_probe(NULL, of_bus_ids, NULL);
-
-	return 0;
-}
-machine_device_initcall(tqm85xx, declare_of_platform_devices);
+machine_device_initcall(tqm85xx, mpc85xx_common_publish_devices);
 
 static const char *board[] __initdata = {
 	"tqc,tqm8540",
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index 0125604..0ee3721 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -33,6 +33,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include "mpc85xx.h"
+
 /* A few bit definitions needed for fixups on some boards */
 #define MPC85xx_L2CTL_L2E		0x80000000 /* L2 enable */
 #define MPC85xx_L2CTL_L2I		0x40000000 /* L2 flash invalidate */
@@ -177,21 +179,9 @@ static void __init xes_mpc85xx_setup_arch(void)
 #endif
 }
 
-static struct of_device_id __initdata xes_mpc85xx_ids[] = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .compatible = "simple-bus", },
-	{ .compatible = "gianfar", },
-	{},
-};
-
-static int __init xes_mpc85xx_publish_devices(void)
-{
-	return of_platform_bus_probe(NULL, xes_mpc85xx_ids, NULL);
-}
-machine_device_initcall(xes_mpc8572, xes_mpc85xx_publish_devices);
-machine_device_initcall(xes_mpc8548, xes_mpc85xx_publish_devices);
-machine_device_initcall(xes_mpc8540, xes_mpc85xx_publish_devices);
+machine_device_initcall(xes_mpc8572, mpc85xx_common_publish_devices);
+machine_device_initcall(xes_mpc8548, mpc85xx_common_publish_devices);
+machine_device_initcall(xes_mpc8540, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
-- 
1.7.2.5

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox