LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 12/20] powerpc: change to new flag variables
From: matt mooney @ 2010-09-23  6:51 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: kernel-janitors, Alexander Graf, linux-kernel, Paul Mackerras,
	Avi Kivity, linuxppc-dev

Replace EXTRA_CFLAGS with ccflags-y and EXTRA_AFLAGS with asflags-y.

Signed-off-by: matt mooney <mfm@muteddisk.com>
---
 arch/powerpc/kernel/vdso32/Makefile     |    6 +++---
 arch/powerpc/kernel/vdso64/Makefile     |    6 +++---
 arch/powerpc/kvm/Makefile               |    2 +-
 arch/powerpc/lib/Makefile               |    4 +---
 arch/powerpc/math-emu/Makefile          |    2 +-
 arch/powerpc/mm/Makefile                |    4 +---
 arch/powerpc/oprofile/Makefile          |    4 +---
 arch/powerpc/platforms/iseries/Makefile |    2 +-
 arch/powerpc/platforms/pseries/Makefile |   11 +++--------
 arch/powerpc/sysdev/Makefile            |    4 +---
 arch/powerpc/xmon/Makefile              |    4 +---
 11 files changed, 17 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 51ead52..9a7946c 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -14,10 +14,10 @@ obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
 
 GCOV_PROFILE := n
 
-EXTRA_CFLAGS := -shared -fno-common -fno-builtin
-EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
+ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
 		$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
-EXTRA_AFLAGS := -D__VDSO32__ -s
+asflags-y := -D__VDSO32__ -s
 
 obj-y += vdso32_wrapper.o
 extra-y += vdso32.lds
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 79da65d..8c500d8 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -9,10 +9,10 @@ obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
 
 GCOV_PROFILE := n
 
-EXTRA_CFLAGS := -shared -fno-common -fno-builtin
-EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
+ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
 		$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
-EXTRA_AFLAGS := -D__VDSO64__ -s
+asflags-y := -D__VDSO64__ -s
 
 obj-y += vdso64_wrapper.o
 extra-y += vdso64.lds
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index d45c818..4d68638 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -4,7 +4,7 @@
 
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
-EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm
+ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
 
 common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5bb89c8..e4b0c07 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -4,9 +4,7 @@
 
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
-ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS		+= -mno-minimal-toc
-endif
+ccflags-$(CONFIG_PPC64)	:= -mno-minimal-toc
 
 CFLAGS_REMOVE_code-patching.o = -pg
 CFLAGS_REMOVE_feature-fixups.o = -pg
diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile
index 0c16ab9..7d1dba0 100644
--- a/arch/powerpc/math-emu/Makefile
+++ b/arch/powerpc/math-emu/Makefile
@@ -15,4 +15,4 @@ obj-$(CONFIG_SPE)		+= math_efp.o
 CFLAGS_fabs.o = -fno-builtin-fabs
 CFLAGS_math.o = -fno-builtin-fabs
 
-EXTRA_CFLAGS = -I. -Iinclude/math-emu -w
+ccflags-y = -I. -Iinclude/math-emu -w
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index ce68708..53102f3 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -4,9 +4,7 @@
 
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
-ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS	+= -mno-minimal-toc
-endif
+ccflags-$(CONFIG_PPC64)	:= -mno-minimal-toc
 
 obj-y				:= fault.o mem.o pgtable.o gup.o \
 				   init_$(CONFIG_WORD_SIZE).o \
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile
index e219ca4..73456c4 100644
--- a/arch/powerpc/oprofile/Makefile
+++ b/arch/powerpc/oprofile/Makefile
@@ -1,8 +1,6 @@
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
-ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS	+= -mno-minimal-toc
-endif
+ccflags-$(CONFIG_PPC64)	:= -mno-minimal-toc
 
 obj-$(CONFIG_OPROFILE) += oprofile.o
 
diff --git a/arch/powerpc/platforms/iseries/Makefile b/arch/powerpc/platforms/iseries/Makefile
index ce01492..a7602b1 100644
--- a/arch/powerpc/platforms/iseries/Makefile
+++ b/arch/powerpc/platforms/iseries/Makefile
@@ -1,4 +1,4 @@
-EXTRA_CFLAGS	+= -mno-minimal-toc
+ccflags-y	:= -mno-minimal-toc
 
 obj-y += exception.o
 obj-y += hvlog.o hvlpconfig.o lpardata.o setup.o dt.o mf.o lpevents.o \
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 046ace9..7ee1599 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -1,10 +1,5 @@
-ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS		+= -mno-minimal-toc
-endif
-
-ifeq ($(CONFIG_PPC_PSERIES_DEBUG),y)
-EXTRA_CFLAGS		+= -DDEBUG
-endif
+ccflags-$(CONFIG_PPC64)			:= -mno-minimal-toc
+ccflags-$(CONFIG_PPC_PSERIES_DEBUG)	+= -DDEBUG
 
 obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
 			   setup.o iommu.o event_sources.o ras.o \
@@ -23,7 +18,7 @@ obj-$(CONFIG_MEMORY_HOTPLUG)	+= hotplug-memory.o
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
 obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
-obj-$(CONFIG_PHYP_DUMP)	+= phyp_dump.o
+obj-$(CONFIG_PHYP_DUMP)		+= phyp_dump.o
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_DTL)		+= dtl.o
 
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 5642924..c20ad6d 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -1,8 +1,6 @@
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
-ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS			+= -mno-minimal-toc
-endif
+ccflags-$(CONFIG_PPC64)		:= -mno-minimal-toc
 
 mpic-msi-obj-$(CONFIG_PCI_MSI)	+= mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o
 obj-$(CONFIG_MPIC)		+= mpic.o $(mpic-msi-obj-y)
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index faa81b6..c168c54 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -4,9 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 GCOV_PROFILE := n
 
-ifdef CONFIG_PPC64
-EXTRA_CFLAGS += -mno-minimal-toc
-endif
+ccflags-$(CONFIG_PPC64) := -mno-minimal-toc
 
 obj-y			+= xmon.o start.o nonstdio.o
 
-- 
1.7.2.1

^ permalink raw reply related

* Re: [U-Boot] cuImage and multi image?
From: Shawn Jin @ 2010-09-23  6:30 UTC (permalink / raw)
  To: Chen, Tiejun; +Cc: Scott Wood, ppcdev, uboot
In-Reply-To: <AANLkTi=xoc+4bDTG5MHuRMEPAwfn5vKsDEXJtO22JKiA@mail.gmail.com>

>> Can you paste the whole log from the u-boot prompt?
>
> In the previous run the ramdisk image was corrupted because the single
> image was loaded at 0x800000. But the boot message showed that the
> initrd image was at 0x0066c000-0x009ae825. So it was over the 8MB
> area.
>
> However after the load address was changed to 0x04000000 (64MB), the
> ramdisk still seemed corrupted but with different error messages.
>
> =3D> bootm
> ## Booting image at 04000000 ...
> =A0 Image Name: =A0 Linux-2.6.33.5
> =A0 Image Type: =A0 PowerPC Linux Kernel Image (gzip compressed)
> =A0 Data Size: =A0 =A04424922 Bytes =3D =A04.2 MB
> =A0 Load Address: 00400000
> =A0 Entry Point: =A000400554
> =A0 Verifying Checksum ... OK
> =A0 Uncompressing Kernel Image ... OK
> Memory <- <0x0 0x8000000> (128MB)
> ENET0: local-mac-address <- 00:09:9b:01:58:64
> CPU clock-frequency <- 0x7270e00 (120MHz)
> CPU timebase-frequency <- 0x7270e0 (8MHz)
> CPU bus-frequency <- 0x3938700 (60MHz)
>
> zImage starting: loaded at 0x00400000 (sp: 0x07d1cbd0)
> Allocating 0x22a1e1 bytes for kernel ...
> gunzipping (0x00000000 <- 0x0040c000:0x0066b0ac)...done 0x21c6c8 bytes
> Attached initrd image at 0x0066c000-0x009ae825
> initrd head: 0x1f8b0808
>
> Linux/PowerPC load: root=3D/dev/ram
> Finalizing device tree... flat tree at 0x9bb300
> Using my870 machine description
> Linux version 2.6.33.5 (shawn@ubuntu) (gcc version 4.2.2) #4 Tue Sep
> 21 09:23:51 PDT 2010
> Found initrd at 0xc066c000:0xc09ae825

The following shows the boot message that the same kernel and the same
ramdisk were loaded separately. The difference is that when boot from
two separate images, the ramdisk is loaded to the top of RAM
(0x79d9000-0x7d1b825). While when booting from the single image, the
ramdisk is loaded to the place immediately after the uncompressed
kernel image (0x0066c000-0x009ae825). I'm not familiar with how the
kernel uses the memory. But it seems clear from this failure that the
kernel overwrites to where the initrd locates.

Anyone can shed some light on why the kernel would overwrite the
initrd area? BTW, if the initrd is small enough, the single image
method works well. Maybe we should have relocated the initrd to the
top of available ram just like u-boot's bootm?

=3D> bootm 1000000 2000000
## Booting image at 01000000 ...
   Image Name:   Linux-2.6.33.5
   Image Type:   PowerPC Linux Kernel Image (gzip compressed)
   Data Size:    1040228 Bytes =3D 1015.8 kB
   Load Address: 00400000
   Entry Point:  00400554
   Verifying Checksum ... OK
   Uncompressing Kernel Image ... OK
## Loading RAMDisk Image at 02000000 ...
   Image Name:   16MB Ramdisk
   Image Type:   PowerPC Linux RAMDisk Image (gzip compressed)
   Data Size:    3418149 Bytes =3D  3.3 MB
   Load Address: 00000000
   Entry Point:  00000000
   Verifying Checksum ... OK
   Loading Ramdisk to 079d9000, end 07d1b825 ... OK
Memory <- <0x0 0x8000000> (128MB)
ENET0: local-mac-address <- 00:09:9b:01:58:64
CPU clock-frequency <- 0x7270e00 (120MHz)
CPU timebase-frequency <- 0x7270e0 (8MHz)
CPU bus-frequency <- 0x3938700 (60MHz)

zImage starting: loaded at 0x00400000 (sp: 0x07d1cbd0)
Allocating 0x22a1e1 bytes for kernel ...
gunzipping (0x00000000 <- 0x0040c000:0x0066b0ac)...done 0x21c6c8 bytes
Using loader supplied ramdisk at 0x79d9000-0x7d1b825
initrd head: 0x1f8b0808

Linux/PowerPC load: root=3D/dev/ram
Finalizing device tree... flat tree at 0x678300
Using my870 machine description
Linux version 2.6.33.5 (shawn@ubuntu) (gcc version 4.2.2) #4 Tue Sep
21 09:23:51 PDT 2010
Found initrd at 0xc79d9000:0xc7d1b825

Thanks,
-Shawn.

^ permalink raw reply

* RE: MPC8641D PEX: programming OWBAR in Endpoint mode?
From: Chen, Tiejun @ 2010-09-23  3:21 UTC (permalink / raw)
  To: david.hagood, linuxppc-dev
In-Reply-To: <08f8439b89ad5771221aaba1cee86fc4.squirrel@localhost>

> -----Original Message-----
> From:=20
> linuxppc-dev-bounces+tiejun.chen=3Dwindriver.com@lists.ozlabs.or
> g=20
> [mailto:linuxppc-dev-bounces+tiejun.chen=3Dwindriver.com@lists.o
zlabs.org] On Behalf Of david.hagood@gmail.com
> Sent: Wednesday, September 22, 2010 11:56 PM
> To: linuxppc-dev@ozlabs.org
> Subject: MPC8641D PEX: programming OWBAR in Endpoint mode?
>=20
> I am trying to get the PCIe interfaces of a Freescale=20
> MPC8641D working in endpoint mode (i.e. as a PCI device=20
> rather than a PCI root complex host).
>=20

As far as I recalled on 4xx the u-boot support for dynamic configuration
of PCIe ports for the targets equipped with PCIe interfaces. Often this
is done via the "pcie_mode" environement variable on the u-boot prompt.
On there you can set to "EP" or "RP" for endpoint or rootpoint mode.

But for Freescale MPC86xx this should be set accordin to the external
configure pin.

> I can get the device to show up on the host's PCI bus, I can=20

This only ensure you can access the PCIe configure space.

> program the inbound ATMUs such that the BARS are updated when=20
> the host (re-)scans them, but I cannot for the life of me get

What value are configured to IntBound REGs?
=20
> the PPC's Outbound ATMUS to work.
>=20
> When I attempt to program them, I can program ALL the=20
> registers EXCEPT the OWBAR - which steadfastly remains 0 no=20
> matter what I write to it.
>=20

How do you configure OWS of PEXOWAR?

I means you still access that if OWS is match the whole target memory
size even when '0' is as the internal platform address.

> As a result, when I attempt to bus master out from the PPC to=20
> the PCIe address spaces via the outbound ATMUs, I get a bus=20
> fault on the PPC as there is no device at the address I am accessing.
>=20
> I've double-checked the LAWs to make sure the PEX is mapped=20
> into local space, I've put the OWBAR in that address space,=20
> I've tried different outbound ATMUs, and NOTHING works. Not=20
> ATMU0, not ATMU1, etc.
>=20
> I've been trying to work with our Freescale rep, but I am=20
> getting nowhere on that front.
>=20
> Does anybody have any suggestions on what I might be doing=20
> wrong? I mean, it looks like it should be a simple=20
> out_be32(addr_of_OWBAR,value), just like all the other=20
> accesses to the ATMU registers that seem to be working (as=20
> in, I read back what I wrote).

Out_be32 should be fine for atmu REGs. And also you can refe to the
function, setup_pci_atmu & setup_one_atmu, on the file,
arch/powerpc/sysdev/fsl_pci.c, to know how to access atmu REGs. Often
you should disable them, configure then enable/invoke atmu antry as
normal configuring sequent.

Additionally I'm a bit afraid your initial phase :) As you know PCIe
would be used as RC mode on Freescale PowerPC kernel. So I don't know if
you also drop this path on your kernel to conflict each other :)=20

Cheers
Tiejun

>=20
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>=20

^ permalink raw reply

* Re: Modifying mpc8308rdb.dts
From: Kim Phillips @ 2010-09-22 23:26 UTC (permalink / raw)
  To: m.johansen; +Cc: linuxppc-dev
In-Reply-To: <DF61B4AD1C20A54EADF5C05E1F1110DE08D43B@Exchange.parkairsystems.net>

On Wed, 22 Sep 2010 12:32:20 +0200
Maria Johansen <m.johansen@no.parkairsystems.com> wrote:

> I am working on an MPC8308rdb, and have needed to add support for spi
> and an lm75 thermal sensor on the i2c-bus. Is it possible to upload my
> changes to the "official" mpc8308rdb.dts, or should I just keep
> modifying it locally and hope the maintainers of the file will add
> support for more devices soon?

posting patches beats waiting for an indefinite amount of time :)

Kim

^ permalink raw reply

* [RFC] irq: Migrate powerpc virq subsystem into generic code
From: Grant Likely @ 2010-09-22 20:32 UTC (permalink / raw)
  To: benh, devicetree-discuss, linuxppc-dev

Being able to dynamically manage linux irq ranges is useful.  Migrate
the powerpc virq code into common code so that other architectures can
use it.

This patch also removes the unused irq_early_init() references.

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
Only compile tested; but I wanted to get this out for comments.  I
think this is the right set of routines to generalize for virq on
other architectures.

g.

 arch/microblaze/kernel/setup.c |    2 
 arch/powerpc/Kconfig           |    3 
 arch/powerpc/include/asm/irq.h |  270 ----------------
 arch/powerpc/kernel/irq.c      |  659 --------------------------------------
 include/linux/virq.h           |  302 ++++++++++++++++++
 kernel/irq/Makefile            |    1 
 kernel/irq/virq.c              |  687 ++++++++++++++++++++++++++++++++++++++++
 7 files changed, 995 insertions(+), 929 deletions(-)
 create mode 100644 include/linux/virq.h
 create mode 100644 kernel/irq/virq.c

diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c
index f5f7688..39cf20d 100644
--- a/arch/microblaze/kernel/setup.c
+++ b/arch/microblaze/kernel/setup.c
@@ -51,8 +51,6 @@ void __init setup_arch(char **cmdline_p)
 
 	unflatten_device_tree();
 
-	/* NOTE I think that this function is not necessary to call */
-	/* irq_early_init(); */
 	setup_cpuinfo();
 
 	microblaze_cache_init();
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 631e5a0..cc06e59 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -146,6 +146,9 @@ config EARLY_PRINTK
 	bool
 	default y
 
+config VIRQ
+	def_bool y
+
 config COMPAT
 	bool
 	default y if PPC64
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 67ab5fb..6dea0cb 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -17,10 +17,6 @@
 #include <asm/atomic.h>
 
 
-/* Define a way to iterate across irqs. */
-#define for_each_irq(i) \
-	for ((i) = 0; (i) < NR_IRQS; ++(i))
-
 extern atomic_t ppc_n_lost_interrupts;
 
 /* This number is used when no interrupt has been assigned */
@@ -41,270 +37,6 @@ extern atomic_t ppc_n_lost_interrupts;
 /* Same thing, used by the generic IRQ code */
 #define NR_IRQS_LEGACY		NUM_ISA_INTERRUPTS
 
-/* This type is the placeholder for a hardware interrupt number. It has to
- * be big enough to enclose whatever representation is used by a given
- * platform.
- */
-typedef unsigned long irq_hw_number_t;
-
-/* Interrupt controller "host" data structure. This could be defined as a
- * irq domain controller. That is, it handles the mapping between hardware
- * and virtual interrupt numbers for a given interrupt domain. The host
- * structure is generally created by the PIC code for a given PIC instance
- * (though a host can cover more than one PIC if they have a flat number
- * model). It's the host callbacks that are responsible for setting the
- * irq_chip on a given irq_desc after it's been mapped.
- *
- * The host code and data structures are fairly agnostic to the fact that
- * we use an open firmware device-tree. We do have references to struct
- * device_node in two places: in irq_find_host() to find the host matching
- * a given interrupt controller node, and of course as an argument to its
- * counterpart host->ops->match() callback. However, those are treated as
- * generic pointers by the core and the fact that it's actually a device-node
- * pointer is purely a convention between callers and implementation. This
- * code could thus be used on other architectures by replacing those two
- * by some sort of arch-specific void * "token" used to identify interrupt
- * controllers.
- */
-struct irq_host;
-struct radix_tree_root;
-
-/* Functions below are provided by the host and called whenever a new mapping
- * is created or an old mapping is disposed. The host can then proceed to
- * whatever internal data structures management is required. It also needs
- * to setup the irq_desc when returning from map().
- */
-struct irq_host_ops {
-	/* Match an interrupt controller device node to a host, returns
-	 * 1 on a match
-	 */
-	int (*match)(struct irq_host *h, struct device_node *node);
-
-	/* Create or update a mapping between a virtual irq number and a hw
-	 * irq number. This is called only once for a given mapping.
-	 */
-	int (*map)(struct irq_host *h, unsigned int virq, irq_hw_number_t hw);
-
-	/* Dispose of such a mapping */
-	void (*unmap)(struct irq_host *h, unsigned int virq);
-
-	/* Update of such a mapping  */
-	void (*remap)(struct irq_host *h, unsigned int virq, irq_hw_number_t hw);
-
-	/* Translate device-tree interrupt specifier from raw format coming
-	 * from the firmware to a irq_hw_number_t (interrupt line number) and
-	 * type (sense) that can be passed to set_irq_type(). In the absence
-	 * of this callback, irq_create_of_mapping() and irq_of_parse_and_map()
-	 * will return the hw number in the first cell and IRQ_TYPE_NONE for
-	 * the type (which amount to keeping whatever default value the
-	 * interrupt controller has for that line)
-	 */
-	int (*xlate)(struct irq_host *h, struct device_node *ctrler,
-		     const u32 *intspec, unsigned int intsize,
-		     irq_hw_number_t *out_hwirq, unsigned int *out_type);
-};
-
-struct irq_host {
-	struct list_head	link;
-
-	/* type of reverse mapping technique */
-	unsigned int		revmap_type;
-#define IRQ_HOST_MAP_LEGACY     0 /* legacy 8259, gets irqs 1..15 */
-#define IRQ_HOST_MAP_NOMAP	1 /* no fast reverse mapping */
-#define IRQ_HOST_MAP_LINEAR	2 /* linear map of interrupts */
-#define IRQ_HOST_MAP_TREE	3 /* radix tree */
-	union {
-		struct {
-			unsigned int size;
-			unsigned int *revmap;
-		} linear;
-		struct radix_tree_root tree;
-	} revmap_data;
-	struct irq_host_ops	*ops;
-	void			*host_data;
-	irq_hw_number_t		inval_irq;
-
-	/* Optional device node pointer */
-	struct device_node	*of_node;
-};
-
-/* The main irq map itself is an array of NR_IRQ entries containing the
- * associate host and irq number. An entry with a host of NULL is free.
- * An entry can be allocated if it's free, the allocator always then sets
- * hwirq first to the host's invalid irq number and then fills ops.
- */
-struct irq_map_entry {
-	irq_hw_number_t	hwirq;
-	struct irq_host	*host;
-};
-
-extern struct irq_map_entry irq_map[NR_IRQS];
-
-extern irq_hw_number_t virq_to_hw(unsigned int virq);
-
-/**
- * irq_alloc_host - Allocate a new irq_host data structure
- * @of_node: optional device-tree node of the interrupt controller
- * @revmap_type: type of reverse mapping to use
- * @revmap_arg: for IRQ_HOST_MAP_LINEAR linear only: size of the map
- * @ops: map/unmap host callbacks
- * @inval_irq: provide a hw number in that host space that is always invalid
- *
- * Allocates and initialize and irq_host structure. Note that in the case of
- * IRQ_HOST_MAP_LEGACY, the map() callback will be called before this returns
- * for all legacy interrupts except 0 (which is always the invalid irq for
- * a legacy controller). For a IRQ_HOST_MAP_LINEAR, the map is allocated by
- * this call as well. For a IRQ_HOST_MAP_TREE, the radix tree will be allocated
- * later during boot automatically (the reverse mapping will use the slow path
- * until that happens).
- */
-extern struct irq_host *irq_alloc_host(struct device_node *of_node,
-				       unsigned int revmap_type,
-				       unsigned int revmap_arg,
-				       struct irq_host_ops *ops,
-				       irq_hw_number_t inval_irq);
-
-
-/**
- * irq_find_host - Locates a host for a given device node
- * @node: device-tree node of the interrupt controller
- */
-extern struct irq_host *irq_find_host(struct device_node *node);
-
-
-/**
- * irq_set_default_host - Set a "default" host
- * @host: default host pointer
- *
- * For convenience, it's possible to set a "default" host that will be used
- * whenever NULL is passed to irq_create_mapping(). It makes life easier for
- * platforms that want to manipulate a few hard coded interrupt numbers that
- * aren't properly represented in the device-tree.
- */
-extern void irq_set_default_host(struct irq_host *host);
-
-
-/**
- * irq_set_virq_count - Set the maximum number of virt irqs
- * @count: number of linux virtual irqs, capped with NR_IRQS
- *
- * This is mainly for use by platforms like iSeries who want to program
- * the virtual irq number in the controller to avoid the reverse mapping
- */
-extern void irq_set_virq_count(unsigned int count);
-
-
-/**
- * irq_create_mapping - Map a hardware interrupt into linux virq space
- * @host: host owning this hardware interrupt or NULL for default host
- * @hwirq: hardware irq number in that host space
- *
- * Only one mapping per hardware interrupt is permitted. Returns a linux
- * virq number.
- * If the sense/trigger is to be specified, set_irq_type() should be called
- * on the number returned from that call.
- */
-extern unsigned int irq_create_mapping(struct irq_host *host,
-				       irq_hw_number_t hwirq);
-
-
-/**
- * irq_dispose_mapping - Unmap an interrupt
- * @virq: linux virq number of the interrupt to unmap
- */
-extern void irq_dispose_mapping(unsigned int virq);
-
-/**
- * irq_find_mapping - Find a linux virq from an hw irq number.
- * @host: host owning this hardware interrupt
- * @hwirq: hardware irq number in that host space
- *
- * This is a slow path, for use by generic code. It's expected that an
- * irq controller implementation directly calls the appropriate low level
- * mapping function.
- */
-extern unsigned int irq_find_mapping(struct irq_host *host,
-				     irq_hw_number_t hwirq);
-
-/**
- * irq_create_direct_mapping - Allocate a virq for direct mapping
- * @host: host to allocate the virq for or NULL for default host
- *
- * This routine is used for irq controllers which can choose the hardware
- * interrupt numbers they generate. In such a case it's simplest to use
- * the linux virq as the hardware interrupt number.
- */
-extern unsigned int irq_create_direct_mapping(struct irq_host *host);
-
-/**
- * irq_radix_revmap_insert - Insert a hw irq to linux virq number mapping.
- * @host: host owning this hardware interrupt
- * @virq: linux irq number
- * @hwirq: hardware irq number in that host space
- *
- * This is for use by irq controllers that use a radix tree reverse
- * mapping for fast lookup.
- */
-extern void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
-				    irq_hw_number_t hwirq);
-
-/**
- * irq_radix_revmap_lookup - Find a linux virq from a hw irq number.
- * @host: host owning this hardware interrupt
- * @hwirq: hardware irq number in that host space
- *
- * This is a fast path, for use by irq controller code that uses radix tree
- * revmaps
- */
-extern unsigned int irq_radix_revmap_lookup(struct irq_host *host,
-					    irq_hw_number_t hwirq);
-
-/**
- * irq_linear_revmap - Find a linux virq from a hw irq number.
- * @host: host owning this hardware interrupt
- * @hwirq: hardware irq number in that host space
- *
- * This is a fast path, for use by irq controller code that uses linear
- * revmaps. It does fallback to the slow path if the revmap doesn't exist
- * yet and will create the revmap entry with appropriate locking
- */
-
-extern unsigned int irq_linear_revmap(struct irq_host *host,
-				      irq_hw_number_t hwirq);
-
-
-
-/**
- * irq_alloc_virt - Allocate virtual irq numbers
- * @host: host owning these new virtual irqs
- * @count: number of consecutive numbers to allocate
- * @hint: pass a hint number, the allocator will try to use a 1:1 mapping
- *
- * This is a low level function that is used internally by irq_create_mapping()
- * and that can be used by some irq controllers implementations for things
- * like allocating ranges of numbers for MSIs. The revmaps are left untouched.
- */
-extern unsigned int irq_alloc_virt(struct irq_host *host,
-				   unsigned int count,
-				   unsigned int hint);
-
-/**
- * irq_free_virt - Free virtual irq numbers
- * @virq: virtual irq number of the first interrupt to free
- * @count: number of interrupts to free
- *
- * This function is the opposite of irq_alloc_virt. It will not clear reverse
- * maps, this should be done previously by unmap'ing the interrupt. In fact,
- * all interrupts covered by the range being freed should have been unmapped
- * prior to calling this.
- */
-extern void irq_free_virt(unsigned int virq, unsigned int count);
-
-/**
- * irq_early_init - Init irq remapping subsystem
- */
-extern void irq_early_init(void);
-
 static __inline__ int irq_canonicalize(int irq)
 {
 	return irq;
@@ -342,5 +74,7 @@ extern int call_handle_irq(int irq, void *p1,
 			   struct thread_info *tp, void *func);
 extern void do_IRQ(struct pt_regs *regs);
 
+#include <linux/virq.h>
+
 #endif /* _ASM_IRQ_H */
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4a65386..86d8e42 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -523,553 +523,6 @@ void do_softirq(void)
 }
 
 
-/*
- * IRQ controller and virtual interrupts
- */
-
-static LIST_HEAD(irq_hosts);
-static DEFINE_RAW_SPINLOCK(irq_big_lock);
-static unsigned int revmap_trees_allocated;
-static DEFINE_MUTEX(revmap_trees_mutex);
-struct irq_map_entry irq_map[NR_IRQS];
-static unsigned int irq_virq_count = NR_IRQS;
-static struct irq_host *irq_default_host;
-
-irq_hw_number_t virq_to_hw(unsigned int virq)
-{
-	return irq_map[virq].hwirq;
-}
-EXPORT_SYMBOL_GPL(virq_to_hw);
-
-static int default_irq_host_match(struct irq_host *h, struct device_node *np)
-{
-	return h->of_node != NULL && h->of_node == np;
-}
-
-struct irq_host *irq_alloc_host(struct device_node *of_node,
-				unsigned int revmap_type,
-				unsigned int revmap_arg,
-				struct irq_host_ops *ops,
-				irq_hw_number_t inval_irq)
-{
-	struct irq_host *host;
-	unsigned int size = sizeof(struct irq_host);
-	unsigned int i;
-	unsigned int *rmap;
-	unsigned long flags;
-
-	/* Allocate structure and revmap table if using linear mapping */
-	if (revmap_type == IRQ_HOST_MAP_LINEAR)
-		size += revmap_arg * sizeof(unsigned int);
-	host = zalloc_maybe_bootmem(size, GFP_KERNEL);
-	if (host == NULL)
-		return NULL;
-
-	/* Fill structure */
-	host->revmap_type = revmap_type;
-	host->inval_irq = inval_irq;
-	host->ops = ops;
-	host->of_node = of_node_get(of_node);
-
-	if (host->ops->match == NULL)
-		host->ops->match = default_irq_host_match;
-
-	raw_spin_lock_irqsave(&irq_big_lock, flags);
-
-	/* If it's a legacy controller, check for duplicates and
-	 * mark it as allocated (we use irq 0 host pointer for that
-	 */
-	if (revmap_type == IRQ_HOST_MAP_LEGACY) {
-		if (irq_map[0].host != NULL) {
-			raw_spin_unlock_irqrestore(&irq_big_lock, flags);
-			/* If we are early boot, we can't free the structure,
-			 * too bad...
-			 * this will be fixed once slab is made available early
-			 * instead of the current cruft
-			 */
-			if (mem_init_done)
-				kfree(host);
-			return NULL;
-		}
-		irq_map[0].host = host;
-	}
-
-	list_add(&host->link, &irq_hosts);
-	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
-
-	/* Additional setups per revmap type */
-	switch(revmap_type) {
-	case IRQ_HOST_MAP_LEGACY:
-		/* 0 is always the invalid number for legacy */
-		host->inval_irq = 0;
-		/* setup us as the host for all legacy interrupts */
-		for (i = 1; i < NUM_ISA_INTERRUPTS; i++) {
-			irq_map[i].hwirq = i;
-			smp_wmb();
-			irq_map[i].host = host;
-			smp_wmb();
-
-			/* Clear norequest flags */
-			irq_to_desc(i)->status &= ~IRQ_NOREQUEST;
-
-			/* Legacy flags are left to default at this point,
-			 * one can then use irq_create_mapping() to
-			 * explicitly change them
-			 */
-			ops->map(host, i, i);
-		}
-		break;
-	case IRQ_HOST_MAP_LINEAR:
-		rmap = (unsigned int *)(host + 1);
-		for (i = 0; i < revmap_arg; i++)
-			rmap[i] = NO_IRQ;
-		host->revmap_data.linear.size = revmap_arg;
-		smp_wmb();
-		host->revmap_data.linear.revmap = rmap;
-		break;
-	default:
-		break;
-	}
-
-	pr_debug("irq: Allocated host of type %d @0x%p\n", revmap_type, host);
-
-	return host;
-}
-
-struct irq_host *irq_find_host(struct device_node *node)
-{
-	struct irq_host *h, *found = NULL;
-	unsigned long flags;
-
-	/* We might want to match the legacy controller last since
-	 * it might potentially be set to match all interrupts in
-	 * the absence of a device node. This isn't a problem so far
-	 * yet though...
-	 */
-	raw_spin_lock_irqsave(&irq_big_lock, flags);
-	list_for_each_entry(h, &irq_hosts, link)
-		if (h->ops->match(h, node)) {
-			found = h;
-			break;
-		}
-	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
-	return found;
-}
-EXPORT_SYMBOL_GPL(irq_find_host);
-
-void irq_set_default_host(struct irq_host *host)
-{
-	pr_debug("irq: Default host set to @0x%p\n", host);
-
-	irq_default_host = host;
-}
-
-void irq_set_virq_count(unsigned int count)
-{
-	pr_debug("irq: Trying to set virq count to %d\n", count);
-
-	BUG_ON(count < NUM_ISA_INTERRUPTS);
-	if (count < NR_IRQS)
-		irq_virq_count = count;
-}
-
-static int irq_setup_virq(struct irq_host *host, unsigned int virq,
-			    irq_hw_number_t hwirq)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc_alloc_node(virq, 0);
-	if (!desc) {
-		pr_debug("irq: -> allocating desc failed\n");
-		goto error;
-	}
-
-	/* Clear IRQ_NOREQUEST flag */
-	desc->status &= ~IRQ_NOREQUEST;
-
-	/* map it */
-	smp_wmb();
-	irq_map[virq].hwirq = hwirq;
-	smp_mb();
-
-	if (host->ops->map(host, virq, hwirq)) {
-		pr_debug("irq: -> mapping failed, freeing\n");
-		goto error;
-	}
-
-	return 0;
-
-error:
-	irq_free_virt(virq, 1);
-	return -1;
-}
-
-unsigned int irq_create_direct_mapping(struct irq_host *host)
-{
-	unsigned int virq;
-
-	if (host == NULL)
-		host = irq_default_host;
-
-	BUG_ON(host == NULL);
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_NOMAP);
-
-	virq = irq_alloc_virt(host, 1, 0);
-	if (virq == NO_IRQ) {
-		pr_debug("irq: create_direct virq allocation failed\n");
-		return NO_IRQ;
-	}
-
-	pr_debug("irq: create_direct obtained virq %d\n", virq);
-
-	if (irq_setup_virq(host, virq, virq))
-		return NO_IRQ;
-
-	return virq;
-}
-
-unsigned int irq_create_mapping(struct irq_host *host,
-				irq_hw_number_t hwirq)
-{
-	unsigned int virq, hint;
-
-	pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", host, hwirq);
-
-	/* Look for default host if nececssary */
-	if (host == NULL)
-		host = irq_default_host;
-	if (host == NULL) {
-		printk(KERN_WARNING "irq_create_mapping called for"
-		       " NULL host, hwirq=%lx\n", hwirq);
-		WARN_ON(1);
-		return NO_IRQ;
-	}
-	pr_debug("irq: -> using host @%p\n", host);
-
-	/* Check if mapping already exist, if it does, call
-	 * host->ops->map() to update the flags
-	 */
-	virq = irq_find_mapping(host, hwirq);
-	if (virq != NO_IRQ) {
-		if (host->ops->remap)
-			host->ops->remap(host, virq, hwirq);
-		pr_debug("irq: -> existing mapping on virq %d\n", virq);
-		return virq;
-	}
-
-	/* Get a virtual interrupt number */
-	if (host->revmap_type == IRQ_HOST_MAP_LEGACY) {
-		/* Handle legacy */
-		virq = (unsigned int)hwirq;
-		if (virq == 0 || virq >= NUM_ISA_INTERRUPTS)
-			return NO_IRQ;
-		return virq;
-	} else {
-		/* Allocate a virtual interrupt number */
-		hint = hwirq % irq_virq_count;
-		virq = irq_alloc_virt(host, 1, hint);
-		if (virq == NO_IRQ) {
-			pr_debug("irq: -> virq allocation failed\n");
-			return NO_IRQ;
-		}
-	}
-
-	if (irq_setup_virq(host, virq, hwirq))
-		return NO_IRQ;
-
-	printk(KERN_DEBUG "irq: irq %lu on host %s mapped to virtual irq %u\n",
-		hwirq, host->of_node ? host->of_node->full_name : "null", virq);
-
-	return virq;
-}
-EXPORT_SYMBOL_GPL(irq_create_mapping);
-
-unsigned int irq_create_of_mapping(struct device_node *controller,
-				   const u32 *intspec, unsigned int intsize)
-{
-	struct irq_host *host;
-	irq_hw_number_t hwirq;
-	unsigned int type = IRQ_TYPE_NONE;
-	unsigned int virq;
-
-	if (controller == NULL)
-		host = irq_default_host;
-	else
-		host = irq_find_host(controller);
-	if (host == NULL) {
-		printk(KERN_WARNING "irq: no irq host found for %s !\n",
-		       controller->full_name);
-		return NO_IRQ;
-	}
-
-	/* If host has no translation, then we assume interrupt line */
-	if (host->ops->xlate == NULL)
-		hwirq = intspec[0];
-	else {
-		if (host->ops->xlate(host, controller, intspec, intsize,
-				     &hwirq, &type))
-			return NO_IRQ;
-	}
-
-	/* Create mapping */
-	virq = irq_create_mapping(host, hwirq);
-	if (virq == NO_IRQ)
-		return virq;
-
-	/* Set type if specified and different than the current one */
-	if (type != IRQ_TYPE_NONE &&
-	    type != (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
-		set_irq_type(virq, type);
-	return virq;
-}
-EXPORT_SYMBOL_GPL(irq_create_of_mapping);
-
-void irq_dispose_mapping(unsigned int virq)
-{
-	struct irq_host *host;
-	irq_hw_number_t hwirq;
-
-	if (virq == NO_IRQ)
-		return;
-
-	host = irq_map[virq].host;
-	WARN_ON (host == NULL);
-	if (host == NULL)
-		return;
-
-	/* Never unmap legacy interrupts */
-	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
-		return;
-
-	/* remove chip and handler */
-	set_irq_chip_and_handler(virq, NULL, NULL);
-
-	/* Make sure it's completed */
-	synchronize_irq(virq);
-
-	/* Tell the PIC about it */
-	if (host->ops->unmap)
-		host->ops->unmap(host, virq);
-	smp_mb();
-
-	/* Clear reverse map */
-	hwirq = irq_map[virq].hwirq;
-	switch(host->revmap_type) {
-	case IRQ_HOST_MAP_LINEAR:
-		if (hwirq < host->revmap_data.linear.size)
-			host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
-		break;
-	case IRQ_HOST_MAP_TREE:
-		/*
-		 * Check if radix tree allocated yet, if not then nothing to
-		 * remove.
-		 */
-		smp_rmb();
-		if (revmap_trees_allocated < 1)
-			break;
-		mutex_lock(&revmap_trees_mutex);
-		radix_tree_delete(&host->revmap_data.tree, hwirq);
-		mutex_unlock(&revmap_trees_mutex);
-		break;
-	}
-
-	/* Destroy map */
-	smp_mb();
-	irq_map[virq].hwirq = host->inval_irq;
-
-	/* Set some flags */
-	irq_to_desc(virq)->status |= IRQ_NOREQUEST;
-
-	/* Free it */
-	irq_free_virt(virq, 1);
-}
-EXPORT_SYMBOL_GPL(irq_dispose_mapping);
-
-unsigned int irq_find_mapping(struct irq_host *host,
-			      irq_hw_number_t hwirq)
-{
-	unsigned int i;
-	unsigned int hint = hwirq % irq_virq_count;
-
-	/* Look for default host if nececssary */
-	if (host == NULL)
-		host = irq_default_host;
-	if (host == NULL)
-		return NO_IRQ;
-
-	/* legacy -> bail early */
-	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
-		return hwirq;
-
-	/* Slow path does a linear search of the map */
-	if (hint < NUM_ISA_INTERRUPTS)
-		hint = NUM_ISA_INTERRUPTS;
-	i = hint;
-	do  {
-		if (irq_map[i].host == host &&
-		    irq_map[i].hwirq == hwirq)
-			return i;
-		i++;
-		if (i >= irq_virq_count)
-			i = NUM_ISA_INTERRUPTS;
-	} while(i != hint);
-	return NO_IRQ;
-}
-EXPORT_SYMBOL_GPL(irq_find_mapping);
-
-
-unsigned int irq_radix_revmap_lookup(struct irq_host *host,
-				     irq_hw_number_t hwirq)
-{
-	struct irq_map_entry *ptr;
-	unsigned int virq;
-
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
-
-	/*
-	 * Check if the radix tree exists and has bee initialized.
-	 * If not, we fallback to slow mode
-	 */
-	if (revmap_trees_allocated < 2)
-		return irq_find_mapping(host, hwirq);
-
-	/* Now try to resolve */
-	/*
-	 * No rcu_read_lock(ing) needed, the ptr returned can't go under us
-	 * as it's referencing an entry in the static irq_map table.
-	 */
-	ptr = radix_tree_lookup(&host->revmap_data.tree, hwirq);
-
-	/*
-	 * If found in radix tree, then fine.
-	 * Else fallback to linear lookup - this should not happen in practice
-	 * as it means that we failed to insert the node in the radix tree.
-	 */
-	if (ptr)
-		virq = ptr - irq_map;
-	else
-		virq = irq_find_mapping(host, hwirq);
-
-	return virq;
-}
-
-void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
-			     irq_hw_number_t hwirq)
-{
-
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
-
-	/*
-	 * Check if the radix tree exists yet.
-	 * If not, then the irq will be inserted into the tree when it gets
-	 * initialized.
-	 */
-	smp_rmb();
-	if (revmap_trees_allocated < 1)
-		return;
-
-	if (virq != NO_IRQ) {
-		mutex_lock(&revmap_trees_mutex);
-		radix_tree_insert(&host->revmap_data.tree, hwirq,
-				  &irq_map[virq]);
-		mutex_unlock(&revmap_trees_mutex);
-	}
-}
-
-unsigned int irq_linear_revmap(struct irq_host *host,
-			       irq_hw_number_t hwirq)
-{
-	unsigned int *revmap;
-
-	WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR);
-
-	/* Check revmap bounds */
-	if (unlikely(hwirq >= host->revmap_data.linear.size))
-		return irq_find_mapping(host, hwirq);
-
-	/* Check if revmap was allocated */
-	revmap = host->revmap_data.linear.revmap;
-	if (unlikely(revmap == NULL))
-		return irq_find_mapping(host, hwirq);
-
-	/* Fill up revmap with slow path if no mapping found */
-	if (unlikely(revmap[hwirq] == NO_IRQ))
-		revmap[hwirq] = irq_find_mapping(host, hwirq);
-
-	return revmap[hwirq];
-}
-
-unsigned int irq_alloc_virt(struct irq_host *host,
-			    unsigned int count,
-			    unsigned int hint)
-{
-	unsigned long flags;
-	unsigned int i, j, found = NO_IRQ;
-
-	if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS))
-		return NO_IRQ;
-
-	raw_spin_lock_irqsave(&irq_big_lock, flags);
-
-	/* Use hint for 1 interrupt if any */
-	if (count == 1 && hint >= NUM_ISA_INTERRUPTS &&
-	    hint < irq_virq_count && irq_map[hint].host == NULL) {
-		found = hint;
-		goto hint_found;
-	}
-
-	/* Look for count consecutive numbers in the allocatable
-	 * (non-legacy) space
-	 */
-	for (i = NUM_ISA_INTERRUPTS, j = 0; i < irq_virq_count; i++) {
-		if (irq_map[i].host != NULL)
-			j = 0;
-		else
-			j++;
-
-		if (j == count) {
-			found = i - count + 1;
-			break;
-		}
-	}
-	if (found == NO_IRQ) {
-		raw_spin_unlock_irqrestore(&irq_big_lock, flags);
-		return NO_IRQ;
-	}
- hint_found:
-	for (i = found; i < (found + count); i++) {
-		irq_map[i].hwirq = host->inval_irq;
-		smp_wmb();
-		irq_map[i].host = host;
-	}
-	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
-	return found;
-}
-
-void irq_free_virt(unsigned int virq, unsigned int count)
-{
-	unsigned long flags;
-	unsigned int i;
-
-	WARN_ON (virq < NUM_ISA_INTERRUPTS);
-	WARN_ON (count == 0 || (virq + count) > irq_virq_count);
-
-	raw_spin_lock_irqsave(&irq_big_lock, flags);
-	for (i = virq; i < (virq + count); i++) {
-		struct irq_host *host;
-
-		if (i < NUM_ISA_INTERRUPTS ||
-		    (virq + count) > irq_virq_count)
-			continue;
-
-		host = irq_map[i].host;
-		irq_map[i].hwirq = host->inval_irq;
-		smp_wmb();
-		irq_map[i].host = NULL;
-	}
-	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
-}
-
 int arch_early_irq_init(void)
 {
 	struct irq_desc *desc;
@@ -1090,118 +543,6 @@ int arch_init_chip_data(struct irq_desc *desc, int node)
 	return 0;
 }
 
-/* We need to create the radix trees late */
-static int irq_late_init(void)
-{
-	struct irq_host *h;
-	unsigned int i;
-
-	/*
-	 * No mutual exclusion with respect to accessors of the tree is needed
-	 * here as the synchronization is done via the state variable
-	 * revmap_trees_allocated.
-	 */
-	list_for_each_entry(h, &irq_hosts, link) {
-		if (h->revmap_type == IRQ_HOST_MAP_TREE)
-			INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL);
-	}
-
-	/*
-	 * Make sure the radix trees inits are visible before setting
-	 * the flag
-	 */
-	smp_wmb();
-	revmap_trees_allocated = 1;
-
-	/*
-	 * Insert the reverse mapping for those interrupts already present
-	 * in irq_map[].
-	 */
-	mutex_lock(&revmap_trees_mutex);
-	for (i = 0; i < irq_virq_count; i++) {
-		if (irq_map[i].host &&
-		    (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE))
-			radix_tree_insert(&irq_map[i].host->revmap_data.tree,
-					  irq_map[i].hwirq, &irq_map[i]);
-	}
-	mutex_unlock(&revmap_trees_mutex);
-
-	/*
-	 * Make sure the radix trees insertions are visible before setting
-	 * the flag
-	 */
-	smp_wmb();
-	revmap_trees_allocated = 2;
-
-	return 0;
-}
-arch_initcall(irq_late_init);
-
-#ifdef CONFIG_VIRQ_DEBUG
-static int virq_debug_show(struct seq_file *m, void *private)
-{
-	unsigned long flags;
-	struct irq_desc *desc;
-	const char *p;
-	char none[] = "none";
-	int i;
-
-	seq_printf(m, "%-5s  %-7s  %-15s  %s\n", "virq", "hwirq",
-		      "chip name", "host name");
-
-	for (i = 1; i < nr_irqs; i++) {
-		desc = irq_to_desc(i);
-		if (!desc)
-			continue;
-
-		raw_spin_lock_irqsave(&desc->lock, flags);
-
-		if (desc->action && desc->action->handler) {
-			seq_printf(m, "%5d  ", i);
-			seq_printf(m, "0x%05lx  ", virq_to_hw(i));
-
-			if (desc->chip && desc->chip->name)
-				p = desc->chip->name;
-			else
-				p = none;
-			seq_printf(m, "%-15s  ", p);
-
-			if (irq_map[i].host && irq_map[i].host->of_node)
-				p = irq_map[i].host->of_node->full_name;
-			else
-				p = none;
-			seq_printf(m, "%s\n", p);
-		}
-
-		raw_spin_unlock_irqrestore(&desc->lock, flags);
-	}
-
-	return 0;
-}
-
-static int virq_debug_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, virq_debug_show, inode->i_private);
-}
-
-static const struct file_operations virq_debug_fops = {
-	.open = virq_debug_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int __init irq_debugfs_init(void)
-{
-	if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root,
-				 NULL, &virq_debug_fops) == NULL)
-		return -ENOMEM;
-
-	return 0;
-}
-__initcall(irq_debugfs_init);
-#endif /* CONFIG_VIRQ_DEBUG */
-
 #ifdef CONFIG_PPC64
 static int __init setup_noirqdistrib(char *str)
 {
diff --git a/include/linux/virq.h b/include/linux/virq.h
new file mode 100644
index 0000000..06035ef
--- /dev/null
+++ b/include/linux/virq.h
@@ -0,0 +1,302 @@
+/*
+ * Virtual IRQ infrastructure
+ *
+ * Virtual IRQs provides support for dynamically allocating ranges of IRQ
+ * numbers for use by interrupt controllers.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+
+#ifdef __KERNEL__
+#ifndef _LINUX_VIRQ_H
+#define _LINUX_VIRQ_H
+
+#include <asm/irq.h>
+
+#ifdef CONFIG_VIRQ
+
+/* Define a way to iterate across irqs. */
+#define for_each_irq(i) \
+	for ((i) = 0; (i) < NR_IRQS; ++(i))
+
+/* This type is the placeholder for a hardware interrupt number. It has to
+ * be big enough to enclose whatever representation is used by a given
+ * platform.
+ */
+typedef unsigned long irq_hw_number_t;
+
+/* Interrupt controller "host" data structure. This could be defined as a
+ * irq domain controller. That is, it handles the mapping between hardware
+ * and virtual interrupt numbers for a given interrupt domain. The host
+ * structure is generally created by the PIC code for a given PIC instance
+ * (though a host can cover more than one PIC if they have a flat number
+ * model). It's the host callbacks that are responsible for setting the
+ * irq_chip on a given irq_desc after it's been mapped.
+ *
+ * The host code and data structures are fairly agnostic to the fact that
+ * we use an open firmware device-tree. We do have references to struct
+ * device_node in two places: in irq_find_host() to find the host matching
+ * a given interrupt controller node, and of course as an argument to its
+ * counterpart host->ops->match() callback. However, those are treated as
+ * generic pointers by the core and the fact that it's actually a device-node
+ * pointer is purely a convention between callers and implementation. This
+ * code could thus be used on other architectures by replacing those two
+ * by some sort of arch-specific void * "token" used to identify interrupt
+ * controllers.
+ */
+struct irq_host;
+struct radix_tree_root;
+struct device_node;
+
+/**
+ * struct irq_host_ops - operations for managing per-domain hw irq numbers
+ *
+ * Functions below are provided by the host and called whenever a new mapping
+ * is created or an old mapping is disposed. The host can then proceed to
+ * whatever internal data structures management is required. It also needs
+ * to setup the irq_desc when returning from map().
+ */
+struct irq_host_ops {
+	/* Match an interrupt controller device node to a host, returns
+	 * 1 on a match
+	 */
+	int (*match)(struct irq_host *h, struct device_node *node);
+
+	/* Create or update a mapping between a virtual irq number and a hw
+	 * irq number. This is called only once for a given mapping.
+	 */
+	int (*map)(struct irq_host *h, unsigned int virq, irq_hw_number_t hw);
+
+	/* Dispose of such a mapping */
+	void (*unmap)(struct irq_host *h, unsigned int virq);
+
+	/* Update of such a mapping  */
+	void (*remap)(struct irq_host *h, unsigned int virq, irq_hw_number_t hw);
+
+	/* Translate device-tree interrupt specifier from raw format coming
+	 * from the firmware to a irq_hw_number_t (interrupt line number) and
+	 * type (sense) that can be passed to set_irq_type(). In the absence
+	 * of this callback, irq_create_of_mapping() and irq_of_parse_and_map()
+	 * will return the hw number in the first cell and IRQ_TYPE_NONE for
+	 * the type (which amount to keeping whatever default value the
+	 * interrupt controller has for that line)
+	 */
+	int (*xlate)(struct irq_host *h, struct device_node *ctrler,
+		     const u32 *intspec, unsigned int intsize,
+		     irq_hw_number_t *out_hwirq, unsigned int *out_type);
+};
+
+/**
+ * struct irq_host - a single irq domain. maps hw irq numbers to Linux irq.
+ * @link: entry in global irq_host list
+ * @revmap_type: Method of reverse mapping hwirq to Linux irq number
+ * @revmap_data: reverse map data
+ * @ops: irq domain operations (documented above)
+ * @host_data: irq controller driver data; core does not touch this pointer
+ * @inval_irq: hw irq number used for unassigned virqs
+ * @of_node: Optional pointer to the irq controllers device tree node.
+ *
+ * One irq_host is allocated for each range (domain) of Linux irq numbers
+ * allocated.  Typically, one irq_host is allocated per controller, but it
+ * is perfectly valid to manage multiple controllers with a single irq_host
+ * instance if need be.
+ */
+struct irq_host {
+	struct list_head	link;
+
+	/* type of reverse mapping technique */
+	unsigned int		revmap_type;
+#define IRQ_HOST_MAP_LEGACY     0 /* legacy 8259, gets irqs 1..15 */
+#define IRQ_HOST_MAP_NOMAP	1 /* no fast reverse mapping */
+#define IRQ_HOST_MAP_LINEAR	2 /* linear map of interrupts */
+#define IRQ_HOST_MAP_TREE	3 /* radix tree */
+	union {
+		struct {
+			unsigned int size;
+			unsigned int *revmap;
+		} linear;
+		struct radix_tree_root tree;
+	} revmap_data;
+	struct irq_host_ops	*ops;
+	void			*host_data;
+	irq_hw_number_t		inval_irq;
+
+	/* Optional device node pointer */
+	struct device_node	*of_node;
+};
+
+/**
+ * irq_alloc_host() - Allocate a new irq_host data structure
+ * @of_node: optional device-tree node of the interrupt controller
+ * @revmap_type: type of reverse mapping to use
+ * @revmap_arg: for IRQ_HOST_MAP_LINEAR linear only: size of the map
+ * @ops: map/unmap host callbacks
+ * @inval_irq: provide a hw number in that host space that is always invalid
+ *
+ * Allocates and initialize and irq_host structure. Note that in the case of
+ * IRQ_HOST_MAP_LEGACY, the map() callback will be called before this returns
+ * for all legacy interrupts except 0 (which is always the invalid irq for
+ * a legacy controller). For a IRQ_HOST_MAP_LINEAR, the map is allocated by
+ * this call as well. For a IRQ_HOST_MAP_TREE, the radix tree will be allocated
+ * later during boot automatically (the reverse mapping will use the slow path
+ * until that happens).
+ */
+extern struct irq_host *irq_alloc_host(struct device_node *of_node,
+				       unsigned int revmap_type,
+				       unsigned int revmap_arg,
+				       struct irq_host_ops *ops,
+				       irq_hw_number_t inval_irq);
+
+/* The main irq map itself is an array of NR_IRQ entries containing the
+ * associate host and irq number. An entry with a host of NULL is free.
+ * An entry can be allocated if it's free, the allocator always then sets
+ * hwirq first to the host's invalid irq number and then fills ops.
+ */
+struct irq_map_entry {
+	irq_hw_number_t	hwirq;
+	struct irq_host	*host;
+};
+extern struct irq_map_entry irq_map[NR_IRQS];
+
+extern irq_hw_number_t virq_to_hw(unsigned int virq);
+
+/**
+ * irq_find_host - Locates a host for a given device node
+ * @node: device-tree node of the interrupt controller
+ */
+extern struct irq_host *irq_find_host(struct device_node *node);
+
+/**
+ * irq_set_default_host - Set a "default" host
+ * @host: default host pointer
+ *
+ * For convenience, it's possible to set a "default" host that will be used
+ * whenever NULL is passed to irq_create_mapping(). It makes life easier for
+ * platforms that want to manipulate a few hard coded interrupt numbers that
+ * aren't properly represented in the device-tree.
+ */
+extern void irq_set_default_host(struct irq_host *host);
+
+/**
+ * irq_set_virq_count - Set the maximum number of virt irqs
+ * @count: number of linux virtual irqs, capped with NR_IRQS
+ *
+ * This is mainly for use by platforms like iSeries who want to program
+ * the virtual irq number in the controller to avoid the reverse mapping
+ */
+extern void irq_set_virq_count(unsigned int count);
+
+/**
+ * irq_create_mapping - Map a hardware interrupt into linux virq space
+ * @host: host owning this hardware interrupt or NULL for default host
+ * @hwirq: hardware irq number in that host space
+ *
+ * Only one mapping per hardware interrupt is permitted. Returns a linux
+ * virq number.
+ * If the sense/trigger is to be specified, set_irq_type() should be called
+ * on the number returned from that call.
+ */
+extern unsigned int irq_create_mapping(struct irq_host *host,
+				       irq_hw_number_t hwirq);
+
+/**
+ * irq_dispose_mapping - Unmap an interrupt
+ * @virq: linux virq number of the interrupt to unmap
+ */
+extern void irq_dispose_mapping(unsigned int virq);
+
+/**
+ * irq_find_mapping - Find a linux virq from an hw irq number.
+ * @host: host owning this hardware interrupt
+ * @hwirq: hardware irq number in that host space
+ *
+ * This is a slow path, for use by generic code. It's expected that an
+ * irq controller implementation directly calls the appropriate low level
+ * mapping function.
+ */
+extern unsigned int irq_find_mapping(struct irq_host *host,
+				     irq_hw_number_t hwirq);
+
+/**
+ * irq_create_direct_mapping - Allocate a virq for direct mapping
+ * @host: host to allocate the virq for or NULL for default host
+ *
+ * This routine is used for irq controllers which can choose the hardware
+ * interrupt numbers they generate. In such a case it's simplest to use
+ * the linux virq as the hardware interrupt number.
+ */
+extern unsigned int irq_create_direct_mapping(struct irq_host *host);
+
+/**
+ * irq_radix_revmap_insert - Insert a hw irq to linux virq number mapping.
+ * @host: host owning this hardware interrupt
+ * @virq: linux irq number
+ * @hwirq: hardware irq number in that host space
+ *
+ * This is for use by irq controllers that use a radix tree reverse
+ * mapping for fast lookup.
+ */
+extern void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
+				    irq_hw_number_t hwirq);
+
+/**
+ * irq_radix_revmap_lookup - Find a linux virq from a hw irq number.
+ * @host: host owning this hardware interrupt
+ * @hwirq: hardware irq number in that host space
+ *
+ * This is a fast path, for use by irq controller code that uses radix tree
+ * revmaps
+ */
+extern unsigned int irq_radix_revmap_lookup(struct irq_host *host,
+					    irq_hw_number_t hwirq);
+
+/**
+ * irq_linear_revmap - Find a linux virq from a hw irq number.
+ * @host: host owning this hardware interrupt
+ * @hwirq: hardware irq number in that host space
+ *
+ * This is a fast path, for use by irq controller code that uses linear
+ * revmaps. It does fallback to the slow path if the revmap doesn't exist
+ * yet and will create the revmap entry with appropriate locking
+ */
+
+extern unsigned int irq_linear_revmap(struct irq_host *host,
+				      irq_hw_number_t hwirq);
+
+
+
+/**
+ * irq_alloc_virt - Allocate virtual irq numbers
+ * @host: host owning these new virtual irqs
+ * @count: number of consecutive numbers to allocate
+ * @hint: pass a hint number, the allocator will try to use a 1:1 mapping
+ *
+ * This is a low level function that is used internally by irq_create_mapping()
+ * and that can be used by some irq controllers implementations for things
+ * like allocating ranges of numbers for MSIs. The revmaps are left untouched.
+ */
+extern unsigned int irq_alloc_virt(struct irq_host *host,
+				   unsigned int count,
+				   unsigned int hint);
+
+/**
+ * irq_free_virt - Free virtual irq numbers
+ * @virq: virtual irq number of the first interrupt to free
+ * @count: number of interrupts to free
+ *
+ * This function is the opposite of irq_alloc_virt. It will not clear reverse
+ * maps, this should be done previously by unmap'ing the interrupt. In fact,
+ * all interrupts covered by the range being freed should have been unmapped
+ * prior to calling this.
+ */
+extern void irq_free_virt(unsigned int virq, unsigned int count);
+
+
+#endif /* CONFIG_VIRQ */
+
+#endif /* _LINUX_VIRQ_H */
+#endif /* __KERNEL__ */
+
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 7d04780..f5207dc 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,5 +1,6 @@
 
 obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
+obj-$(CONFIG_VIRQ) += virq.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
diff --git a/kernel/irq/virq.c b/kernel/irq/virq.c
new file mode 100644
index 0000000..b3c0db3
--- /dev/null
+++ b/kernel/irq/virq.c
@@ -0,0 +1,687 @@
+/*
+ * Mapping support from per-controller hw irq numbers to linux irqs
+ *
+ *  Derived from arch/i386/kernel/irq.c
+ *    Copyright (C) 1992 Linus Torvalds
+ *  Adapted from arch/i386 by Gary Thomas
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ *    Copyright (C) 1996-2001 Cort Dougan
+ *  Adapted for Power Macintosh by Paul Mackerras
+ *    Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ *  Generalized for virtual irq mapping on all platformes by Grant Likely
+ *    Copyright (C) 2010 Secret Lab Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <linux/virq.h>
+#include <linux/of_irq.h>
+
+/*
+ * IRQ controller and virtual interrupts
+ */
+static LIST_HEAD(irq_hosts);
+static DEFINE_RAW_SPINLOCK(irq_big_lock);
+static unsigned int revmap_trees_allocated;
+static DEFINE_MUTEX(revmap_trees_mutex);
+struct irq_map_entry irq_map[NR_IRQS];
+static unsigned int irq_virq_count = NR_IRQS;
+static struct irq_host *irq_default_host;
+
+irq_hw_number_t virq_to_hw(unsigned int virq)
+{
+	return irq_map[virq].hwirq;
+}
+EXPORT_SYMBOL_GPL(virq_to_hw);
+
+static int default_irq_host_match(struct irq_host *h, struct device_node *np)
+{
+	return h->of_node != NULL && h->of_node == np;
+}
+
+struct irq_host *irq_alloc_host(struct device_node *of_node,
+				unsigned int revmap_type,
+				unsigned int revmap_arg,
+				struct irq_host_ops *ops,
+				irq_hw_number_t inval_irq)
+{
+	struct irq_host *host;
+	unsigned int size = sizeof(struct irq_host);
+	unsigned int i;
+	unsigned int *rmap;
+	unsigned long flags;
+
+	/* Allocate structure and revmap table if using linear mapping */
+	if (revmap_type == IRQ_HOST_MAP_LINEAR)
+		size += revmap_arg * sizeof(unsigned int);
+	host = zalloc_maybe_bootmem(size, GFP_KERNEL);
+	if (host == NULL)
+		return NULL;
+
+	/* Fill structure */
+	host->revmap_type = revmap_type;
+	host->inval_irq = inval_irq;
+	host->ops = ops;
+	host->of_node = of_node_get(of_node);
+
+	if (host->ops->match == NULL)
+		host->ops->match = default_irq_host_match;
+
+	raw_spin_lock_irqsave(&irq_big_lock, flags);
+
+	/* If it's a legacy controller, check for duplicates and
+	 * mark it as allocated (we use irq 0 host pointer for that
+	 */
+	if (revmap_type == IRQ_HOST_MAP_LEGACY) {
+		if (irq_map[0].host != NULL) {
+			raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+			/* If we are early boot, we can't free the structure,
+			 * too bad...
+			 * this will be fixed once slab is made available early
+			 * instead of the current cruft
+			 */
+			if (mem_init_done)
+				kfree(host);
+			return NULL;
+		}
+		irq_map[0].host = host;
+	}
+
+	list_add(&host->link, &irq_hosts);
+	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+
+	/* Additional setups per revmap type */
+	switch(revmap_type) {
+	case IRQ_HOST_MAP_LEGACY:
+		/* 0 is always the invalid number for legacy */
+		host->inval_irq = 0;
+		/* setup us as the host for all legacy interrupts */
+		for (i = 1; i < NUM_ISA_INTERRUPTS; i++) {
+			irq_map[i].hwirq = i;
+			smp_wmb();
+			irq_map[i].host = host;
+			smp_wmb();
+
+			/* Clear norequest flags */
+			irq_to_desc(i)->status &= ~IRQ_NOREQUEST;
+
+			/* Legacy flags are left to default at this point,
+			 * one can then use irq_create_mapping() to
+			 * explicitly change them
+			 */
+			ops->map(host, i, i);
+		}
+		break;
+	case IRQ_HOST_MAP_LINEAR:
+		rmap = (unsigned int *)(host + 1);
+		for (i = 0; i < revmap_arg; i++)
+			rmap[i] = NO_IRQ;
+		host->revmap_data.linear.size = revmap_arg;
+		smp_wmb();
+		host->revmap_data.linear.revmap = rmap;
+		break;
+	default:
+		break;
+	}
+
+	pr_debug("irq: Allocated host of type %d @0x%p\n", revmap_type, host);
+
+	return host;
+}
+
+struct irq_host *irq_find_host(struct device_node *node)
+{
+	struct irq_host *h, *found = NULL;
+	unsigned long flags;
+
+	/* We might want to match the legacy controller last since
+	 * it might potentially be set to match all interrupts in
+	 * the absence of a device node. This isn't a problem so far
+	 * yet though...
+	 */
+	raw_spin_lock_irqsave(&irq_big_lock, flags);
+	list_for_each_entry(h, &irq_hosts, link)
+		if (h->ops->match(h, node)) {
+			found = h;
+			break;
+		}
+	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+	return found;
+}
+EXPORT_SYMBOL_GPL(irq_find_host);
+
+void irq_set_default_host(struct irq_host *host)
+{
+	pr_debug("irq: Default host set to @0x%p\n", host);
+
+	irq_default_host = host;
+}
+
+void irq_set_virq_count(unsigned int count)
+{
+	pr_debug("irq: Trying to set virq count to %d\n", count);
+
+	BUG_ON(count < NUM_ISA_INTERRUPTS);
+	if (count < NR_IRQS)
+		irq_virq_count = count;
+}
+
+static int irq_setup_virq(struct irq_host *host, unsigned int virq,
+			    irq_hw_number_t hwirq)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc_alloc_node(virq, 0);
+	if (!desc) {
+		pr_debug("irq: -> allocating desc failed\n");
+		goto error;
+	}
+
+	/* Clear IRQ_NOREQUEST flag */
+	desc->status &= ~IRQ_NOREQUEST;
+
+	/* map it */
+	smp_wmb();
+	irq_map[virq].hwirq = hwirq;
+	smp_mb();
+
+	if (host->ops->map(host, virq, hwirq)) {
+		pr_debug("irq: -> mapping failed, freeing\n");
+		goto error;
+	}
+
+	return 0;
+
+error:
+	irq_free_virt(virq, 1);
+	return -1;
+}
+
+unsigned int irq_create_direct_mapping(struct irq_host *host)
+{
+	unsigned int virq;
+
+	if (host == NULL)
+		host = irq_default_host;
+
+	BUG_ON(host == NULL);
+	WARN_ON(host->revmap_type != IRQ_HOST_MAP_NOMAP);
+
+	virq = irq_alloc_virt(host, 1, 0);
+	if (virq == NO_IRQ) {
+		pr_debug("irq: create_direct virq allocation failed\n");
+		return NO_IRQ;
+	}
+
+	pr_debug("irq: create_direct obtained virq %d\n", virq);
+
+	if (irq_setup_virq(host, virq, virq))
+		return NO_IRQ;
+
+	return virq;
+}
+
+unsigned int irq_create_mapping(struct irq_host *host,
+				irq_hw_number_t hwirq)
+{
+	unsigned int virq, hint;
+
+	pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", host, hwirq);
+
+	/* Look for default host if nececssary */
+	if (host == NULL)
+		host = irq_default_host;
+	if (host == NULL) {
+		printk(KERN_WARNING "irq_create_mapping called for"
+		       " NULL host, hwirq=%lx\n", hwirq);
+		WARN_ON(1);
+		return NO_IRQ;
+	}
+	pr_debug("irq: -> using host @%p\n", host);
+
+	/* Check if mapping already exist, if it does, call
+	 * host->ops->map() to update the flags
+	 */
+	virq = irq_find_mapping(host, hwirq);
+	if (virq != NO_IRQ) {
+		if (host->ops->remap)
+			host->ops->remap(host, virq, hwirq);
+		pr_debug("irq: -> existing mapping on virq %d\n", virq);
+		return virq;
+	}
+
+	/* Get a virtual interrupt number */
+	if (host->revmap_type == IRQ_HOST_MAP_LEGACY) {
+		/* Handle legacy */
+		virq = (unsigned int)hwirq;
+		if (virq == 0 || virq >= NUM_ISA_INTERRUPTS)
+			return NO_IRQ;
+		return virq;
+	} else {
+		/* Allocate a virtual interrupt number */
+		hint = hwirq % irq_virq_count;
+		virq = irq_alloc_virt(host, 1, hint);
+		if (virq == NO_IRQ) {
+			pr_debug("irq: -> virq allocation failed\n");
+			return NO_IRQ;
+		}
+	}
+
+	if (irq_setup_virq(host, virq, hwirq))
+		return NO_IRQ;
+
+	printk(KERN_DEBUG "irq: irq %lu on host %s mapped to virtual irq %u\n",
+		hwirq, host->of_node ? host->of_node->full_name : "null", virq);
+
+	return virq;
+}
+EXPORT_SYMBOL_GPL(irq_create_mapping);
+
+unsigned int irq_create_of_mapping(struct device_node *controller,
+				   const u32 *intspec, unsigned int intsize)
+{
+	struct irq_host *host;
+	irq_hw_number_t hwirq;
+	unsigned int type = IRQ_TYPE_NONE;
+	unsigned int virq;
+
+	if (controller == NULL)
+		host = irq_default_host;
+	else
+		host = irq_find_host(controller);
+	if (host == NULL) {
+		printk(KERN_WARNING "irq: no irq host found for %s !\n",
+		       controller->full_name);
+		return NO_IRQ;
+	}
+
+	/* If host has no translation, then we assume interrupt line */
+	if (host->ops->xlate == NULL)
+		hwirq = intspec[0];
+	else {
+		if (host->ops->xlate(host, controller, intspec, intsize,
+				     &hwirq, &type))
+			return NO_IRQ;
+	}
+
+	/* Create mapping */
+	virq = irq_create_mapping(host, hwirq);
+	if (virq == NO_IRQ)
+		return virq;
+
+	/* Set type if specified and different than the current one */
+	if (type != IRQ_TYPE_NONE &&
+	    type != (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
+		set_irq_type(virq, type);
+	return virq;
+}
+EXPORT_SYMBOL_GPL(irq_create_of_mapping);
+
+void irq_dispose_mapping(unsigned int virq)
+{
+	struct irq_host *host;
+	irq_hw_number_t hwirq;
+
+	if (virq == NO_IRQ)
+		return;
+
+	host = irq_map[virq].host;
+	WARN_ON (host == NULL);
+	if (host == NULL)
+		return;
+
+	/* Never unmap legacy interrupts */
+	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
+		return;
+
+	/* remove chip and handler */
+	set_irq_chip_and_handler(virq, NULL, NULL);
+
+	/* Make sure it's completed */
+	synchronize_irq(virq);
+
+	/* Tell the PIC about it */
+	if (host->ops->unmap)
+		host->ops->unmap(host, virq);
+	smp_mb();
+
+	/* Clear reverse map */
+	hwirq = irq_map[virq].hwirq;
+	switch(host->revmap_type) {
+	case IRQ_HOST_MAP_LINEAR:
+		if (hwirq < host->revmap_data.linear.size)
+			host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
+		break;
+	case IRQ_HOST_MAP_TREE:
+		/*
+		 * Check if radix tree allocated yet, if not then nothing to
+		 * remove.
+		 */
+		smp_rmb();
+		if (revmap_trees_allocated < 1)
+			break;
+		mutex_lock(&revmap_trees_mutex);
+		radix_tree_delete(&host->revmap_data.tree, hwirq);
+		mutex_unlock(&revmap_trees_mutex);
+		break;
+	}
+
+	/* Destroy map */
+	smp_mb();
+	irq_map[virq].hwirq = host->inval_irq;
+
+	/* Set some flags */
+	irq_to_desc(virq)->status |= IRQ_NOREQUEST;
+
+	/* Free it */
+	irq_free_virt(virq, 1);
+}
+EXPORT_SYMBOL_GPL(irq_dispose_mapping);
+
+unsigned int irq_find_mapping(struct irq_host *host,
+			      irq_hw_number_t hwirq)
+{
+	unsigned int i;
+	unsigned int hint = hwirq % irq_virq_count;
+
+	/* Look for default host if nececssary */
+	if (host == NULL)
+		host = irq_default_host;
+	if (host == NULL)
+		return NO_IRQ;
+
+	/* legacy -> bail early */
+	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
+		return hwirq;
+
+	/* Slow path does a linear search of the map */
+	if (hint < NUM_ISA_INTERRUPTS)
+		hint = NUM_ISA_INTERRUPTS;
+	i = hint;
+	do  {
+		if (irq_map[i].host == host &&
+		    irq_map[i].hwirq == hwirq)
+			return i;
+		i++;
+		if (i >= irq_virq_count)
+			i = NUM_ISA_INTERRUPTS;
+	} while(i != hint);
+	return NO_IRQ;
+}
+EXPORT_SYMBOL_GPL(irq_find_mapping);
+
+
+unsigned int irq_radix_revmap_lookup(struct irq_host *host,
+				     irq_hw_number_t hwirq)
+{
+	struct irq_map_entry *ptr;
+	unsigned int virq;
+
+	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
+
+	/*
+	 * Check if the radix tree exists and has bee initialized.
+	 * If not, we fallback to slow mode
+	 */
+	if (revmap_trees_allocated < 2)
+		return irq_find_mapping(host, hwirq);
+
+	/* Now try to resolve */
+	/*
+	 * No rcu_read_lock(ing) needed, the ptr returned can't go under us
+	 * as it's referencing an entry in the static irq_map table.
+	 */
+	ptr = radix_tree_lookup(&host->revmap_data.tree, hwirq);
+
+	/*
+	 * If found in radix tree, then fine.
+	 * Else fallback to linear lookup - this should not happen in practice
+	 * as it means that we failed to insert the node in the radix tree.
+	 */
+	if (ptr)
+		virq = ptr - irq_map;
+	else
+		virq = irq_find_mapping(host, hwirq);
+
+	return virq;
+}
+
+void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
+			     irq_hw_number_t hwirq)
+{
+
+	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
+
+	/*
+	 * Check if the radix tree exists yet.
+	 * If not, then the irq will be inserted into the tree when it gets
+	 * initialized.
+	 */
+	smp_rmb();
+	if (revmap_trees_allocated < 1)
+		return;
+
+	if (virq != NO_IRQ) {
+		mutex_lock(&revmap_trees_mutex);
+		radix_tree_insert(&host->revmap_data.tree, hwirq,
+				  &irq_map[virq]);
+		mutex_unlock(&revmap_trees_mutex);
+	}
+}
+
+unsigned int irq_linear_revmap(struct irq_host *host,
+			       irq_hw_number_t hwirq)
+{
+	unsigned int *revmap;
+
+	WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR);
+
+	/* Check revmap bounds */
+	if (unlikely(hwirq >= host->revmap_data.linear.size))
+		return irq_find_mapping(host, hwirq);
+
+	/* Check if revmap was allocated */
+	revmap = host->revmap_data.linear.revmap;
+	if (unlikely(revmap == NULL))
+		return irq_find_mapping(host, hwirq);
+
+	/* Fill up revmap with slow path if no mapping found */
+	if (unlikely(revmap[hwirq] == NO_IRQ))
+		revmap[hwirq] = irq_find_mapping(host, hwirq);
+
+	return revmap[hwirq];
+}
+
+unsigned int irq_alloc_virt(struct irq_host *host,
+			    unsigned int count,
+			    unsigned int hint)
+{
+	unsigned long flags;
+	unsigned int i, j, found = NO_IRQ;
+
+	if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS))
+		return NO_IRQ;
+
+	raw_spin_lock_irqsave(&irq_big_lock, flags);
+
+	/* Use hint for 1 interrupt if any */
+	if (count == 1 && hint >= NUM_ISA_INTERRUPTS &&
+	    hint < irq_virq_count && irq_map[hint].host == NULL) {
+		found = hint;
+		goto hint_found;
+	}
+
+	/* Look for count consecutive numbers in the allocatable
+	 * (non-legacy) space
+	 */
+	for (i = NUM_ISA_INTERRUPTS, j = 0; i < irq_virq_count; i++) {
+		if (irq_map[i].host != NULL)
+			j = 0;
+		else
+			j++;
+
+		if (j == count) {
+			found = i - count + 1;
+			break;
+		}
+	}
+	if (found == NO_IRQ) {
+		raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+		return NO_IRQ;
+	}
+ hint_found:
+	for (i = found; i < (found + count); i++) {
+		irq_map[i].hwirq = host->inval_irq;
+		smp_wmb();
+		irq_map[i].host = host;
+	}
+	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+	return found;
+}
+
+void irq_free_virt(unsigned int virq, unsigned int count)
+{
+	unsigned long flags;
+	unsigned int i;
+
+	WARN_ON (virq < NUM_ISA_INTERRUPTS);
+	WARN_ON (count == 0 || (virq + count) > irq_virq_count);
+
+	raw_spin_lock_irqsave(&irq_big_lock, flags);
+	for (i = virq; i < (virq + count); i++) {
+		struct irq_host *host;
+
+		if (i < NUM_ISA_INTERRUPTS ||
+		    (virq + count) > irq_virq_count)
+			continue;
+
+		host = irq_map[i].host;
+		irq_map[i].hwirq = host->inval_irq;
+		smp_wmb();
+		irq_map[i].host = NULL;
+	}
+	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
+}
+
+/* We need to create the radix trees late */
+static int irq_late_init(void)
+{
+	struct irq_host *h;
+	unsigned int i;
+
+	/*
+	 * No mutual exclusion with respect to accessors of the tree is needed
+	 * here as the synchronization is done via the state variable
+	 * revmap_trees_allocated.
+	 */
+	list_for_each_entry(h, &irq_hosts, link) {
+		if (h->revmap_type == IRQ_HOST_MAP_TREE)
+			INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL);
+	}
+
+	/*
+	 * Make sure the radix trees inits are visible before setting
+	 * the flag
+	 */
+	smp_wmb();
+	revmap_trees_allocated = 1;
+
+	/*
+	 * Insert the reverse mapping for those interrupts already present
+	 * in irq_map[].
+	 */
+	mutex_lock(&revmap_trees_mutex);
+	for (i = 0; i < irq_virq_count; i++) {
+		if (irq_map[i].host &&
+		    (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE))
+			radix_tree_insert(&irq_map[i].host->revmap_data.tree,
+					  irq_map[i].hwirq, &irq_map[i]);
+	}
+	mutex_unlock(&revmap_trees_mutex);
+
+	/*
+	 * Make sure the radix trees insertions are visible before setting
+	 * the flag
+	 */
+	smp_wmb();
+	revmap_trees_allocated = 2;
+
+	return 0;
+}
+arch_initcall(irq_late_init);
+
+#ifdef CONFIG_VIRQ_DEBUG
+static int virq_debug_show(struct seq_file *m, void *private)
+{
+	unsigned long flags;
+	struct irq_desc *desc;
+	const char *p;
+	char none[] = "none";
+	int i;
+
+	seq_printf(m, "%-5s  %-7s  %-15s  %s\n", "virq", "hwirq",
+		      "chip name", "host name");
+
+	for (i = 1; i < nr_irqs; i++) {
+		desc = irq_to_desc(i);
+		if (!desc)
+			continue;
+
+		raw_spin_lock_irqsave(&desc->lock, flags);
+
+		if (desc->action && desc->action->handler) {
+			seq_printf(m, "%5d  ", i);
+			seq_printf(m, "0x%05lx  ", virq_to_hw(i));
+
+			if (desc->chip && desc->chip->name)
+				p = desc->chip->name;
+			else
+				p = none;
+			seq_printf(m, "%-15s  ", p);
+
+			if (irq_map[i].host && irq_map[i].host->of_node)
+				p = irq_map[i].host->of_node->full_name;
+			else
+				p = none;
+			seq_printf(m, "%s\n", p);
+		}
+
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+	}
+
+	return 0;
+}
+
+static int virq_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, virq_debug_show, inode->i_private);
+}
+
+static const struct file_operations virq_debug_fops = {
+	.open = virq_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int __init irq_debugfs_init(void)
+{
+	if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root,
+				 NULL, &virq_debug_fops) == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+__initcall(irq_debugfs_init);
+#endif /* CONFIG_VIRQ_DEBUG */
+

^ permalink raw reply related

* Re: [U-Boot] cuImage and multi image?
From: Shawn Jin @ 2010-09-22 20:22 UTC (permalink / raw)
  To: Chen, Tiejun; +Cc: Scott Wood, ppcdev, uboot
In-Reply-To: <52CF90264091A14888078A031D780F4306C8C0E5@ism-mail03.corp.ad.wrs.com>

>> I have a large ramdisk image. The size of the image itself (i.e. the
>> *.gz) is about 4MB. When the ramdisk was being decompressed
>
> Did you try to change link_address on the file, arch/powerpc/boot/wrapper?

No. I don't have to. Right? The link_address is still 0x400000.

> Did you try boot the uImage and the ramdisk separately? For example, you can boot this as the following command:
> # bootm ${kernel_addr} ${ramdisk_addr} ${fdt_addr}

Mine is a cuImage. I'm pretty sure that my ramdisk is valid when it's
a separate image. I used "bootm <kernel_addr> <ramdisk_addr>" to boot.

> Can you paste the whole log from the u-boot prompt?

In the previous run the ramdisk image was corrupted because the single
image was loaded at 0x800000. But the boot message showed that the
initrd image was at 0x0066c000-0x009ae825. So it was over the 8MB
area.

However after the load address was changed to 0x04000000 (64MB), the
ramdisk still seemed corrupted but with different error messages.

=> bootm
## Booting image at 04000000 ...
   Image Name:   Linux-2.6.33.5
   Image Type:   PowerPC Linux Kernel Image (gzip compressed)
   Data Size:    4424922 Bytes =  4.2 MB
   Load Address: 00400000
   Entry Point:  00400554
   Verifying Checksum ... OK
   Uncompressing Kernel Image ... OK
Memory <- <0x0 0x8000000> (128MB)
ENET0: local-mac-address <- 00:09:9b:01:58:64
CPU clock-frequency <- 0x7270e00 (120MHz)
CPU timebase-frequency <- 0x7270e0 (8MHz)
CPU bus-frequency <- 0x3938700 (60MHz)

zImage starting: loaded at 0x00400000 (sp: 0x07d1cbd0)
Allocating 0x22a1e1 bytes for kernel ...
gunzipping (0x00000000 <- 0x0040c000:0x0066b0ac)...done 0x21c6c8 bytes
Attached initrd image at 0x0066c000-0x009ae825
initrd head: 0x1f8b0808

Linux/PowerPC load: root=/dev/ram
Finalizing device tree... flat tree at 0x9bb300
Using my870 machine description
Linux version 2.6.33.5 (shawn@ubuntu) (gcc version 4.2.2) #4 Tue Sep
21 09:23:51 PDT 2010
Found initrd at 0xc066c000:0xc09ae825
Zone PFN ranges:
  DMA      0x00000000 -> 0x00008000
  Normal   0x00008000 -> 0x00008000
Movable zone start PFN for each node
early_node_map[1] active PFN ranges
    0: 0x00000000 -> 0x00008000
MMU: Allocated 72 bytes of context maps for 16 contexts
Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 32512
Kernel command line: root=/dev/ram
PID hash table entries: 512 (order: -1, 2048 bytes)
Dentry cache hash table entries: 16384 (order: 4, 65536 bytes)
Inode-cache hash table entries: 8192 (order: 3, 32768 bytes)
Memory: 124072k/131072k available (2080k kernel code, 6836k reserved,
84k data, 52k bss, 104k init)
Kernel virtual memory layout:
  * 0xfffdf000..0xfffff000  : fixmap
  * 0xfde00000..0xfe000000  : consistent mem
  * 0xfddfa000..0xfde00000  : early ioremap
  * 0xc9000000..0xfddfa000  : vmalloc & ioremap
SLUB: Genslabs=12, HWalign=16, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
....
<snipped>
....
RAMDISK: gzip image found at block 0
uncompression error
VFS: Mounted root (ext2 filesystem) readonly on device 1:0.
Freeing unused kernel memory: 104k init
EXT2-fs (ram0): error: ext2_check_page: bad entry in directory #336: :
unaligned directory entry - offset=0, inode=74187384, rec_len=2081,
name_len=126
EXT2-fs (ram0): error: remounting filesystem read-only
attempt to access beyond end of device
ram0: rw=0, want=156831968, limit=32768
Buffer I/O error on device ram0, logical block 78415983
attempt to access beyond end of device
ram0: rw=0, want=112233212, limit=32768
Buffer I/O error on device ram0, logical block 56116605
attempt to access beyond end of device
ram0: rw=0, want=6626681482, limit=32768
Buffer I/O error on device ram0, logical block 3313340740
attempt to access beyond end of device
ram0: rw=0, want=184684282, limit=32768
Buffer I/O error on device ram0, logical block 92342140
Kernel panic - not syncing: No init found.  Try passing init= option to kernel.
Call Trace:
[c7821f30] [c0006cd8] show_stack+0x40/0x168 (unreliable)
[c7821f70] [c001cefc] panic+0x8c/0x178
[c7821fc0] [c00026d4] init_post+0xe4/0xf4
[c7821fd0] [c01ee224] kernel_init+0x108/0x130
[c7821ff0] [c000dcc0] kernel_thread+0x4c/0x68
Rebooting in 180 seconds..

Thanks,
-Shawn.

^ permalink raw reply

* Re: Oops in trace_hardirqs_on (powerpc)
From: Steven Rostedt @ 2010-09-22 19:44 UTC (permalink / raw)
  To: Jörg Sommer
  Cc: Frederic Weisbecker, Ingo Molnar, linux-kernel, linuxppc-dev
In-Reply-To: <20100806233157.GA7117@alea.gnuu.de>

Sorry for the late reply, but I was on vacation when you sent this, and
I missed it while going through email.

Do you still have this issue?

-- Steve


On Sat, 2010-08-07 at 01:31 +0200, Jörg Sommer wrote:
> Hi,
> 
> I've built my 2.6.35 with tracing support and now, I'm getting
> continuously oops'. It seems to happen on high process activity.
> 
> [   52.336371] device eth0 entered promiscuous mode
> [   52.347616] device eth0 left promiscuous mode
> [   55.240663] Unable to handle kernel paging request for data at address 0xbfaf4a24
> [   55.248289] Faulting instruction address: 0xc00aad98
> [   55.255562] Oops: Kernel access of bad area, sig: 11 [#1]
> [   55.262588] PowerMac
> [   55.269606] last sysfs file: /sys/devices/pci0000:00/0000:00:10.0/graphics/fb0/radeonbl0/brightness
> [   55.277111] Modules linked in: fuse snd_powermac option usb_wwan usbserial ecb b43 snd_aoa_i2sbus snd_pcm_oss 
> [   55.302368] NIP: c00aad98 LR: c001771c CTR: c003dba0
> [   55.310738] REGS: e3211e70 TRAP: 0300   Not tainted  (2.6.35)
> [   55.319122] MSR: 00001032 <ME,IR,DR>  CR: 22f88f42  XER: 20000000
> [   55.327650] DAR: bfaf4a24, DSISR: 40000000
> [   55.335954] TASK = e3245bc0[1929] 'sh' THREAD: e3210000
> [   55.336144] GPR00: 00000000 e3211f20 e3245bc0 e3245bc0 c000b944 00000000 003a1040 00000000 
> [   55.344859] GPR08: bfaf4a20 c05e0000 c0614d18 c0610000 00000000 10033368 10018520 10007c2c 
> [   55.353723] GPR16: 10007c30 00000000 00000000 00000000 bfecaa10 101d8304 10019c28 bfecbfab 
> [   55.362438] GPR24: bfecaa08 10019c58 000006d1 00000000 c063be80 bfeca9a0 0ffebff4 e3211f20 
> [   55.378913] NIP [c00aad98] trace_hardirqs_on+0x5c/0x124
> [   55.386856] LR [c001771c] restore+0x10/0x6c
> [   55.394527] Call Trace:
> [   55.401878] [e3211f20] [10019c58] 0x10019c58 (unreliable)
> [   55.409437] [e3211f40] [c001771c] restore+0x10/0x6c
> [   55.417065] --- Exception: c00 at 0xff23c88
> [   55.417071]     LR = 0xff23c54
> [   55.432267] Instruction dump:
> [   55.439808] 800a005c 70090002 418200c8 7c0000a6 70008000 408200bc 3d20c05e 838a0058 
> [   55.447730] 81096f98 2f880000 811f0000 81080000 <83680004> 41be009c 816b4d18 90096f98 
> [   55.455722] ---[ end trace 547f1189532873f7 ]---
> [  390.022834] EXT4-fs (dm-0): mounted filesystem with ordered data mode. Opts: (null)
> 
> [  507.793120] lo: Disabled Privacy Extensions
> [  518.228969] eth0: no IPv6 routers present
> [  737.593898] Unable to handle kernel paging request for data at address 0x00000004
> [  737.593927] Faulting instruction address: 0xc00aad98
> [  737.593957] Oops: Kernel access of bad area, sig: 11 [#2]
> [  737.593967] PowerMac
> [  737.593976] last sysfs file: /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
> [  737.593992] Modules linked in: ppp_async crc_ccitt ipv6 ppp_generic slhc fuse snd_powermac option usb_wwan usb
> [  737.594132] NIP: c00aad98 LR: c001771c CTR: c003dba0
> [  737.594148] REGS: e685de70 TRAP: 0300   Tainted: G      D      (2.6.35)
> [  737.594159] MSR: 00001032 <ME,IR,DR>  CR: 24000042  XER: 20000000
> [  737.594187] DAR: 00000004, DSISR: 40000000
> [  737.594198] TASK = e30b3780[3322] 'zsh-beta' THREAD: e685c000
> [  737.594208] GPR00: 00000000 e685df20 e30b3780 e30b3780 c000b944 00000000 003e5f00 00000000 
> [  737.594240] GPR08: 00000000 c05e0000 c0614d18 c0610000 00000000 100b4ee8 10092dec 00000000 
> [  737.594271] GPR16: 100bb400 100916fc 00000000 bfbda1b0 bfbda4ec 00000000 00000000 00000000 
> [  737.594303] GPR24: 100b0000 100bae50 00000cea 00000000 c063be80 bfbd9e60 0fe64ff4 e685df20 
> [  737.594362] NIP [c00aad98] trace_hardirqs_on+0x5c/0x124
> [  737.594379] LR [c001771c] restore+0x10/0x6c
> [  737.594388] Call Trace:
> [  737.594402] [e685df20] [100bae50] 0x100bae50 (unreliable)
> [  737.594421] [e685df40] [c001771c] restore+0x10/0x6c
> [  737.594432] Instruction dump:
> [  737.594442] 800a005c 70090002 418200c8 7c0000a6 70008000 408200bc 3d20c05e 838a0058 
> [  737.594473] 81096f98 2f880000 811f0000 81080000 <83680004> 41be009c 816b4d18 90096f98 
> [  737.594514] ---[ end trace 547f1189532873f8 ]---
> [  737.919108] Unable to handle kernel paging request for data at address 0x00000003
> [  737.919137] Faulting instruction address: 0xc00aad98
> [  737.919168] Oops: Kernel access of bad area, sig: 11 [#3]
> [  737.919179] PowerMac
> [  737.919187] last sysfs file: /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
> [  737.919203] Modules linked in: ppp_async crc_ccitt ipv6 ppp_generic slhc fuse snd_powermac option usb_wwan usb
> [  737.919342] NIP: c00aad98 LR: c001771c CTR: 00000000
> [  737.919358] REGS: e6d15e70 TRAP: 0300   Tainted: G      D      (2.6.35)
> [  737.919369] MSR: 00001032 <ME,IR,DR>  CR: 24ffff42  XER: 00000000
> [  737.919397] DAR: 00000003, DSISR: 40000000
> [  737.919409] TASK = e30b3780[3350] 'zsh-beta' THREAD: e6d14000
> [  737.919419] GPR00: 00000000 e6d15f20 e30b3780 e30b3780 c000b944 00000000 0065df00 00000008 
> [  737.919451] GPR08: ffffffff c05e0000 c0614d18 c0610000 ffffffff 100b4ee8 100ad1e8 00000004 
> [  737.919483] GPR16: 100bb400 100916fc 00000000 bfbdad70 bfbdb0a8 10091e04 10091e08 100ad314 
> [  737.919515] GPR24: 100b0000 100bae50 00000cea 00000000 c063be80 bfbdaa20 0fe64ff4 e6d15f20 
> [  737.919576] NIP [c00aad98] trace_hardirqs_on+0x5c/0x124
> [  737.919593] LR [c001771c] restore+0x10/0x6c
> [  737.919602] Call Trace:
> [  737.919616] [e6d15f20] [100bae50] 0x100bae50 (unreliable)
> [  737.919635] [e6d15f40] [c001771c] restore+0x10/0x6c
> [  737.919646] Instruction dump:
> [  737.919657] 800a005c 70090002 418200c8 7c0000a6 70008000 408200bc 3d20c05e 838a0058 
> [  737.919688] 81096f98 2f880000 811f0000 81080000 <83680004> 41be009c 816b4d18 90096f98 
> [  737.919728] ---[ end trace 547f1189532873f9 ]---
> 
> % uname -a
> Linux ibook 2.6.35 #33 Fri Aug 6 21:44:01 CEST 2010 ppc GNU/Linux
> 
> % cat /proc/cpuinfo
> processor	: 0
> cpu		: 7455, altivec supported
> clock		: 606.000000MHz
> revision	: 3.3 (pvr 8001 0303)
> bogomips	: 36.86
> timebase	: 18432000
> platform	: PowerMac
> model		: PowerBook6,3
> machine		: PowerBook6,3
> motherboard	: PowerBook6,3 MacRISC3 Power Macintosh
> detected as	: 287 (iBook G4)
> pmac flags	: 0000001b
> L2 cache	: 256K unified
> pmac-generation	: NewWorld
> Memory		: 640 MB
> 
> My config is at <http://alioth.debian.org/~jo-guest/config-2.6.35>. With
> the version 2.6.35-rc6 and the former config I didn't have this problem.
> 
> http://alioth.debian.org/~jo-guest/config-2.6.35-rc6
> http://alioth.debian.org/~jo-guest/kern.log
> 
> (gdb) disassemble trace_hardirqs_on
> Dump of assembler code for function trace_hardirqs_on:
>    0xc00aad3c <+0>:     stwu    r1,-32(r1)
>    0xc00aad40 <+4>:     mflr    r0
>    0xc00aad44 <+8>:     stw     r0,36(r1)
>    0xc00aad48 <+12>:    stw     r27,12(r1)
>    0xc00aad4c <+16>:    stw     r28,16(r1)
>    0xc00aad50 <+20>:    stw     r29,20(r1)
>    0xc00aad54 <+24>:    stw     r30,24(r1)
>    0xc00aad58 <+28>:    stw     r31,28(r1)
>    0xc00aad5c <+32>:    mr      r31,r1
>    0xc00aad60 <+36>:    lis     r11,-16287
>    0xc00aad64 <+40>:    addi    r10,r11,19736
>    0xc00aad68 <+44>:    lwz     r0,92(r10)
>    0xc00aad6c <+48>:    andi.   r9,r0,2
>    0xc00aad70 <+52>:    beq     0xc00aae38 <trace_hardirqs_on+252>
>    0xc00aad74 <+56>:    mfmsr   r0
>    0xc00aad78 <+60>:    andi.   r0,r0,32768
>    0xc00aad7c <+64>:    bne     0xc00aae38 <trace_hardirqs_on+252>
>    0xc00aad80 <+68>:    lis     r9,-16290
>    0xc00aad84 <+72>:    lwz     r28,88(r10)
>    0xc00aad88 <+76>:    lwz     r8,28568(r9)
>    0xc00aad8c <+80>:    cmpwi   cr7,r8,0
>    0xc00aad90 <+84>:    lwz     r8,0(r31)
>    0xc00aad94 <+88>:    lwz     r8,0(r8)
>    0xc00aad98 <+92>:    lwz     r27,4(r8)
>    0xc00aad9c <+96>:    beq     cr7,0xc00aae38 <trace_hardirqs_on+252>
>    0xc00aada0 <+100>:   lwz     r11,19736(r11)
>    0xc00aada4 <+104>:   stw     r0,28568(r9)
>    0xc00aada8 <+108>:   cmpwi   cr7,r11,0
>    0xc00aadac <+112>:   beq     cr7,0xc00aae38 <trace_hardirqs_on+252>
>    0xc00aadb0 <+116>:   lwz     r30,28(r28)
>    0xc00aadb4 <+120>:   cmpwi   cr7,r30,0
>    0xc00aadb8 <+124>:   beq     cr7,0xc00aae38 <trace_hardirqs_on+252>
>    0xc00aadbc <+128>:   lwz     r0,12(r30)
>    0xc00aadc0 <+132>:   cmpwi   cr7,r0,0
>    0xc00aadc4 <+136>:   beq     cr7,0xc00aae38 <trace_hardirqs_on+252>
>    0xc00aadc8 <+140>:   lwz     r0,0(r30)
>    0xc00aadcc <+144>:   cmpwi   cr7,r0,0
>    0xc00aadd0 <+148>:   bne     cr7,0xc00aae38 <trace_hardirqs_on+252>
>    0xc00aadd4 <+152>:   mflr    r29
>    0xc00aadd8 <+156>:   lwarx   r0,0,r30
>    0xc00aaddc <+160>:   addic   r0,r0,1
> 
> Bye, Jörg.
> -- 
> Two types have compatible type if their types are the same.
> [ANSI C, 6.2.7]

^ permalink raw reply

* Re: [PATCH 0/8] De-couple sysfs memory directories from memory sections
From: Dave Hansen @ 2010-09-22 18:58 UTC (permalink / raw)
  To: Nathan Fontenot
  Cc: linux-mm, Greg KH, linux-kernel, KAMEZAWA Hiroyuki, linuxppc-dev
In-Reply-To: <4C9A4DBB.6080500@austin.ibm.com>

On Wed, 2010-09-22 at 13:40 -0500, Nathan Fontenot wrote:
> On 09/22/2010 10:20 AM, Dave Hansen wrote:
> >                            and phys_index's calculation needs to be:
> > 
> > 	mem->start_phys_index * SECTION_SIZE / memory_block_size_bytes()
> 
> I'm not sure if  I follow where you suggest using this formula.  Is this
> instead of what is used now, the base_memory_block_id() calculation?
> 
> If so, then I'm not sure it would work. The formula used in base_memory_block_id()
> is done because the memory sections are not guaranteed to be added to the
> memory block starting with the first section of the block.
> 
> If you meant somewhere else let me know.

My point was just that if we change the "block_size_bytes" contents,
then we have to scale down the "memoryXXXX/phys_index" by that same
amount.

It *used* to be in numbers of SECTION_SIZE units, and I think it still
is:

-       mem->start_phys_index = __section_nr(section);
+       mem->start_phys_index = base_memory_block_id(__section_nr(section));
+       mem->end_phys_index = mem->start_phys_index + sections_per_block - 1;

but now it needs to be changed to be in memory_block_size_bytes() units,
*NOT* SECTION_SIZE units.

Let's say we have a system with 4 16MB sections starting at 0x0.
Before, we would have:

	block_size_bytes: 16777216
	memory0/phys_index: 0
	memory1/phys_index: 1
	memory2/phys_index: 2
	memory3/phys_index: 3

Now, we change memory_block_size_bytes() to be 32MB instead.  We reduce
the number of sections in half, and I think the right thing to get is:

	block_size_bytes: 33554432
	memory0/phys_index: 0
	memory1/phys_index: 1

I think, with your code (as it stands in these patches, no fixes) that
we'd instead get this:

	block_size_bytes: 16777216
	memory0/phys_index: 0
	memory1/phys_index: 2

Without consulting "end_phys_index" (which isn't and can't be a part of
the existing ABI), we'd think that we have two 16MB banks instead of
four.


-- Dave

^ permalink raw reply

* Re: [PATCH 0/8] De-couple sysfs memory directories from memory sections
From: Nathan Fontenot @ 2010-09-22 18:40 UTC (permalink / raw)
  To: Dave Hansen
  Cc: linux-mm, Greg KH, linux-kernel, KAMEZAWA Hiroyuki, linuxppc-dev
In-Reply-To: <1285168800.3292.5228.camel@nimitz>

On 09/22/2010 10:20 AM, Dave Hansen wrote:
> On Wed, 2010-09-22 at 09:15 -0500, Nathan Fontenot wrote:
>> For architectures that define their own version of this routine,
>> as is done for powerpc in this patchset, the view in userspace
>> would change such that each memoryXXX directory would span
>> multiple memory sections.  The number of sections spanned would
>> depend on the value reported by memory_block_size_bytes.
>>
>> In both cases a new file 'end_phys_index' is created in each
>> memoryXXX directory.  This file will contain the physical id
>> of the last memory section covered by the sysfs directory.  For
>> the default case, the value in 'end_phys_index' will be the same
>> as in the existing 'phys_index' file.
> 
> Hi Nathan,
> 
> There's one bit missing here, I think.
> 
> "block_size_bytes" today means two things today:
> 1. the SECTION_SIZE from sparsemem
> 2. the size covered by each memoryXXXX directory
> 
> SECTION_SIZE isn't exposed to userspace, but the memoryXXXX directories
> are.  You've done all of the heavy lifting here to make sure that the
> memory directories are no longer bound to SECTION_SIZE, but you've also
> broken the assumption that _each_ directory covers "block_size_bytes".
> 
> I think it's fairly simple to fix.  block_size_bytes() needs to return
> memory_block_size_bytes(),

yes, missed that.  I will update the patch set to include this.

>                            and phys_index's calculation needs to be:
> 
> 	mem->start_phys_index * SECTION_SIZE / memory_block_size_bytes()

I'm not sure if  I follow where you suggest using this formula.  Is this
instead of what is used now, the base_memory_block_id() calculation?

If so, then I'm not sure it would work. The formula used in base_memory_block_id()
is done because the memory sections are not guaranteed to be added to the
memory block starting with the first section of the block.

If you meant somewhere else let me know.

-Nathan
> 
> That way, to userspace, it just looks like before, but with a larger
> SECTION_SIZE.  Doing that preserves the ABI pretty nicely, I believe.
> 
> -- Dave
> 

^ permalink raw reply

* MPC8641D PEX: programming OWBAR in Endpoint mode?
From: david.hagood @ 2010-09-22 15:55 UTC (permalink / raw)
  To: linuxppc-dev

I am trying to get the PCIe interfaces of a Freescale MPC8641D working in
endpoint mode (i.e. as a PCI device rather than a PCI root complex host).

I can get the device to show up on the host's PCI bus, I can program the
inbound ATMUs such that the BARS are updated when the host (re-)scans
them, but I cannot for the life of me get the PPC's Outbound ATMUS to
work.

When I attempt to program them, I can program ALL the registers EXCEPT the
OWBAR - which steadfastly remains 0 no matter what I write to it.

As a result, when I attempt to bus master out from the PPC to the PCIe
address spaces via the outbound ATMUs, I get a bus fault on the PPC as
there is no device at the address I am accessing.

I've double-checked the LAWs to make sure the PEX is mapped into local
space, I've put the OWBAR in that address space, I've tried different
outbound ATMUs, and NOTHING works. Not ATMU0, not ATMU1, etc.

I've been trying to work with our Freescale rep, but I am getting nowhere
on that front.

Does anybody have any suggestions on what I might be doing wrong? I mean,
it looks like it should be a simple out_be32(addr_of_OWBAR,value), just
like all the other accesses to the ATMU registers that seem to be working
(as in, I read back what I wrote).

^ permalink raw reply

* Re: [PATCH 0/8] De-couple sysfs memory directories from memory sections
From: Dave Hansen @ 2010-09-22 15:20 UTC (permalink / raw)
  To: Nathan Fontenot
  Cc: linux-mm, Greg KH, linux-kernel, KAMEZAWA Hiroyuki, linuxppc-dev
In-Reply-To: <4C9A0F8F.2030409@austin.ibm.com>

On Wed, 2010-09-22 at 09:15 -0500, Nathan Fontenot wrote:
> For architectures that define their own version of this routine,
> as is done for powerpc in this patchset, the view in userspace
> would change such that each memoryXXX directory would span
> multiple memory sections.  The number of sections spanned would
> depend on the value reported by memory_block_size_bytes.
> 
> In both cases a new file 'end_phys_index' is created in each
> memoryXXX directory.  This file will contain the physical id
> of the last memory section covered by the sysfs directory.  For
> the default case, the value in 'end_phys_index' will be the same
> as in the existing 'phys_index' file.

Hi Nathan,

There's one bit missing here, I think.

"block_size_bytes" today means two things today:
1. the SECTION_SIZE from sparsemem
2. the size covered by each memoryXXXX directory

SECTION_SIZE isn't exposed to userspace, but the memoryXXXX directories
are.  You've done all of the heavy lifting here to make sure that the
memory directories are no longer bound to SECTION_SIZE, but you've also
broken the assumption that _each_ directory covers "block_size_bytes".

I think it's fairly simple to fix.  block_size_bytes() needs to return
memory_block_size_bytes(), and phys_index's calculation needs to be:

	mem->start_phys_index * SECTION_SIZE / memory_block_size_bytes()

That way, to userspace, it just looks like before, but with a larger
SECTION_SIZE.  Doing that preserves the ABI pretty nicely, I believe.

-- Dave

^ permalink raw reply

* [PATCH 8/8] Update memory hotplug documentation
From: Nathan Fontenot @ 2010-09-22 14:36 UTC (permalink / raw)
  To: linux-kernel, linux-mm, linuxppc-dev
  Cc: Greg KH, KAMEZAWA Hiroyuki, Dave Hansen
In-Reply-To: <4C9A0F8F.2030409@austin.ibm.com>

Update the memory hotplug documentation to reflect the new behaviors of
memory blocks reflected in sysfs.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

---
 Documentation/memory-hotplug.txt |   46 +++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 16 deletions(-)

Index: linux-next/Documentation/memory-hotplug.txt
===================================================================
--- linux-next.orig/Documentation/memory-hotplug.txt	2010-09-21 11:59:22.000000000 -0500
+++ linux-next/Documentation/memory-hotplug.txt	2010-09-21 12:39:05.000000000 -0500
@@ -126,36 +126,50 @@ config options.
 --------------------------------
 4 sysfs files for memory hotplug
 --------------------------------
-All sections have their device information under /sys/devices/system/memory as
+All sections have their device information in sysfs.  Each section is part of
+a memory block under /sys/devices/system/memory as
 
 /sys/devices/system/memory/memoryXXX
-(XXX is section id.)
+(XXX is the section id.)
 
-Now, XXX is defined as start_address_of_section / section_size.
+Now, XXX is defined as (start_address_of_section / section_size) of the first
+section contained in the memory block.  The files 'phys_index' and
+'end_phys_index' under each directory report the beginning and end section id's
+for the memory block covered by the sysfs directory.  It is expected that all
+memory sections in this range are present and no memory holes exist in the
+range. Currently there is no way to determine if there is a memory hole, but
+the existence of one should not affect the hotplug capabilities of the memory
+block.
 
 For example, assume 1GiB section size. A device for a memory starting at
 0x100000000 is /sys/device/system/memory/memory4
 (0x100000000 / 1Gib = 4)
 This device covers address range [0x100000000 ... 0x140000000)
 
-Under each section, you can see 4 files.
+Under each section, you can see 5 files.
 
-/sys/devices/system/memory/memoryXXX/phys_index
+/sys/devices/system/memory/memoryXXX/start_phys_index
+/sys/devices/system/memory/memoryXXX/end_phys_index
 /sys/devices/system/memory/memoryXXX/phys_device
 /sys/devices/system/memory/memoryXXX/state
 /sys/devices/system/memory/memoryXXX/removable
 
-'phys_index' : read-only and contains section id, same as XXX.
-'state'      : read-write
-               at read:  contains online/offline state of memory.
-               at write: user can specify "online", "offline" command
-'phys_device': read-only: designed to show the name of physical memory device.
-               This is not well implemented now.
-'removable'  : read-only: contains an integer value indicating
-               whether the memory section is removable or not
-               removable.  A value of 1 indicates that the memory
-               section is removable and a value of 0 indicates that
-               it is not removable.
+'phys_index'      : read-only and contains section id of the first section
+		    in the memory block, same as XXX.
+'end_phys_index'  : read-only and contains section id of the last section
+		    in the memory block.
+'state'           : read-write
+                    at read:  contains online/offline state of memory.
+                    at write: user can specify "online", "offline" command
+                    which will be performed on al sections in the block.
+'phys_device'     : read-only: designed to show the name of physical memory
+                    device.  This is not well implemented now.
+'removable'       : read-only: contains an integer value indicating
+                    whether the memory block is removable or not
+                    removable.  A value of 1 indicates that the memory
+                    block is removable and a value of 0 indicates that
+                    it is not removable. A memory block is removable only if
+                    every section in the block is removable.
 
 NOTE:
   These directories/files appear after physical memory hotplug phase.

^ permalink raw reply

* [PATCH 7/8] Define memory_block_size_bytes() for powerpc/pseries
From: Nathan Fontenot @ 2010-09-22 14:35 UTC (permalink / raw)
  To: linux-kernel, linux-mm, linuxppc-dev
  Cc: Greg KH, KAMEZAWA Hiroyuki, Dave Hansen
In-Reply-To: <4C9A0F8F.2030409@austin.ibm.com>

Define a version of memory_block_size_bytes() for powerpc/pseries such that
a memory block spans an entire lmb.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

---
 arch/powerpc/platforms/pseries/hotplug-memory.c |   66 +++++++++++++++++++-----
 1 file changed, 53 insertions(+), 13 deletions(-)

Index: linux-next/arch/powerpc/platforms/pseries/hotplug-memory.c
===================================================================
--- linux-next.orig/arch/powerpc/platforms/pseries/hotplug-memory.c	2010-09-21 11:59:24.000000000 -0500
+++ linux-next/arch/powerpc/platforms/pseries/hotplug-memory.c	2010-09-21 12:38:31.000000000 -0500
@@ -17,6 +17,54 @@
 #include <asm/pSeries_reconfig.h>
 #include <asm/sparsemem.h>
 
+static u32 get_memblock_size(void)
+{
+	struct device_node *np;
+	unsigned int memblock_size = 0;
+
+	np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (np) {
+		const unsigned long *size;
+
+		size = of_get_property(np, "ibm,lmb-size", NULL);
+		memblock_size = size ? *size : 0;
+
+		of_node_put(np);
+	} else {
+		unsigned int memzero_size = 0;
+		const unsigned int *regs;
+
+		np = of_find_node_by_path("/memory@0");
+		if (np) {
+			regs = of_get_property(np, "reg", NULL);
+			memzero_size = regs ? regs[3] : 0;
+			of_node_put(np);
+		}
+
+		if (memzero_size) {
+			/* We now know the size of memory@0, use this to find
+			 * the first memoryblock and get its size.
+			 */
+			char buf[64];
+
+			sprintf(buf, "/memory@%x", memzero_size);
+			np = of_find_node_by_path(buf);
+			if (np) {
+				regs = of_get_property(np, "reg", NULL);
+				memblock_size = regs ? regs[3] : 0;
+				of_node_put(np);
+			}
+		}
+	}
+
+	return memblock_size;
+}
+
+u32 memory_block_size_bytes(void)
+{
+	return get_memblock_size();
+}
+
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
 	unsigned long start, start_pfn;
@@ -127,30 +175,22 @@ static int pseries_add_memory(struct dev
 
 static int pseries_drconf_memory(unsigned long *base, unsigned int action)
 {
-	struct device_node *np;
-	const unsigned long *lmb_size;
+	unsigned long memblock_size;
 	int rc;
 
-	np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
-	if (!np)
+	memblock_size = get_memblock_size();
+	if (!memblock_size)
 		return -EINVAL;
 
-	lmb_size = of_get_property(np, "ibm,lmb-size", NULL);
-	if (!lmb_size) {
-		of_node_put(np);
-		return -EINVAL;
-	}
-
 	if (action == PSERIES_DRCONF_MEM_ADD) {
-		rc = memblock_add(*base, *lmb_size);
+		rc = memblock_add(*base, memblock_size);
 		rc = (rc < 0) ? -EINVAL : 0;
 	} else if (action == PSERIES_DRCONF_MEM_REMOVE) {
-		rc = pseries_remove_memblock(*base, *lmb_size);
+		rc = pseries_remove_memblock(*base, memblock_size);
 	} else {
 		rc = -EINVAL;
 	}
 
-	of_node_put(np);
 	return rc;
 }

^ permalink raw reply

* [PATCH 6/8] Update node sysfs code
From: Nathan Fontenot @ 2010-09-22 14:34 UTC (permalink / raw)
  To: linux-kernel, linux-mm, linuxppc-dev
  Cc: Greg KH, KAMEZAWA Hiroyuki, Dave Hansen
In-Reply-To: <4C9A0F8F.2030409@austin.ibm.com>

Update the node sysfs code to be aware of the new capability for a memory
block to span multiple memory sections.  This requires an additional
parameter to unregister_mem_sect_under_nodes so that we know which memory
section of the memory block to unregister.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

---
 drivers/base/memory.c |    4 +++-
 drivers/base/node.c   |   12 ++++++++----
 include/linux/node.h  |    6 ++++--
 3 files changed, 15 insertions(+), 7 deletions(-)

Index: linux-next/drivers/base/node.c
===================================================================
--- linux-next.orig/drivers/base/node.c	2010-09-21 11:59:24.000000000 -0500
+++ linux-next/drivers/base/node.c	2010-09-21 12:38:02.000000000 -0500
@@ -346,8 +346,10 @@ int register_mem_sect_under_node(struct
 		return -EFAULT;
 	if (!node_online(nid))
 		return 0;
-	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
-	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
+
+	sect_start_pfn = section_nr_to_pfn(mem_blk->start_phys_index);
+	sect_end_pfn = section_nr_to_pfn(mem_blk->end_phys_index);
+	sect_end_pfn += PAGES_PER_SECTION - 1;
 	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
 		int page_nid;
 
@@ -371,7 +373,8 @@ int register_mem_sect_under_node(struct
 }
 
 /* unregister memory section under all nodes that it spans */
-int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
+int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
+				    unsigned long phys_index)
 {
 	NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL);
 	unsigned long pfn, sect_start_pfn, sect_end_pfn;
@@ -383,7 +386,8 @@ int unregister_mem_sect_under_nodes(stru
 	if (!unlinked_nodes)
 		return -ENOMEM;
 	nodes_clear(*unlinked_nodes);
-	sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
+
+	sect_start_pfn = section_nr_to_pfn(phys_index);
 	sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
 	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
 		int nid;
Index: linux-next/drivers/base/memory.c
===================================================================
--- linux-next.orig/drivers/base/memory.c	2010-09-21 12:37:30.000000000 -0500
+++ linux-next/drivers/base/memory.c	2010-09-21 12:38:02.000000000 -0500
@@ -555,9 +555,9 @@ int remove_memory_block(unsigned long no
 
 	mutex_lock(&mem_sysfs_mutex);
 	mem = find_memory_block(section);
+	unregister_mem_sect_under_nodes(mem, __section_nr(section));
 
 	if (atomic_dec_and_test(&mem->section_count)) {
-		unregister_mem_sect_under_nodes(mem);
 		mem_remove_simple_file(mem, phys_index);
 		mem_remove_simple_file(mem, end_phys_index);
 		mem_remove_simple_file(mem, state);
@@ -631,6 +631,7 @@ int __init memory_dev_init(void)
 	 * Create entries for memory sections that were found
 	 * during boot and have been initialized
 	 */
+	printk(KERN_ERR "Memory Start\n");
 	for (i = 0; i < NR_MEM_SECTIONS; i++) {
 		if (!present_section_nr(i))
 			continue;
@@ -639,6 +640,7 @@ int __init memory_dev_init(void)
 		if (!ret)
 			ret = err;
 	}
+	printk(KERN_ERR "Memory End\n");
 
 	err = memory_probe_init();
 	if (!ret)
Index: linux-next/include/linux/node.h
===================================================================
--- linux-next.orig/include/linux/node.h	2010-09-21 11:59:28.000000000 -0500
+++ linux-next/include/linux/node.h	2010-09-21 12:38:02.000000000 -0500
@@ -44,7 +44,8 @@ extern int register_cpu_under_node(unsig
 extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int register_mem_sect_under_node(struct memory_block *mem_blk,
 						int nid);
-extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk);
+extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
+					   unsigned long phys_index);
 
 #ifdef CONFIG_HUGETLBFS
 extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
@@ -72,7 +73,8 @@ static inline int register_mem_sect_unde
 {
 	return 0;
 }
-static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
+static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
+						  unsigned long phys_index)
 {
 	return 0;
 }

^ permalink raw reply

* [PATCH 5/8] Allow a memory block to span multiple memory sections
From: Nathan Fontenot @ 2010-09-22 14:33 UTC (permalink / raw)
  To: linux-kernel, linux-mm, linuxppc-dev
  Cc: Greg KH, KAMEZAWA Hiroyuki, Dave Hansen
In-Reply-To: <4C9A0F8F.2030409@austin.ibm.com>

Update the memory sysfs code such that each sysfs memory directory is now
considered a memory block that can span multiple memory sections per
memory block.  The default size of each memory block is SECTION_SIZE_BITS
to maintain the current behavior of having a single memory section per
memory block (i.e. one sysfs directory per memory section).

For architectures that want to have memory blocks span multiple
memory sections they need only define their own memory_block_size_bytes()
routine.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

---
 drivers/base/memory.c |  148 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 103 insertions(+), 45 deletions(-)

Index: linux-next/drivers/base/memory.c
===================================================================
--- linux-next.orig/drivers/base/memory.c	2010-09-21 12:37:03.000000000 -0500
+++ linux-next/drivers/base/memory.c	2010-09-21 12:37:30.000000000 -0500
@@ -30,6 +30,14 @@
 static DEFINE_MUTEX(mem_sysfs_mutex);
 
 #define MEMORY_CLASS_NAME	"memory"
+#define MIN_MEMORY_BLOCK_SIZE	(1 << SECTION_SIZE_BITS)
+
+static int sections_per_block;
+
+static inline int base_memory_block_id(int section_nr)
+{
+	return (section_nr / sections_per_block) * sections_per_block;
+}
 
 static struct sysdev_class memory_sysdev_class = {
 	.name = MEMORY_CLASS_NAME,
@@ -84,22 +92,21 @@ EXPORT_SYMBOL(unregister_memory_isolate_
  * register_memory - Setup a sysfs device for a memory block
  */
 static
-int register_memory(struct memory_block *memory, struct mem_section *section)
+int register_memory(struct memory_block *memory)
 {
 	int error;
 
 	memory->sysdev.cls = &memory_sysdev_class;
-	memory->sysdev.id = __section_nr(section);
+	memory->sysdev.id = memory->start_phys_index;
 
 	error = sysdev_register(&memory->sysdev);
 	return error;
 }
 
 static void
-unregister_memory(struct memory_block *memory, struct mem_section *section)
+unregister_memory(struct memory_block *memory)
 {
 	BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
-	BUG_ON(memory->sysdev.id != __section_nr(section));
 
 	/* drop the ref. we got in remove_memory_block() */
 	kobject_put(&memory->sysdev.kobj);
@@ -133,13 +140,16 @@ static ssize_t show_mem_end_phys_index(s
 static ssize_t show_mem_removable(struct sys_device *dev,
 			struct sysdev_attribute *attr, char *buf)
 {
-	unsigned long start_pfn;
-	int ret;
+	unsigned long i, pfn;
+	int ret = 1;
 	struct memory_block *mem =
 		container_of(dev, struct memory_block, sysdev);
 
-	start_pfn = section_nr_to_pfn(mem->start_phys_index);
-	ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
+	for (i = mem->start_phys_index; i <= mem->end_phys_index; i++) {
+		pfn = section_nr_to_pfn(i);
+		ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+	}
+
 	return sprintf(buf, "%d\n", ret);
 }
 
@@ -192,17 +202,14 @@ int memory_isolate_notify(unsigned long
  * OK to have direct references to sparsemem variables in here.
  */
 static int
-memory_block_action(struct memory_block *mem, unsigned long action)
+memory_section_action(unsigned long phys_index, unsigned long action)
 {
 	int i;
-	unsigned long psection;
 	unsigned long start_pfn, start_paddr;
 	struct page *first_page;
 	int ret;
-	int old_state = mem->state;
 
-	psection = mem->start_phys_index;
-	first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);
+	first_page = pfn_to_page(phys_index << PFN_SECTION_SHIFT);
 
 	/*
 	 * The probe routines leave the pages reserved, just
@@ -215,8 +222,8 @@ memory_block_action(struct memory_block
 				continue;
 
 			printk(KERN_WARNING "section number %ld page number %d "
-				"not reserved, was it already online? \n",
-				psection, i);
+				"not reserved, was it already online?\n",
+				phys_index, i);
 			return -EBUSY;
 		}
 	}
@@ -227,18 +234,13 @@ memory_block_action(struct memory_block
 			ret = online_pages(start_pfn, PAGES_PER_SECTION);
 			break;
 		case MEM_OFFLINE:
-			mem->state = MEM_GOING_OFFLINE;
 			start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
 			ret = remove_memory(start_paddr,
 					    PAGES_PER_SECTION << PAGE_SHIFT);
-			if (ret) {
-				mem->state = old_state;
-				break;
-			}
 			break;
 		default:
-			WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
-					__func__, mem, action, action);
+			WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
+			     "%ld\n", __func__, phys_index, action, action);
 			ret = -EINVAL;
 	}
 
@@ -248,7 +250,7 @@ memory_block_action(struct memory_block
 static int memory_block_change_state(struct memory_block *mem,
 		unsigned long to_state, unsigned long from_state_req)
 {
-	int ret = 0;
+	int i, ret = 0;
 	mutex_lock(&mem->state_mutex);
 
 	if (mem->state != from_state_req) {
@@ -256,8 +258,21 @@ static int memory_block_change_state(str
 		goto out;
 	}
 
-	ret = memory_block_action(mem, to_state);
-	if (!ret)
+	if (to_state == MEM_OFFLINE)
+		mem->state = MEM_GOING_OFFLINE;
+
+	for (i = mem->start_phys_index; i <= mem->end_phys_index; i++) {
+		ret = memory_section_action(i, to_state);
+		if (ret)
+			break;
+	}
+
+	if (ret) {
+		for (i = mem->start_phys_index; i <= mem->end_phys_index; i++)
+			memory_section_action(i, from_state_req);
+
+		mem->state = from_state_req;
+	} else
 		mem->state = to_state;
 
 out:
@@ -270,20 +285,15 @@ store_mem_state(struct sys_device *dev,
 		struct sysdev_attribute *attr, const char *buf, size_t count)
 {
 	struct memory_block *mem;
-	unsigned int phys_section_nr;
 	int ret = -EINVAL;
 
 	mem = container_of(dev, struct memory_block, sysdev);
-	phys_section_nr = mem->start_phys_index;
-
-	if (!present_section_nr(phys_section_nr))
-		goto out;
 
 	if (!strncmp(buf, "online", min((int)count, 6)))
 		ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
 	else if(!strncmp(buf, "offline", min((int)count, 7)))
 		ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
-out:
+
 	if (ret)
 		return ret;
 	return count;
@@ -460,12 +470,13 @@ struct memory_block *find_memory_block(s
 	struct sys_device *sysdev;
 	struct memory_block *mem;
 	char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
+	int block_id = base_memory_block_id(__section_nr(section));
 
 	/*
 	 * This only works because we know that section == sysdev->id
 	 * slightly redundant with sysdev_register()
 	 */
-	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
+	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, block_id);
 
 	kobj = kset_find_obj(&memory_sysdev_class.kset, name);
 	if (!kobj)
@@ -477,26 +488,26 @@ struct memory_block *find_memory_block(s
 	return mem;
 }
 
-static int add_memory_block(int nid, struct mem_section *section,
-			unsigned long state, enum mem_add_context context)
+static int init_memory_block(struct memory_block **memory,
+			     struct mem_section *section, unsigned long state)
 {
-	struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	struct memory_block *mem;
 	unsigned long start_pfn;
 	int ret = 0;
 
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
 	if (!mem)
 		return -ENOMEM;
 
-	mutex_lock(&mem_sysfs_mutex);
-
-	mem->start_phys_index = __section_nr(section);
+	mem->start_phys_index = base_memory_block_id(__section_nr(section));
+	mem->end_phys_index = mem->start_phys_index + sections_per_block - 1;
 	mem->state = state;
 	atomic_inc(&mem->section_count);
 	mutex_init(&mem->state_mutex);
 	start_pfn = section_nr_to_pfn(mem->start_phys_index);
 	mem->phys_device = arch_get_memory_phys_device(start_pfn);
 
-	ret = register_memory(mem, section);
+	ret = register_memory(mem);
 	if (!ret)
 		ret = mem_create_simple_file(mem, phys_index);
 	if (!ret)
@@ -507,8 +518,29 @@ static int add_memory_block(int nid, str
 		ret = mem_create_simple_file(mem, phys_device);
 	if (!ret)
 		ret = mem_create_simple_file(mem, removable);
+
+	*memory = mem;
+	return ret;
+}
+
+static int add_memory_section(int nid, struct mem_section *section,
+			unsigned long state, enum mem_add_context context)
+{
+	struct memory_block *mem;
+	int ret = 0;
+
+	mutex_lock(&mem_sysfs_mutex);
+
+	mem = find_memory_block(section);
+	if (mem) {
+		atomic_inc(&mem->section_count);
+		kobject_put(&mem->sysdev.kobj);
+	} else
+		ret = init_memory_block(&mem, section, state);
+
 	if (!ret) {
-		if (context == HOTPLUG)
+		if (context == HOTPLUG &&
+		    atomic_read(&mem->section_count) == sections_per_block)
 			ret = register_mem_sect_under_node(mem, nid);
 	}
 
@@ -531,8 +563,10 @@ int remove_memory_block(unsigned long no
 		mem_remove_simple_file(mem, state);
 		mem_remove_simple_file(mem, phys_device);
 		mem_remove_simple_file(mem, removable);
-		unregister_memory(mem, section);
-	}
+		unregister_memory(mem);
+		kfree(mem);
+	} else
+		kobject_put(&mem->sysdev.kobj);
 
 	mutex_unlock(&mem_sysfs_mutex);
 	return 0;
@@ -544,7 +578,7 @@ int remove_memory_block(unsigned long no
  */
 int register_new_memory(int nid, struct mem_section *section)
 {
-	return add_memory_block(nid, section, MEM_OFFLINE, HOTPLUG);
+	return add_memory_section(nid, section, MEM_OFFLINE, HOTPLUG);
 }
 
 int unregister_memory_section(struct mem_section *section)
@@ -555,6 +589,26 @@ int unregister_memory_section(struct mem
 	return remove_memory_block(0, section, 0);
 }
 
+u32 __weak memory_block_size_bytes(void)
+{
+	return MIN_MEMORY_BLOCK_SIZE;
+}
+
+static u32 get_memory_block_size(void)
+{
+	u32 block_sz;
+
+	block_sz = memory_block_size_bytes();
+
+	/* Validate blk_sz is a power of 2 and not less than section size */
+	if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) {
+		WARN_ON(1);
+		block_sz = MIN_MEMORY_BLOCK_SIZE;
+	}
+
+	return block_sz;
+}
+
 /*
  * Initialize the sysfs support for memory devices...
  */
@@ -563,12 +617,16 @@ int __init memory_dev_init(void)
 	unsigned int i;
 	int ret;
 	int err;
+	int block_sz;
 
 	memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
 	ret = sysdev_class_register(&memory_sysdev_class);
 	if (ret)
 		goto out;
 
+	block_sz = get_memory_block_size();
+	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+
 	/*
 	 * Create entries for memory sections that were found
 	 * during boot and have been initialized
@@ -576,8 +634,8 @@ int __init memory_dev_init(void)
 	for (i = 0; i < NR_MEM_SECTIONS; i++) {
 		if (!present_section_nr(i))
 			continue;
-		err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
-				       BOOT);
+		err = add_memory_section(0, __nr_to_section(i), MEM_ONLINE,
+					 BOOT);
 		if (!ret)
 			ret = err;
 	}

^ permalink raw reply

* [PATCH 4/8] Add mutex for adding/removing memory blocks
From: Nathan Fontenot @ 2010-09-22 14:32 UTC (permalink / raw)
  To: linux-kernel, linux-mm, linuxppc-dev
  Cc: Greg KH, KAMEZAWA Hiroyuki, Dave Hansen
In-Reply-To: <4C9A0F8F.2030409@austin.ibm.com>

Add a new mutex for use in adding and removing of memory blocks.  This
is needed to avoid any race conditions in which the same memory block could
be added and removed at the same time.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

---
 drivers/base/memory.c |    7 +++++++
 1 file changed, 7 insertions(+)

Index: linux-next/drivers/base/memory.c
===================================================================
--- linux-next.orig/drivers/base/memory.c	2010-09-21 12:36:45.000000000 -0500
+++ linux-next/drivers/base/memory.c	2010-09-21 12:37:03.000000000 -0500
@@ -27,6 +27,8 @@
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
 
+static DEFINE_MUTEX(mem_sysfs_mutex);
+
 #define MEMORY_CLASS_NAME	"memory"
 
 static struct sysdev_class memory_sysdev_class = {
@@ -485,6 +487,8 @@ static int add_memory_block(int nid, str
 	if (!mem)
 		return -ENOMEM;
 
+	mutex_lock(&mem_sysfs_mutex);
+
 	mem->start_phys_index = __section_nr(section);
 	mem->state = state;
 	atomic_inc(&mem->section_count);
@@ -508,6 +512,7 @@ static int add_memory_block(int nid, str
 			ret = register_mem_sect_under_node(mem, nid);
 	}
 
+	mutex_unlock(&mem_sysfs_mutex);
 	return ret;
 }
 
@@ -516,6 +521,7 @@ int remove_memory_block(unsigned long no
 {
 	struct memory_block *mem;
 
+	mutex_lock(&mem_sysfs_mutex);
 	mem = find_memory_block(section);
 
 	if (atomic_dec_and_test(&mem->section_count)) {
@@ -528,6 +534,7 @@ int remove_memory_block(unsigned long no
 		unregister_memory(mem, section);
 	}
 
+	mutex_unlock(&mem_sysfs_mutex);
 	return 0;
 }

^ permalink raw reply

* [PATCH 3/8] Add section count to memory_block struct
From: Nathan Fontenot @ 2010-09-22 14:30 UTC (permalink / raw)
  To: linux-kernel, linux-mm, linuxppc-dev
  Cc: Greg KH, KAMEZAWA Hiroyuki, Dave Hansen
In-Reply-To: <4C9A0F8F.2030409@austin.ibm.com>

Add a section count property to the memory_block struct to track the number
of memory sections that have been added/removed from a memory block. This
allows us to know when the last memory section of a memory block has been
removed so we can remove the memory block.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

---
 drivers/base/memory.c  |   18 +++++++++++-------
 include/linux/memory.h |    2 ++
 2 files changed, 13 insertions(+), 7 deletions(-)

Index: linux-next/drivers/base/memory.c
===================================================================
--- linux-next.orig/drivers/base/memory.c	2010-09-21 12:36:03.000000000 -0500
+++ linux-next/drivers/base/memory.c	2010-09-21 12:36:45.000000000 -0500
@@ -487,6 +487,7 @@ static int add_memory_block(int nid, str
 
 	mem->start_phys_index = __section_nr(section);
 	mem->state = state;
+	atomic_inc(&mem->section_count);
 	mutex_init(&mem->state_mutex);
 	start_pfn = section_nr_to_pfn(mem->start_phys_index);
 	mem->phys_device = arch_get_memory_phys_device(start_pfn);
@@ -516,13 +517,16 @@ int remove_memory_block(unsigned long no
 	struct memory_block *mem;
 
 	mem = find_memory_block(section);
-	unregister_mem_sect_under_nodes(mem);
-	mem_remove_simple_file(mem, phys_index);
-	mem_remove_simple_file(mem, end_phys_index);
-	mem_remove_simple_file(mem, state);
-	mem_remove_simple_file(mem, phys_device);
-	mem_remove_simple_file(mem, removable);
-	unregister_memory(mem, section);
+
+	if (atomic_dec_and_test(&mem->section_count)) {
+		unregister_mem_sect_under_nodes(mem);
+		mem_remove_simple_file(mem, phys_index);
+		mem_remove_simple_file(mem, end_phys_index);
+		mem_remove_simple_file(mem, state);
+		mem_remove_simple_file(mem, phys_device);
+		mem_remove_simple_file(mem, removable);
+		unregister_memory(mem, section);
+	}
 
 	return 0;
 }
Index: linux-next/include/linux/memory.h
===================================================================
--- linux-next.orig/include/linux/memory.h	2010-09-21 12:34:04.000000000 -0500
+++ linux-next/include/linux/memory.h	2010-09-21 12:36:45.000000000 -0500
@@ -19,11 +19,13 @@
 #include <linux/node.h>
 #include <linux/compiler.h>
 #include <linux/mutex.h>
+#include <asm/atomic.h>
 
 struct memory_block {
 	unsigned long start_phys_index;
 	unsigned long end_phys_index;
 	unsigned long state;
+	atomic_t section_count;
 	/*
 	 * This serializes all state change requests.  It isn't
 	 * held during creation because the control files are

^ permalink raw reply

* [PATCH 2/8] Update memory block struct to have start and end phys index
From: Nathan Fontenot @ 2010-09-22 14:29 UTC (permalink / raw)
  To: linux-kernel, linux-mm, linuxppc-dev
  Cc: Greg KH, KAMEZAWA Hiroyuki, Dave Hansen
In-Reply-To: <4C9A0F8F.2030409@austin.ibm.com>

Update the 'phys_index' properties of a memory block to include a
'start_phys_index' which is the same as the current 'phys_index' property.
The property still appears as 'phys_index' in sysfs but the memory_block
struct name is updated to indicate the start and end values.
This also adds an 'end_phys_index' property to indicate the id of the
last section in th memory block.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

---
 drivers/base/memory.c  |   28 ++++++++++++++++++++--------
 include/linux/memory.h |    3 ++-
 2 files changed, 22 insertions(+), 9 deletions(-)

Index: linux-next/drivers/base/memory.c
===================================================================
--- linux-next.orig/drivers/base/memory.c	2010-09-21 12:32:45.000000000 -0500
+++ linux-next/drivers/base/memory.c	2010-09-21 12:34:04.000000000 -0500
@@ -109,12 +109,20 @@ unregister_memory(struct memory_block *m
  * uses.
  */
 
-static ssize_t show_mem_phys_index(struct sys_device *dev,
+static ssize_t show_mem_start_phys_index(struct sys_device *dev,
 			struct sysdev_attribute *attr, char *buf)
 {
 	struct memory_block *mem =
 		container_of(dev, struct memory_block, sysdev);
-	return sprintf(buf, "%08lx\n", mem->phys_index);
+	return sprintf(buf, "%08lx\n", mem->start_phys_index);
+}
+
+static ssize_t show_mem_end_phys_index(struct sys_device *dev,
+			struct sysdev_attribute *attr, char *buf)
+{
+	struct memory_block *mem =
+		container_of(dev, struct memory_block, sysdev);
+	return sprintf(buf, "%08lx\n", mem->end_phys_index);
 }
 
 /*
@@ -128,7 +136,7 @@ static ssize_t show_mem_removable(struct
 	struct memory_block *mem =
 		container_of(dev, struct memory_block, sysdev);
 
-	start_pfn = section_nr_to_pfn(mem->phys_index);
+	start_pfn = section_nr_to_pfn(mem->start_phys_index);
 	ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
 	return sprintf(buf, "%d\n", ret);
 }
@@ -191,7 +199,7 @@ memory_block_action(struct memory_block
 	int ret;
 	int old_state = mem->state;
 
-	psection = mem->phys_index;
+	psection = mem->start_phys_index;
 	first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);
 
 	/*
@@ -264,7 +272,7 @@ store_mem_state(struct sys_device *dev,
 	int ret = -EINVAL;
 
 	mem = container_of(dev, struct memory_block, sysdev);
-	phys_section_nr = mem->phys_index;
+	phys_section_nr = mem->start_phys_index;
 
 	if (!present_section_nr(phys_section_nr))
 		goto out;
@@ -296,7 +304,8 @@ static ssize_t show_phys_device(struct s
 	return sprintf(buf, "%d\n", mem->phys_device);
 }
 
-static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
+static SYSDEV_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL);
+static SYSDEV_ATTR(end_phys_index, 0444, show_mem_end_phys_index, NULL);
 static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
 static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
 static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);
@@ -476,16 +485,18 @@ static int add_memory_block(int nid, str
 	if (!mem)
 		return -ENOMEM;
 
-	mem->phys_index = __section_nr(section);
+	mem->start_phys_index = __section_nr(section);
 	mem->state = state;
 	mutex_init(&mem->state_mutex);
-	start_pfn = section_nr_to_pfn(mem->phys_index);
+	start_pfn = section_nr_to_pfn(mem->start_phys_index);
 	mem->phys_device = arch_get_memory_phys_device(start_pfn);
 
 	ret = register_memory(mem, section);
 	if (!ret)
 		ret = mem_create_simple_file(mem, phys_index);
 	if (!ret)
+		ret = mem_create_simple_file(mem, end_phys_index);
+	if (!ret)
 		ret = mem_create_simple_file(mem, state);
 	if (!ret)
 		ret = mem_create_simple_file(mem, phys_device);
@@ -507,6 +518,7 @@ int remove_memory_block(unsigned long no
 	mem = find_memory_block(section);
 	unregister_mem_sect_under_nodes(mem);
 	mem_remove_simple_file(mem, phys_index);
+	mem_remove_simple_file(mem, end_phys_index);
 	mem_remove_simple_file(mem, state);
 	mem_remove_simple_file(mem, phys_device);
 	mem_remove_simple_file(mem, removable);
Index: linux-next/include/linux/memory.h
===================================================================
--- linux-next.orig/include/linux/memory.h	2010-09-21 11:59:28.000000000 -0500
+++ linux-next/include/linux/memory.h	2010-09-21 12:34:04.000000000 -0500
@@ -21,7 +21,8 @@
 #include <linux/mutex.h>
 
 struct memory_block {
-	unsigned long phys_index;
+	unsigned long start_phys_index;
+	unsigned long end_phys_index;
 	unsigned long state;
 	/*
 	 * This serializes all state change requests.  It isn't

^ permalink raw reply

* [PATCH 1/8] Move find_memory_block() routine
From: Nathan Fontenot @ 2010-09-22 14:28 UTC (permalink / raw)
  To: linux-kernel, linux-mm, linuxppc-dev
  Cc: Greg KH, KAMEZAWA Hiroyuki, Dave Hansen
In-Reply-To: <4C9A0F8F.2030409@austin.ibm.com>

Move the find_memory_block() routine up to avoid needing a forward
declaration in subsequent patches.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

---
 drivers/base/memory.c |   62 +++++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

Index: linux-next/drivers/base/memory.c
===================================================================
--- linux-next.orig/drivers/base/memory.c	2010-09-21 11:59:24.000000000 -0500
+++ linux-next/drivers/base/memory.c	2010-09-21 12:32:45.000000000 -0500
@@ -435,6 +435,37 @@ int __weak arch_get_memory_phys_device(u
 	return 0;
 }
 
+/*
+ * For now, we have a linear search to go find the appropriate
+ * memory_block corresponding to a particular phys_index. If
+ * this gets to be a real problem, we can always use a radix
+ * tree or something here.
+ *
+ * This could be made generic for all sysdev classes.
+ */
+struct memory_block *find_memory_block(struct mem_section *section)
+{
+	struct kobject *kobj;
+	struct sys_device *sysdev;
+	struct memory_block *mem;
+	char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
+
+	/*
+	 * This only works because we know that section == sysdev->id
+	 * slightly redundant with sysdev_register()
+	 */
+	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
+
+	kobj = kset_find_obj(&memory_sysdev_class.kset, name);
+	if (!kobj)
+		return NULL;
+
+	sysdev = container_of(kobj, struct sys_device, kobj);
+	mem = container_of(sysdev, struct memory_block, sysdev);
+
+	return mem;
+}
+
 static int add_memory_block(int nid, struct mem_section *section,
 			unsigned long state, enum mem_add_context context)
 {
@@ -468,37 +499,6 @@ static int add_memory_block(int nid, str
 	return ret;
 }
 
-/*
- * For now, we have a linear search to go find the appropriate
- * memory_block corresponding to a particular phys_index. If
- * this gets to be a real problem, we can always use a radix
- * tree or something here.
- *
- * This could be made generic for all sysdev classes.
- */
-struct memory_block *find_memory_block(struct mem_section *section)
-{
-	struct kobject *kobj;
-	struct sys_device *sysdev;
-	struct memory_block *mem;
-	char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
-
-	/*
-	 * This only works because we know that section == sysdev->id
-	 * slightly redundant with sysdev_register()
-	 */
-	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
-
-	kobj = kset_find_obj(&memory_sysdev_class.kset, name);
-	if (!kobj)
-		return NULL;
-
-	sysdev = container_of(kobj, struct sys_device, kobj);
-	mem = container_of(sysdev, struct memory_block, sysdev);
-
-	return mem;
-}
-
 int remove_memory_block(unsigned long node_id, struct mem_section *section,
 		int phys_device)
 {

^ permalink raw reply

* RE: [U-Boot] cuImage and multi image?
From: Chen, Tiejun @ 2010-09-22 14:28 UTC (permalink / raw)
  To: Shawn Jin; +Cc: Scott Wood, ppcdev, uboot
In-Reply-To: <AANLkTi=kySFBW8Lwt_hoOZMObGc=kFom22GjbwoccQR-@mail.gmail.com>

> -----Original Message-----
> From: Shawn Jin [mailto:shawnxjin@gmail.com]=20
> Sent: Wednesday, September 22, 2010 1:43 PM
> To: Chen, Tiejun
> Cc: Scott Wood; ppcdev; uboot
> Subject: Re: [U-Boot] cuImage and multi image?
>=20
> >> > > A follow up question. With this method, the total image size
> >> > > (uncompressed) is limited to the 4MB (the link address of
> >> the boot
> >> > > wrapper)?
> >> >
> >> > No.
> >>
> >> Yes, unless you change the link address, or provide a=20
> vmlinux_alloc=20
> >> callback (which currently only happens on true OF, not cuImage).
> >>
> >> Unless you're talking about the "(uncompressed)"? =A0The=20
> limit applies=20
> >> to the uncompressed boot image -- anything that the bootwrapper=20
> >> itself is decompressing. =A0It does not apply to any further=20
> >> uncompression of the ramdisk itself.
> >>
> >
> > He should point the latter, "the total image size",=20
> including ramdisk.
> > But the link address should be limited for the boot Image,=20
> not for the=20
> > attached ramdisk.
>=20
> Thanks for the clarification. Scott, so what are the things=20
> that the bootwrapper is decompressing? The kernel for sure.=20
> And anything else?

I think you can read the file, arch/powerpc/boot/zImage.lds.S, to =
understand this :)

> I understand that the ramdisk image will be decompressed=20
> later when trying to boot it.
>=20
> I have a large ramdisk image. The size of the image itself (i.e. the
> *.gz) is about 4MB. When the ramdisk was being decompressed=20

Did you try to change link_address on the file, =
arch/powerpc/boot/wrapper?

> in the later stage. It had the following errors. It seems to=20
> me that the ramdisk image somehow was corrupted. Did the=20
> script wrapper mess up the image? Is there any known bug in=20
> the wrapper? My kernel is 2.6.33.

I tried this mechanism on v2.6.34 and it's fine.

> BTW the ramdisk image can be mounted properly if it's=20
> separated from the kernel image.
>=20

Did you try boot the uImage and the ramdisk separately? For example, you =
can boot this as the following command:
# bootm ${kernel_addr} ${ramdisk_addr} ${fdt_addr}

Please use this to check if your ramdisk is valid.

Can you paste the whole log from the u-boot prompt?

Tiejun

> RAMDISK: gzip image found at block 0
> uncompression error
> VFS: Mounted root (ext2 filesystem) readonly on device 1:0.
> Freeing unused kernel memory: 104k init
> EXT2-fs (ram0): error: ext2_check_page: bad entry in directory #336: :
> rec_len is smaller than minimal - offset=3D0, inode=3D0,=20
> rec_len=3D0, name_len=3D0 EXT2-fs (ram0): error: remounting=20
> filesystem read-only Kernel panic - not syncing: No init=20
> found.  Try passing init=3D option to kernel.
> Call Trace:
> [c7821f30] [c0006cd8] show_stack+0x40/0x168 (unreliable)=20
> [c7821f70] [c001cefc] panic+0x8c/0x178 [c7821fc0] [c00026d4]=20
> init_post+0xe4/0xf4 [c7821fd0] [c01ee224]=20
> kernel_init+0x108/0x130 [c7821ff0] [c000dcc0]=20
> kernel_thread+0x4c/0x68 Rebooting in 180 seconds..
>=20
> Thanks,
> -Shawn.
>=20

^ permalink raw reply

* RE: external interrupt mapping with FPGA on loacl bus
From: Chen, Tiejun @ 2010-09-22 14:24 UTC (permalink / raw)
  To: deebul nair, linuxppc-dev
In-Reply-To: <AANLkTikkM0HJHdTd5j9bRaW9uN8OsJcN9O99GAHtPQ18@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 4103 bytes --]

Firstly you should make sure which interrupt level is used to trigger
the external interrupt. High level, low level, rising edge or falling
edge? 
------
 0 = low to high edge sensitive type enabled
 1 = active low level sensitive type enabled
 2 = active high level sensitive type enabled
 3 = high to low edge sensitive type enable  
 
Then set the appropriate sense and level for your external interrupt.
And you have to ensure the FPGA issue the real interrupt. Maybe you can
use the hardware tool to probe if there is a interrupt signal on
external interrupt pin.
 
Additionally I think your 'reg' property is wrong unless you have
abnormal way to parse that :)
 
Cheers
Tiejun
 


________________________________

	From:
linuxppc-dev-bounces+tiejun.chen=windriver.com@lists.ozlabs.org
[mailto:linuxppc-dev-bounces+tiejun.chen=windriver.com@lists.ozlabs.org]
On Behalf Of deebul nair
	Sent: Wednesday, September 22, 2010 4:15 PM
	To: linuxppc-dev@lists.ozlabs.org
	Subject: external interrupt mapping with FPGA on loacl bus
	
	
	 
	Hi
	 
	i have a FPGA called mcmc which is connected to the powerpc
mpc8572 on the local bus. It provides 2 interrupts which are connected
to the
	external interupts IRQ1 and IRQ3 of the mpc8572.
	the fpga is memory mapped on the local bus at address 0xc0000000

	I am able to register the interrupt and obtain virq , but no
interrupts.
	is the modifications made in the dts file enought/right for
getting an external interrupt??
	 
	 
	The modified dts file for enrm8572
	 
	localbus@e0005000 {
	 

		#address-cells = <2>;
		 
		#size-cells = <1>;
		 
		compatible = "fsl,mpc8572-elbc", "fsl,elbc",
"simple-bus";
		 
		reg = <0xe0005000 0x1000>;
		 
		interrupts = <19 2>;
		 
		interrupt-parent = <&mpic>;
		 
		ranges = <0x0 0x0 0xfc000000 0x04000000
		 
		0x1 0x0 0xc0000000 0x00080000
		 
		0x2 0x0 0x90100000 0x00008000
		 
		0x3 0x0 0x90000000 0x00040000
		 
		0x4 0x0 0x91000000 0x00008000>;
		 
		nand@3,0 {
		 
		#address-cells = <1>;
		 
		#size-cells = <1>;
		 
		compatible = "fsl,mpc8572-fcm-nand",
		 
		"fsl,elbc-fcm-nand";
		 
		reg = <0x3 0x0 0x40000>;
		 
		u-boot@0 {
		 
		reg = <0x0 0x02000000>;
		 
		read-only;
		 
		};
		 
		jffs2@2000000 {
		 
		reg = <0x02000000 0x10000000>;
		 
		};
		 
		ramdisk@12000000 {
		 
		reg = <0x12000000 0x08000000>;
		 
		read-only;
		 
		};
		 
		kernel@1a000000 {
		 
		reg = <0x1a000000 0x04000000>;
		 
		};
		 
		dtb@1e000000 {
		 
		reg = <0x1e000000 0x01000000>;
		 
		read-only;
		 
		};
		 
		empty@1f000000 {
		 
		reg = <0x1f000000 0x21000000>;
		 
		};
		 
		};
		 
		 
		 
		mcmc@1,0 {
		 
		compatible = "fsl,fpga-pixis";
		 
		reg = <1 0 32>;
		 
		interrupts = <1 1 3 1>;
		 
		interrupt-parent = <&mpic>;
		 
		};

	 
	};
	 
	code for registering the interrupt :
	 
	np = of_find_compatible_node(NULL, NULL, "fsl,fpga-pixis");
	 
	mcmc_irq = irq;
	 
	virq = irq_of_parse_and_map(np, mcmc_irq);
	 
	if(virq == NO_IRQ){
	 
	printk(KERN_ERR "mcmc: failed to map interrupt");
	 
	}
	 
	printk(KERN_INFO "mcmc: interrupt mapped to virq %d\n", virq);
	 
	ret = request_irq(virq, mcmc_interrupt, IRQF_DISABLED,"mcmc",
NULL);
	 
	if(ret){
	 
	printk(KERN_INFO "mcmc : cant get assigned irq %i\n errno :
<%d>\n",mcmc_irq,ret);
	 
	}
	 
	else{
	 
	printk(KERN_ALERT "Interrupt requested OK\n");
	 
	}
	 
	d :
	 
	when i register the driver the following output is obtaine
	 
	for irq=1
	 
	irq_of_parse_and_map
	 
	irq : irq_create_mapping(0xc0fffb40,0x1)
	 
	irq : using host @ c0fffb40
	 
	irq : obtained virq 18
	 
	irq : virq=12 <6>mcmc : interrupt mapped to irq 18
	 
	Interrupt requested OK!
	 
	for irq=0
	 
	irq_of_parse_and_map
	 
	irq : irq_create_mapping(0xc0fffb40,0x3)
	 
	irq : using host @ c0fffb40
	 
	irq : obtained virq 20
	 
	irq : virq=14 <6>mcmc : interrupt mapped to irq 20
	 
	Interrupt requested OK!
	 
	 
	 
	 
	 
	--
	Cheeers
	 
	Deebul !!!!!!
	 
	 


[-- Attachment #2: Type: text/html, Size: 15817 bytes --]

^ permalink raw reply

* [PATCH 0/8] De-couple sysfs memory directories from memory sections
From: Nathan Fontenot @ 2010-09-22 14:15 UTC (permalink / raw)
  To: linux-kernel, linux-mm, linuxppc-dev
  Cc: Greg KH, KAMEZAWA Hiroyuki, Dave Hansen

This set of patches decouples the concept that a single memory
section corresponds to a single directory in 
/sys/devices/system/memory/.  On systems
with large amounts of memory (1+ TB) there are performance issues
related to creating the large number of sysfs directories.  For
a powerpc machine with 1 TB of memory we are creating 63,000+
directories.  This is resulting in boot times of around 45-50
minutes for systems with 1 TB of memory and 8 hours for systems
with 2 TB of memory.  With this patch set applied I am now seeing
boot times of 5 minutes or less.

The root of this issue is in sysfs directory creation. Every time
a directory is created a string compare is done against all sibling
directories to ensure we do not create duplicates.  The list of
directory nodes in sysfs is kept as an unsorted list which results
in this being an exponentially longer operation as the number of
directories are created.

The solution solved by this patch set is to allow a single
directory in sysfs to span multiple memory sections.  This is
controlled by an optional architecturally defined function
memory_block_size_bytes().  The default definition of this
routine returns a memory block size equal to the memory section
size. This maintains the current layout of sysfs memory
directories as it appears to userspace to remain the same as it
is today.

For architectures that define their own version of this routine,
as is done for powerpc in this patchset, the view in userspace
would change such that each memoryXXX directory would span
multiple memory sections.  The number of sections spanned would
depend on the value reported by memory_block_size_bytes.

In both cases a new file 'end_phys_index' is created in each
memoryXXX directory.  This file will contain the physical id
of the last memory section covered by the sysfs directory.  For
the default case, the value in 'end_phys_index' will be the same
as in the existing 'phys_index' file.

-Nathan Fontenot

^ permalink raw reply

* Re: [PATCH 1/2 v3]Update broken web addresses.
From: Jeremy Kerr @ 2010-09-22 13:06 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: linux-mips, linux-m32r, trivial, Justin P. Mattock, linux-m68k,
	linux-laptop, linux-kernel, Finn Thain, Randy Dunlap,
	Maciej W. Rozycki, patchwork, linux-omap, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20100922105852.GC4710@linux-mips.org>

Hi Ralf,

> On Tue, Sep 21, 2010 at 06:29:16PM -0700, Justin P. Mattock wrote:
> > Date:   Tue, 21 Sep 2010 18:29:16 -0700
> > From: "Justin P. Mattock" <justinmattock@gmail.com>
> > To: trivial@kernel.org
> > Cc: linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org,
> >  linux-omap@vger.kernel.org, linux-m32r@ml.linux-m32r.org,
> >  linux-m68k@lists.linux-m68k.org, linux-mips@linux-mips.org,
> >  linuxppc-dev@lists.ozlabs.org, linux-laptop@vger.kernel.org, "Justin P.
> >  Mattock" <justinmattock@gmail.com>, "Maciej W. Rozycki"
> >  <macro@linux-mips.org>, Finn Thain <fthain@telegraphics.com.au>, Randy
> >  Dunlap <rdunlap@xenotime.net>
> > Subject: [PATCH 1/2 v3]Update broken web addresses.
> > Content-Type: text/plain; charset=UTF-8
> 
> Patchwork MIME butchers the subject of this patch, see
> 
> https://patchwork.linux-mips.org/patch/1587/
> https://patchwork.kernel.org/patch/198382/
> 

Thanks for the heads-up - I'll see what's happening with the header
decoding here.

Cheers,


Jeremy

^ permalink raw reply

* Re: [PATCH 1/2 v3]Update broken web addresses.
From: Ralf Baechle @ 2010-09-22 10:58 UTC (permalink / raw)
  To: patchwork
  Cc: linux-mips, Randy Dunlap, linux-m32r, trivial, Maciej W. Rozycki,
	linux-laptop, linux-kernel, Finn Thain, linux-m68k,
	Justin P. Mattock, linux-omap, linuxppc-dev, linux-arm-kernel
In-Reply-To: <1285118957-24965-1-git-send-email-justinmattock@gmail.com>

On Tue, Sep 21, 2010 at 06:29:16PM -0700, Justin P. Mattock wrote:
> Date:   Tue, 21 Sep 2010 18:29:16 -0700
> From: "Justin P. Mattock" <justinmattock@gmail.com>
> To: trivial@kernel.org
> Cc: linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org,
>  linux-omap@vger.kernel.org, linux-m32r@ml.linux-m32r.org,
>  linux-m68k@lists.linux-m68k.org, linux-mips@linux-mips.org,
>  linuxppc-dev@lists.ozlabs.org, linux-laptop@vger.kernel.org, "Justin P.
>  Mattock" <justinmattock@gmail.com>, "Maciej W. Rozycki"
>  <macro@linux-mips.org>, Finn Thain <fthain@telegraphics.com.au>, Randy
>  Dunlap <rdunlap@xenotime.net>
> Subject: [PATCH 1/2 v3]Update broken web addresses.
> Content-Type: text/plain; charset=UTF-8

Patchwork MIME butchers the subject of this patch, see

https://patchwork.linux-mips.org/patch/1587/
https://patchwork.kernel.org/patch/198382/

  Ralf

^ permalink raw reply

* Modifying mpc8308rdb.dts
From: Maria Johansen @ 2010-09-22 10:32 UTC (permalink / raw)
  To: linuxppc-dev

Hi,

I am working on an MPC8308rdb, and have needed to add support for spi
and an lm75 thermal sensor on the i2c-bus. Is it possible to upload my
changes to the "official" mpc8308rdb.dts, or should I just keep
modifying it locally and hope the maintainers of the file will add
support for more devices soon?

I apologize if this may seem like a silly question, but this is the
first time I feel the need to add something into the official kernel :)=20

--=20
Regards,
Maria

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox