LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 8/9] v3 Define memory_block_size_bytes for x86_64 with CONFIG_X86_UV set
From: Robin Holt @ 2010-10-01 18:57 UTC (permalink / raw)
  To: Nathan Fontenot
  Cc: Greg KH, steiner, linux-kernel, Dave Hansen, linux-mm, Robin Holt,
	linuxppc-dev, KAMEZAWA Hiroyuki
In-Reply-To: <4CA62A51.70807@austin.ibm.com>

On Fri, Oct 01, 2010 at 01:37:05PM -0500, Nathan Fontenot wrote:
> Define a version of memory_block_size_bytes for x86_64 when CONFIG_X86_UV is
> set.
> 
> Signed-off-by: Robin Holt <holt@sgi.com>
> Signed-off-by: Jack Steiner <steiner@sgi.com>

I think this technically needs a Signed-off-by: <you> since you
are passing it upstream.

> 
> ---
>  arch/x86/mm/init_64.c |   14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> Index: linux-next/arch/x86/mm/init_64.c
> ===================================================================
> --- linux-next.orig/arch/x86/mm/init_64.c	2010-09-29 14:56:25.000000000 -0500
> +++ linux-next/arch/x86/mm/init_64.c	2010-10-01 13:00:50.000000000 -0500
> @@ -51,6 +51,7 @@
>  #include <asm/numa.h>
>  #include <asm/cacheflush.h>
>  #include <asm/init.h>
> +#include <asm/uv/uv.h>
>  #include <linux/bootmem.h>
>  
>  static int __init parse_direct_gbpages_off(char *arg)
> @@ -902,6 +903,19 @@
>  	return NULL;
>  }
>  
> +#ifdef CONFIG_X86_UV
> +#define MIN_MEMORY_BLOCK_SIZE   (1 << SECTION_SIZE_BITS)
> +
> +unsigned long memory_block_size_bytes(void)
> +{
> +	if (is_uv_system()) {
> +		printk(KERN_INFO "UV: memory block size 2GB\n");
> +		return 2UL * 1024 * 1024 * 1024;
> +	}
> +	return MIN_MEMORY_BLOCK_SIZE;
> +}
> +#endif
> +
>  #ifdef CONFIG_SPARSEMEM_VMEMMAP
>  /*
>   * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
> 

^ permalink raw reply

* Re: [PATCH 9/9] v3 Update memory hotplug documentation
From: Robin Holt @ 2010-10-01 18:58 UTC (permalink / raw)
  To: Nathan Fontenot
  Cc: Greg KH, steiner, linux-kernel, Dave Hansen, linux-mm, Robin Holt,
	linuxppc-dev, KAMEZAWA Hiroyuki
In-Reply-To: <4CA62A7D.8030905@austin.ibm.com>

On Fri, Oct 01, 2010 at 01:37:49PM -0500, Nathan Fontenot wrote:
> Update the memory hotplug documentation to reflect the new behaviors of
> memory blocks reflected in sysfs.
> 
> Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

Reviewed-by: Robin Holt <holt@sgi.com>

^ permalink raw reply

* Re: [PATCH 4/9] v3 Allow memory blocks to span multiple memory sections
From: Nathan Fontenot @ 2010-10-01 19:00 UTC (permalink / raw)
  To: linux-kernel, linux-mm, linuxppc-dev
  Cc: Greg KH, steiner, Robin Holt, KAMEZAWA Hiroyuki, Dave Hansen
In-Reply-To: <4CA62917.80008@austin.ibm.com>

Update the memory sysfs code such that each sysfs memory directory is now
considered a memory block that can span multiple memory sections per
memory block.  The default size of each memory block is one memory section
(1 << SECTION_SIZE_BITS bytes) to maintain the current behavior of having a
single memory section per memory block (i.e. one sysfs directory per memory
section).

For architectures that want to have memory blocks span multiple
memory sections they need only define their own memory_block_size_bytes()
routine.

Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

Updated patch to correct get_memory_block_size() variable block_sz to be
an unsigned long.

---
 drivers/base/memory.c |  155 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 108 insertions(+), 47 deletions(-)

Index: linux-next/drivers/base/memory.c
===================================================================
--- linux-next.orig/drivers/base/memory.c	2010-09-30 14:13:50.000000000 -0500
+++ linux-next/drivers/base/memory.c	2010-10-01 13:50:19.000000000 -0500
@@ -30,6 +30,14 @@
 static DEFINE_MUTEX(mem_sysfs_mutex);
 
 #define MEMORY_CLASS_NAME	"memory"
+#define MIN_MEMORY_BLOCK_SIZE	(1 << SECTION_SIZE_BITS)
+
+static int sections_per_block;
+
+static inline int base_memory_block_id(int section_nr)
+{
+	return section_nr / sections_per_block;
+}
 
 static struct sysdev_class memory_sysdev_class = {
 	.name = MEMORY_CLASS_NAME,
@@ -84,28 +92,47 @@
  * register_memory - Setup a sysfs device for a memory block
  */
 static
-int register_memory(struct memory_block *memory, struct mem_section *section)
+int register_memory(struct memory_block *memory)
 {
 	int error;
 
 	memory->sysdev.cls = &memory_sysdev_class;
-	memory->sysdev.id = __section_nr(section);
+	memory->sysdev.id = memory->phys_index / sections_per_block;
 
 	error = sysdev_register(&memory->sysdev);
 	return error;
 }
 
 static void
-unregister_memory(struct memory_block *memory, struct mem_section *section)
+unregister_memory(struct memory_block *memory)
 {
 	BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
-	BUG_ON(memory->sysdev.id != __section_nr(section));
 
 	/* drop the ref. we got in remove_memory_block() */
 	kobject_put(&memory->sysdev.kobj);
 	sysdev_unregister(&memory->sysdev);
 }
 
+unsigned long __weak memory_block_size_bytes(void)
+{
+	return MIN_MEMORY_BLOCK_SIZE;
+}
+
+static unsigned long get_memory_block_size(void)
+{
+	unsigned long block_sz;
+
+	block_sz = memory_block_size_bytes();
+
+	/* Validate blk_sz is a power of 2 and not less than section size */
+	if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) {
+		WARN_ON(1);
+		block_sz = MIN_MEMORY_BLOCK_SIZE;
+	}
+
+	return block_sz;
+}
+
 /*
  * use this as the physical section index that this memsection
  * uses.
@@ -116,7 +143,7 @@
 {
 	struct memory_block *mem =
 		container_of(dev, struct memory_block, sysdev);
-	return sprintf(buf, "%08lx\n", mem->phys_index);
+	return sprintf(buf, "%08lx\n", mem->phys_index / sections_per_block);
 }
 
 /*
@@ -125,13 +152,16 @@
 static ssize_t show_mem_removable(struct sys_device *dev,
 			struct sysdev_attribute *attr, char *buf)
 {
-	unsigned long start_pfn;
-	int ret;
+	unsigned long i, pfn;
+	int ret = 1;
 	struct memory_block *mem =
 		container_of(dev, struct memory_block, sysdev);
 
-	start_pfn = section_nr_to_pfn(mem->phys_index);
-	ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
+	for (i = 0; i < sections_per_block; i++) {
+		pfn = section_nr_to_pfn(mem->phys_index + i);
+		ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+	}
+
 	return sprintf(buf, "%d\n", ret);
 }
 
@@ -184,17 +214,14 @@
  * OK to have direct references to sparsemem variables in here.
  */
 static int
-memory_block_action(struct memory_block *mem, unsigned long action)
+memory_section_action(unsigned long phys_index, unsigned long action)
 {
 	int i;
-	unsigned long psection;
 	unsigned long start_pfn, start_paddr;
 	struct page *first_page;
 	int ret;
-	int old_state = mem->state;
 
-	psection = mem->phys_index;
-	first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);
+	first_page = pfn_to_page(phys_index << PFN_SECTION_SHIFT);
 
 	/*
 	 * The probe routines leave the pages reserved, just
@@ -207,8 +234,8 @@
 				continue;
 
 			printk(KERN_WARNING "section number %ld page number %d "
-				"not reserved, was it already online? \n",
-				psection, i);
+				"not reserved, was it already online?\n",
+				phys_index, i);
 			return -EBUSY;
 		}
 	}
@@ -219,18 +246,13 @@
 			ret = online_pages(start_pfn, PAGES_PER_SECTION);
 			break;
 		case MEM_OFFLINE:
-			mem->state = MEM_GOING_OFFLINE;
 			start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
 			ret = remove_memory(start_paddr,
 					    PAGES_PER_SECTION << PAGE_SHIFT);
-			if (ret) {
-				mem->state = old_state;
-				break;
-			}
 			break;
 		default:
-			WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
-					__func__, mem, action, action);
+			WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
+			     "%ld\n", __func__, phys_index, action, action);
 			ret = -EINVAL;
 	}
 
@@ -240,7 +262,8 @@
 static int memory_block_change_state(struct memory_block *mem,
 		unsigned long to_state, unsigned long from_state_req)
 {
-	int ret = 0;
+	int i, ret = 0;
+
 	mutex_lock(&mem->state_mutex);
 
 	if (mem->state != from_state_req) {
@@ -248,8 +271,22 @@
 		goto out;
 	}
 
-	ret = memory_block_action(mem, to_state);
-	if (!ret)
+	if (to_state == MEM_OFFLINE)
+		mem->state = MEM_GOING_OFFLINE;
+
+	for (i = 0; i < sections_per_block; i++) {
+		ret = memory_section_action(mem->phys_index + i, to_state);
+		if (ret)
+			break;
+	}
+
+	if (ret) {
+		for (i = 0; i < sections_per_block; i++)
+			memory_section_action(mem->phys_index + i,
+					      from_state_req);
+
+		mem->state = from_state_req;
+	} else
 		mem->state = to_state;
 
 out:
@@ -262,20 +299,15 @@
 		struct sysdev_attribute *attr, const char *buf, size_t count)
 {
 	struct memory_block *mem;
-	unsigned int phys_section_nr;
 	int ret = -EINVAL;
 
 	mem = container_of(dev, struct memory_block, sysdev);
-	phys_section_nr = mem->phys_index;
-
-	if (!present_section_nr(phys_section_nr))
-		goto out;
 
 	if (!strncmp(buf, "online", min((int)count, 6)))
 		ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
 	else if(!strncmp(buf, "offline", min((int)count, 7)))
 		ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
-out:
+
 	if (ret)
 		return ret;
 	return count;
@@ -315,7 +347,7 @@
 print_block_size(struct sysdev_class *class, struct sysdev_class_attribute *attr,
 		 char *buf)
 {
-	return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE);
+	return sprintf(buf, "%lx\n", get_memory_block_size());
 }
 
 static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
@@ -451,12 +483,13 @@
 	struct sys_device *sysdev;
 	struct memory_block *mem;
 	char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
+	int block_id = base_memory_block_id(__section_nr(section));
 
 	/*
 	 * This only works because we know that section == sysdev->id
 	 * slightly redundant with sysdev_register()
 	 */
-	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
+	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, block_id);
 
 	kobj = kset_find_obj(&memory_sysdev_class.kset, name);
 	if (!kobj)
@@ -468,26 +501,27 @@
 	return mem;
 }
 
-static int add_memory_block(int nid, struct mem_section *section,
-			unsigned long state, enum mem_add_context context)
+static int init_memory_block(struct memory_block **memory,
+			     struct mem_section *section, unsigned long state)
 {
-	struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	struct memory_block *mem;
 	unsigned long start_pfn;
+	int scn_nr;
 	int ret = 0;
 
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
 	if (!mem)
 		return -ENOMEM;
 
-	mutex_lock(&mem_sysfs_mutex);
-
-	mem->phys_index = __section_nr(section);
+	scn_nr = __section_nr(section);
+	mem->phys_index = base_memory_block_id(scn_nr) * sections_per_block;
 	mem->state = state;
 	mem->section_count++;
 	mutex_init(&mem->state_mutex);
 	start_pfn = section_nr_to_pfn(mem->phys_index);
 	mem->phys_device = arch_get_memory_phys_device(start_pfn);
 
-	ret = register_memory(mem, section);
+	ret = register_memory(mem);
 	if (!ret)
 		ret = mem_create_simple_file(mem, phys_index);
 	if (!ret)
@@ -496,8 +530,29 @@
 		ret = mem_create_simple_file(mem, phys_device);
 	if (!ret)
 		ret = mem_create_simple_file(mem, removable);
+
+	*memory = mem;
+	return ret;
+}
+
+static int add_memory_section(int nid, struct mem_section *section,
+			unsigned long state, enum mem_add_context context)
+{
+	struct memory_block *mem;
+	int ret = 0;
+
+	mutex_lock(&mem_sysfs_mutex);
+
+	mem = find_memory_block(section);
+	if (mem) {
+		mem->section_count++;
+		kobject_put(&mem->sysdev.kobj);
+	} else
+		ret = init_memory_block(&mem, section, state);
+
 	if (!ret) {
-		if (context == HOTPLUG)
+		if (context == HOTPLUG &&
+		    mem->section_count == sections_per_block)
 			ret = register_mem_sect_under_node(mem, nid);
 	}
 
@@ -520,8 +575,10 @@
 		mem_remove_simple_file(mem, state);
 		mem_remove_simple_file(mem, phys_device);
 		mem_remove_simple_file(mem, removable);
-		unregister_memory(mem, section);
-	}
+		unregister_memory(mem);
+		kfree(mem);
+	} else
+		kobject_put(&mem->sysdev.kobj);
 
 	mutex_unlock(&mem_sysfs_mutex);
 	return 0;
@@ -533,7 +590,7 @@
  */
 int register_new_memory(int nid, struct mem_section *section)
 {
-	return add_memory_block(nid, section, MEM_OFFLINE, HOTPLUG);
+	return add_memory_section(nid, section, MEM_OFFLINE, HOTPLUG);
 }
 
 int unregister_memory_section(struct mem_section *section)
@@ -552,12 +609,16 @@
 	unsigned int i;
 	int ret;
 	int err;
+	unsigned long block_sz;
 
 	memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
 	ret = sysdev_class_register(&memory_sysdev_class);
 	if (ret)
 		goto out;
 
+	block_sz = get_memory_block_size();
+	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+
 	/*
 	 * Create entries for memory sections that were found
 	 * during boot and have been initialized
@@ -565,8 +626,8 @@
 	for (i = 0; i < NR_MEM_SECTIONS; i++) {
 		if (!present_section_nr(i))
 			continue;
-		err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
-				       BOOT);
+		err = add_memory_section(0, __nr_to_section(i), MEM_ONLINE,
+					 BOOT);
 		if (!ret)
 			ret = err;
 	}

^ permalink raw reply

* Re: [PATCH 4/9] v3 Allow memory blocks to span multiple memory sections
From: Robin Holt @ 2010-10-01 18:52 UTC (permalink / raw)
  To: Nathan Fontenot
  Cc: Greg KH, steiner, linux-kernel, Dave Hansen, linux-mm, Robin Holt,
	linuxppc-dev, KAMEZAWA Hiroyuki
In-Reply-To: <4CA62917.80008@austin.ibm.com>

On Fri, Oct 01, 2010 at 01:31:51PM -0500, Nathan Fontenot wrote:
> Update the memory sysfs code such that each sysfs memory directory is now
> considered a memory block that can span multiple memory sections per
> memory block.  The default size of each memory block is SECTION_SIZE_BITS
> to maintain the current behavior of having a single memory section per
> memory block (i.e. one sysfs directory per memory section).
> 
> For architectures that want to have memory blocks span multiple
> memory sections they need only define their own memory_block_size_bytes()
> routine.
> 
> Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>
> 
> ---
>  drivers/base/memory.c |  155 ++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 108 insertions(+), 47 deletions(-)
> 
> Index: linux-next/drivers/base/memory.c
> ===================================================================
> --- linux-next.orig/drivers/base/memory.c	2010-09-30 14:13:50.000000000 -0500
> +++ linux-next/drivers/base/memory.c	2010-09-30 14:46:00.000000000 -0500
...
> +static unsigned long get_memory_block_size(void)
> +{
> +	u32 block_sz;
        ^^^

I think this should be unsigned long.  u32 will work, but everything
else has been changed to use unsigned long.  If you disagree, I will
happily acquiesce as nothing is currently broken.  If SGI decides to make
memory_block_size_bytes more dynamic, we will fix this up at that time.

Robin

^ permalink raw reply

* Re: [PATCH 7/9] v3 Define memory_block_size_bytes for powerpc/pseries
From: Robin Holt @ 2010-10-01 18:56 UTC (permalink / raw)
  To: Nathan Fontenot
  Cc: Greg KH, steiner, linux-kernel, Dave Hansen, linux-mm, Robin Holt,
	linuxppc-dev, KAMEZAWA Hiroyuki
In-Reply-To: <4CA62A0A.4050406@austin.ibm.com>

On Fri, Oct 01, 2010 at 01:35:54PM -0500, Nathan Fontenot wrote:
> Define a version of memory_block_size_bytes() for powerpc/pseries such that
> a memory block spans an entire lmb.
> 
> Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

Reviewed-by: Robin Holt <holt@sgi.com>

^ permalink raw reply

* Re: [PATCH 6/9] v3 Update node sysfs code
From: Robin Holt @ 2010-10-01 18:55 UTC (permalink / raw)
  To: Nathan Fontenot
  Cc: Greg KH, steiner, linux-kernel, Dave Hansen, linux-mm, Robin Holt,
	linuxppc-dev, KAMEZAWA Hiroyuki
In-Reply-To: <4CA629BA.60100@austin.ibm.com>

On Fri, Oct 01, 2010 at 01:34:34PM -0500, Nathan Fontenot wrote:
> Update the node sysfs code to be aware of the new capability for a memory
> block to contain multiple memory sections and be aware of the memory block
> structure name changes (start_section_nr).  This requires an additional
> parameter to unregister_mem_sect_under_nodes so that we know which memory
> section of the memory block to unregister.
> 
> Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

Reviewed-by: Robin Holt <holt@sgi.com>

^ permalink raw reply

* Re: [PATCH 4/9] v3 Allow memory blocks to span multiple memory sections
From: Robin Holt @ 2010-10-01 19:20 UTC (permalink / raw)
  To: Nathan Fontenot
  Cc: Greg KH, steiner, linux-kernel, Dave Hansen, linux-mm, Robin Holt,
	linuxppc-dev, KAMEZAWA Hiroyuki
In-Reply-To: <4CA62FE2.2000003@austin.ibm.com>

On Fri, Oct 01, 2010 at 02:00:50PM -0500, Nathan Fontenot wrote:
> Update the memory sysfs code such that each sysfs memory directory is now
> considered a memory block that can span multiple memory sections per
> memory block.  The default size of each memory block is SECTION_SIZE_BITS
> to maintain the current behavior of having a single memory section per
> memory block (i.e. one sysfs directory per memory section).
> 
> For architectures that want to have memory blocks span multiple
> memory sections they need only define their own memory_block_size_bytes()
> routine.
> 
> Signed-off-by: Nathan Fontenot <nfont@austin.ibm.com>

Reviewed-by: Robin Holt <holt@sgi.com>

^ permalink raw reply

* Re: [PATCH 15/18] mtd: Fix endianness issues from device tree
From: Artem Bityutskiy @ 2010-10-01 19:29 UTC (permalink / raw)
  To: Ian Munsie
  Cc: Jason Gunthorpe, Artem Bityutskiy, linuxppc-dev, linux-kernel,
	H Hartley Sweeten, paulus, linux-mtd, Sean MacLennan, Tejun Heo,
	Julia Lawall, Steve Deiters, Anatolij Gustschin, David Woodhouse,
	David S. Miller
In-Reply-To: <1285916771-18033-16-git-send-email-imunsie@au1.ibm.com>

On Fri, 2010-10-01 at 17:06 +1000, Ian Munsie wrote:
> From: Ian Munsie <imunsie@au1.ibm.com>
> 
> This patch adds the appropriate conversions to correct the endianness
> issues in the MTD driver whenever it accesses the device tree (which is
> always big endian).
> 
> Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>

Pushed to l2-mtd-2.6.git, thanks!

-- 
Best Regards,
Artem Bityutskiy (Битюцкий Артём)

^ permalink raw reply

* [PATCH -mm] RapidIO: fix IDLE2 bits corruption
From: Alexandre Bounine @ 2010-10-01 19:55 UTC (permalink / raw)
  To: akpm, linux-kernel, linuxppc-dev; +Cc: Alexandre Bounine, Thomas Moll

RapidIO spec v.2.1 adds Idle Sequence 2 into LP-Serial Physical
Layer. The fix ensures that corresponding bits are not corrupted during
error handling.

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Thomas Moll <thomas.moll@sysgo.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Li Yang <leoli@freescale.com>
Cc: Kumar Gala <galak@kernel.crashing.org>
Cc: Micha Nelissen <micha@neli.hopto.org>
---
 drivers/rapidio/rio.c    |    9 ++-------
 include/linux/rio_regs.h |    3 +--
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index 7f18a65..68cf0c9 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -871,15 +871,10 @@ int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg)
 			rdev->em_efptr + RIO_EM_LTL_ERR_DETECT, 0);
 	}
 
-	/* Clear remaining error bits */
+	/* Clear remaining error bits and Port-Write Pending bit */
 	rio_mport_write_config_32(mport, destid, hopcount,
 			rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum),
-			err_status & RIO_PORT_N_ERR_STS_CLR_MASK);
-
-	/* Clear Port-Write Pending bit */
-	rio_mport_write_config_32(mport, destid, hopcount,
-			rdev->phys_efptr + RIO_PORT_N_ERR_STS_CSR(portnum),
-			RIO_PORT_N_ERR_STS_PW_PEND);
+			err_status);
 
 	return 0;
 }
diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h
index a18b2e2..d63dcba 100644
--- a/include/linux/rio_regs.h
+++ b/include/linux/rio_regs.h
@@ -229,7 +229,7 @@
 #define  RIO_MNT_REQ_CMD_IS		0x04	/* Input-status command */
 #define RIO_PORT_N_MNT_RSP_CSR(x)	(0x0044 + x*0x20)	/* 0x0002 */
 #define  RIO_PORT_N_MNT_RSP_RVAL	0x80000000 /* Response Valid */
-#define  RIO_PORT_N_MNT_RSP_ASTAT	0x000003e0 /* ackID Status */
+#define  RIO_PORT_N_MNT_RSP_ASTAT	0x000007e0 /* ackID Status */
 #define  RIO_PORT_N_MNT_RSP_LSTAT	0x0000001f /* Link Status */
 #define RIO_PORT_N_ACK_STS_CSR(x)	(0x0048 + x*0x20)	/* 0x0002 */
 #define  RIO_PORT_N_ACK_CLEAR		0x80000000
@@ -243,7 +243,6 @@
 #define  RIO_PORT_N_ERR_STS_PORT_ERR	0x00000004
 #define  RIO_PORT_N_ERR_STS_PORT_OK	0x00000002
 #define  RIO_PORT_N_ERR_STS_PORT_UNINIT	0x00000001
-#define  RIO_PORT_N_ERR_STS_CLR_MASK	0x07120204
 #define RIO_PORT_N_CTL_CSR(x)		(0x005c + x*0x20)
 #define  RIO_PORT_N_CTL_PWIDTH		0xc0000000
 #define  RIO_PORT_N_CTL_PWIDTH_1	0x00000000
-- 
1.7.3.1

^ permalink raw reply related

* RE: [PATCH v2 03/10] RapidIO: Use stored ingress port number instead of register read
From: Bounine, Alexandre @ 2010-10-01 20:46 UTC (permalink / raw)
  To: Micha Nelissen; +Cc: linux-kernel, Thomas Moll, Andrew Morton, linuxppc-dev
In-Reply-To: <4C97C6D8.4010407@neli.hopto.org>

Hi Micha,

Sorry for delayed reply.

Micha Nelissen <micha@neli.hopto.org> wrote:
>
> Bounine, Alexandre wrote:
> > struct rio_dev {
> >     struct list_head global_list;
> >     struct list_head net_list;
> >     .....
> >     ..... rest of rio_dev
> >     .....
> >     struct rio_switch switch[0];
> > }
>
> It makes sense to let rio_dev structures point to the switch they are
> attached to. That can be useful in various situations, but is not
> possible with this setup.
>=20
> If a rio_dev is a switch then rdev->rswitch->rdev == rdev holds.
>

But the switch is a RIO device itself and all other parts of rio_dev
structure are applicable to it as well. If there is a situation when a
device needs to hold a pointer to the attached switch that should be a
pointer to the switch rio_dev and not to its switch-specific extension.
=20
> > This will remove extra memory allocation, remove overlapping structure
> > members and clean code sections like one shown below:
> >
> > 	u8 hopcount = 0xff;
> > 	u16 destid = rdev->destid;
> >
> > 	if (rdev->rswitch) {
> > 		destid = rdev->rswitch->destid;
> > 		hopcount = rdev->rswitch->hopcount;
> > 	}
>
> Note that it is possible for rdev->destid to differ from
> rdev->rswitch->destid even if rswitch->rdev == rdev (for non-hosts i.e.
> agents). rswitch->destid is the destid by which we discovered the switch
> (and can reach it) but rdev->destid is the actual id given to the switch.
>

My goal is to have one destid storage for device - endpoint or switch.
And destid should be used only for one purpose: to reach corresponding
device.
In your statement above you suggest using rdev->destid instead of
rswitch->switchid.
RIO switches do not have any specific RIO ID that can be assigned to the
switch.
In this case the rswitch->switchid should work well for logical
identification
of the switch.

I think if (for switch) we load rdev->destid with some function
different
from its routing role this may bring unnecessary confusion.

I also will move rswitch->hopcount to rdev->hopcount. For endpoint it
will be set
to 0xff during rio_dev initialization.

Alex.
      =20

^ permalink raw reply

* Re: Introduce support for little endian PowerPC
From: Benjamin Herrenschmidt @ 2010-10-01 20:50 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: paulus, linuxppc-dev, linux-kernel, Ian Munsie
In-Reply-To: <1285950041.15020.272.camel@thor.local>

On Fri, 2010-10-01 at 18:20 +0200, Michel Dänzer wrote:
> On Fre, 2010-10-01 at 22:14 +1000, Benjamin Herrenschmidt wrote: 
> > 
> > Now, the main reasons in practice are anything touching graphics.
> > 
> > There's quite a few IP cores out there for SoCs that don't have HW
> > swappers, and -tons- of more or less ugly code that can't deal with non
> > native pixel ordering (hell, even Xorg isn't good at it, we really only
> > support cards that have HW swappers today).
> 
> That's not true. Even the radeon driver doesn't really need the HW
> swappers anymore with KMS.

And last I looked X still pukes if you give it a pixmap in non native
byte order but that might have been fixed. In any case, X is far from
the target here. More like existing stacks for embedded SoCs, including
codecs etc... all written for LE.

> > There's an even bigger pile of application code that deals with graphics
> > without any regard for endianness and is essentially unfixable.
> 
> Out of curiosity, what kind of APIs are those apps using? X11 and OpenGL
> have well-defined semantics wrt endianness, allowing the drivers to
> handle any necessary byte swapping internally, and IME the vast majority
> of apps handle this correctly.

So why is it so hard to get any video card working on ppc ? :-) I
haven't even started to look at r6xx which -does- have HW swapping
capabilities...

In this case tho, see above. I don't even need to care much about the
details, customers are making the point over and over again. It might be
fixable, but either they don't have the resources to fix it or don't
want to fix it, or their -own- customers won't chose their product if
it's BE for "perceived" difficulty of porting reason, whether they are
valid or not.

So it boils down to do we want to be another Amiga sinking into oblivion
but keeping our purity intact, or do we make that "reasonably easy"
thing to support LE at least at the kernel level for now, and -possibly-
give powerpc a bit more juice on the market for a while longer ?

Cheers,
Ben.

^ permalink raw reply

* Re: Introduce support for little endian PowerPC
From: Benjamin Herrenschmidt @ 2010-10-01 20:51 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, paulus, Ian Munsie, linux-kernel
In-Reply-To: <BBD78246-343E-4A96-9E55-3E40AD628C0B@kernel.crashing.org>

On Fri, 2010-10-01 at 12:59 -0500, Kumar Gala wrote:
> I'm not against it, and I agree some of the patches seem like good
> clean up.  I'm concerned about this bit rotting pretty quickly.

Maybe. Most of it doesn't seem to be that bit-rottable.

The changes to the asm stuff in misc_32.S for example are functions we
never ever touch once written (libgcc replacements) so I don't see them
rotting more with LE support than they did with BE :-)

What might rot is that we might introduce new LE breakage, true, and I
suppose it's going to be to some extent my job to at least every now and
then shoot that thing on a 44x to see if it still flies.

Cheers,
Ben.

^ permalink raw reply

* [patch 1/1] powerpc: enable ARCH_DMA_ADDR_T_64BIT with ARCH_PHYS_ADDR_T_64BIT
From: akpm @ 2010-10-01 21:12 UTC (permalink / raw)
  To: benh; +Cc: fujita.tomonori, linuxppc-dev, akpm

From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 arch/powerpc/Kconfig |    3 +++
 1 file changed, 3 insertions(+)

diff -puN arch/powerpc/Kconfig~powerpc-enable-arch_dma_addr_t_64bit-with-arch_phys_addr_t_64bit arch/powerpc/Kconfig
--- a/arch/powerpc/Kconfig~powerpc-enable-arch_dma_addr_t_64bit-with-arch_phys_addr_t_64bit
+++ a/arch/powerpc/Kconfig
@@ -16,6 +16,9 @@ config WORD_SIZE
 config ARCH_PHYS_ADDR_T_64BIT
        def_bool PPC64 || PHYS_64BIT
 
+config ARCH_DMA_ADDR_T_64BIT
+	def_bool ARCH_PHYS_ADDR_T_64BIT
+
 config MMU
 	bool
 	default y
_

^ permalink raw reply

* [RESEND PATCH 0/2] Fix IRQ round-robing w/o irqbalance on pseries
From: Nishanth Aravamudan @ 2010-10-01 21:26 UTC (permalink / raw)
  To: nacc; +Cc: linuxppc-dev, linux-kernel, miltonm

We have received reports on power systems not running irqbalance where
all interrupts are being routed to CPU0 rather than being interleaved by
default across the system. Current firmware only allows either sending
interrupts to all CPUs or sending them to one CPU. The following two
patches address this issue by fixing the mask used in generic code and
by fixing the check for the "all" setting in the pseries code.

Nishanth Aravamudan (2):
  IRQ: use cpu_possible_mask rather than online_mask in setup_affinity
  pseries/xics: use cpu_possible_mask rather than cpu_all_mask

 arch/powerpc/platforms/pseries/xics.c |    2 +-
 kernel/irq/manage.c                   |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

^ permalink raw reply

* [RESEND PATCH 2/2] pseries/xics: use cpu_possible_mask rather than cpu_all_mask
From: Nishanth Aravamudan @ 2010-10-01 21:26 UTC (permalink / raw)
  To: nacc
  Cc: Mark Nelson, linux-kernel, miltonm, Paul Mackerras,
	Anton Blanchard, Thomas Gleixner, linuxppc-dev
In-Reply-To: <1285968378-12805-1-git-send-email-nacc@us.ibm.com>

Current firmware only allows us to send IRQs to the first processor or
all processors. We currently check to see if the passed in mask is equal
to the all_mask, but the firmware is only considering whether the
request is for the equivalent of the possible_mask. Thus, we think the
request is for some subset of CPUs and only assign IRQs to the first CPU
(on systems without irqbalance running) as evidenced by
/proc/interrupts. By using possible_mask instead, we account for this
and proper interleaving of interrupts occurs.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
---
 arch/powerpc/platforms/pseries/xics.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 93834b0..7c1e342 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -178,7 +178,7 @@ static int get_irq_server(unsigned int virq, const struct cpumask *cpumask,
 	if (!distribute_irqs)
 		return default_server;
 
-	if (!cpumask_equal(cpumask, cpu_all_mask)) {
+	if (!cpumask_subset(cpu_possible_mask, cpumask)) {
 		int server = cpumask_first_and(cpu_online_mask, cpumask);
 
 		if (server < nr_cpu_ids)
-- 
1.7.0.4

^ permalink raw reply related

* Re: Introduce support for little endian PowerPC
From: Olof Johansson @ 2010-10-01 22:03 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: paulus, linuxppc-dev, linux-kernel, Ian Munsie
In-Reply-To: <1285966315.2463.141.camel@pasglop>

On Sat, Oct 02, 2010 at 06:51:55AM +1000, Benjamin Herrenschmidt wrote:
> On Fri, 2010-10-01 at 12:59 -0500, Kumar Gala wrote:
> > I'm not against it, and I agree some of the patches seem like good
> > clean up.  I'm concerned about this bit rotting pretty quickly.
> 
> Maybe. Most of it doesn't seem to be that bit-rottable.
> 
> The changes to the asm stuff in misc_32.S for example are functions we
> never ever touch once written (libgcc replacements) so I don't see them
> rotting more with LE support than they did with BE :-)

Does KVM/qemu support running LE guest on BE host? That'd help keeping
the bitrot lower. :)



-Olof

^ permalink raw reply

* Re: Introduce support for little endian PowerPC
From: Benjamin Herrenschmidt @ 2010-10-01 22:28 UTC (permalink / raw)
  To: Olof Johansson; +Cc: paulus, linuxppc-dev, linux-kernel, Ian Munsie
In-Reply-To: <20101001220343.GA10494@lixom.net>

On Fri, 2010-10-01 at 17:03 -0500, Olof Johansson wrote:
> > Maybe. Most of it doesn't seem to be that bit-rottable.
> > 
> > The changes to the asm stuff in misc_32.S for example are functions we
> > never ever touch once written (libgcc replacements) so I don't see them
> > rotting more with LE support than they did with BE :-)
> 
> Does KVM/qemu support running LE guest on BE host? That'd help keeping
> the bitrot lower. :) 

Not yet I suppose :-) But then, I'm not sure it would make a big
difference, if you have a 440 board in the first place, you can boot
either LE or BE, no need for a special FW or anything.

What we've done is basically keep the zImage wrapper BE (for now at
least), and have it trampoline to LE when executing the actual kernel
(using a cuImage, of course a device-tree enabled u-Boot would probably
need something akin to a proper ePAPR zImage to do that but that's
reasonably easy to do nowadays).


Cheers,
Ben.

^ permalink raw reply

* Serial RapidIO Maintenance read causes lock up
From: Bastiaan Nijkamp @ 2010-10-01 22:35 UTC (permalink / raw)
  To: linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 5023 bytes --]

Hi,

We are currently evaluating Serial RapidIO on two WindRiver SBC8548 boards
that use a Freescale PowerQUICC III processor (MPC8548E rev. 2). We are
running U-Boot version 2010.09 as bootloader and are using kernel version
2.6.35.6 stable.

We have consulted multiple resources to collect all the requirements for
a successful RapidIO connection (LAW, TLB, Registers) and we seem to have
configured everything correctly. However, as soon as the board that is
configured as the host starts the enumeration process, the system locks up.
It locks in such a manner that we cannot use a JTAG interface to read any of
the registers.  We have also added a breakpoint just before the command that
causes the lock up, to make sure the registers are correctly set at that
point, and it seems they are.

We have triple-checked everything that we could possibly think of and
everything seems to be configured as required but the system keeps
locking-up so there must be something that we are missing. I really hope
that someone could point us in the right direction. The lock-up occurs when
__fsl_read_rio_config is called by fsl_rio_config_read in fsl-rio.c.

The LAW and TLB entries we have added to U-Boot are as follows:

#define CONFIG_RIO 1
#define CONFIG_SYS_RIO_MEM_VIRT 0xc0000000 /* base address */
#define CONFIG_SYS_RIO_MEM_BUS 0xc0000000 /* base address */
#define CONFIG_SYS_RIO_MEM_PHYS 0xc0000000
#define CONFIG_SYS_RIO_MEM_SIZE 0x20000000 /* 512M */

SET_LAW(CONFIG_SYS_RIO_MEM_PHYS, LAW_SIZE_512M, LAW_TRGT_IF_RIO),

-------------

Here is the kernel log:

Using SBC8548 machine description
Memory CAM mapping: 256 Mb, residual: 0Mb
Linux version 2.6.35.6 (dl704@lxws006) (gcc version 4.1.2 (Wind River Linux
Sourcery G++ 4.1-91)) #7 We
d Sep 29 13:27:18 CEST 2010
bootconsole [udbg0] enabled
setup_arch: bootmem
sbc8548_setup_arch()
arch: exit
Zone PFN ranges:
 DMA      0x00000000 -> 0x00010000
 Normal   empty
Movable zone start PFN for each node
early_node_map[1] active PFN ranges
   0: 0x00000000 -> 0x00010000
MMU: Allocated 1088 bytes of context maps for 255 contexts
Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 65024
Kernel command line: root=/dev/nfs rw
nfsroot=192.168.100.21:/thales/target/rfs/sbc8548_wrlinux4
ip=192
.168.100.151:192.168.100.21:192.168.100.21:255.255.255.0:sbc8548_1:eth0:off
console=ttyS0,115200 riohdid=1
PID hash table entries: 1024 (order: 0, 4096 bytes)
Dentry cache hash table entries: 32768 (order: 5, 131072 bytes)
Inode-cache hash table entries: 16384 (order: 4, 65536 bytes)
Memory: 256884k/262144k available (2712k kernel code, 5260k reserved, 112k
data, 77k bss, 144k init)
Kernel virtual memory layout:
 * 0xfffdf000..0xfffff000  : fixmap
 * 0xfc7f9000..0xfe000000  : early ioremap
 * 0xd1000000..0xfc7f9000  : vmalloc & ioremap
Hierarchical RCU implementation.
       RCU-based detection of stalled CPUs is disabled.
       Verbose stalled-CPUs detection is disabled.
NR_IRQS:512 nr_irqs:512
mpic: Setting up MPIC " OpenPIC  " version 1.2 at e0040000, max 1 CPUs
mpic: ISU size: 80, shift: 7, mask: 7f
mpic: Initializing for 80 sources
clocksource: timebase mult[50cede6] shift[22] registered
pid_max: default: 32768 minimum: 301
Mount-cache hash table entries: 512
NET: Registered protocol family 16

PCI: Probing PCI hardware
bio: create slab <bio-0> at 0
vgaarb: loaded
Switching to clocksource timebase
NET: Registered protocol family 2
IP route cache hash table entries: 2048 (order: 1, 8192 bytes)
TCP established hash table entries: 8192 (order: 4, 65536 bytes)
TCP bind hash table entries: 8192 (order: 3, 32768 bytes)
TCP: Hash tables configured (established 8192 bind 8192)
TCP reno registered
UDP hash table entries: 256 (order: 0, 4096 bytes)
UDP-Lite hash table entries: 256 (order: 0, 4096 bytes)
NET: Registered protocol family 1
RPC: Registered udp transport module.
RPC: Registered tcp transport module.
RPC: Registered tcp NFSv4.1 backchannel transport module.
Setting up RapidIO peer-to-peer network /soc8548@e0000000/rapidio@c0000
fsl-of-rio e00c0000.rapidio: Of-device full name /soc8548@e0000000
/rapidio@c0000
fsl-of-rio e00c0000.rapidio: Regs: [mem 0xe00c0000-0xe00dffff]
fsl-of-rio e00c0000.rapidio: LAW start 0x00000000c0000000, size
0x0000000020000000.
fsl-of-rio e00c0000.rapidio: pwirq: 48, bellirq: 50, txirq: 53, rxirq 54
fsl-of-rio e00c0000.rapidio: DeviceID is 0x1
fsl-of-rio e00c0000.rapidio: Configured as HOST
fsl-of-rio e00c0000.rapidio: RapidIO PHY type: serial
fsl-of-rio e00c0000.rapidio: Hardware port width: 4
fsl-of-rio e00c0000.rapidio: Training connection status: Four-lane
fsl-of-rio e00c0000.rapidio: RapidIO Common Transport System size: 256
RIO: enumerate master port 0, RIO0 mport
fsl_rio_config_read: index 0 destid 255 hopcount 0 offset 00000068 len 4
fsl_rio_config_read: Passed IS_ALIGNED.
fsl_rio_config_read: Passed 'out_be32_1'
fsl_rio_config_read: Passed 'out_be32_2'
fsl_rio_config_read: len is 4
fsl_rio_config_read: about to trigger '__fsl_read_rio_config'

Regards,
Bastiaan Nijkamp

[-- Attachment #2: Type: text/html, Size: 6059 bytes --]

^ permalink raw reply

* RE: [PATCH] PPC4xx: ADMA separating SoC specific functions
From: Tirumala Marri @ 2010-10-02  0:54 UTC (permalink / raw)
  To: Dan Williams
  Cc: Wolfgang Denk, Greg KH, yur, linux-raid, linux-crypto,
	linuxppc-dev
In-Reply-To: <AANLkTi=JqTU898DfW1=4qcb2WbwHvroY6LqiAX_oBb5L@mail.gmail.com>

> You definitely need to be able to resolve "used but not defined" and
> "defined but not used" warnings before tackling a driver conversion
> like this.  In light of this comment I wonder if it would be
> appropriate to submit your original driver, that just duplicated
> routines from the ppc440spe driver, to the -staging tree.  Then it
> would be available for someone familiar with driver conversions to
> take a shot at unifying.
>
> Greg, is this an appropriate use of -staging?
The other option is to define non-static functions in ppc440spe-adma.c
which are used in the common
file adma.c. This way there will not be any warnings. Is this something
acceptable?

Here is the breakdown:

ppc440spe-adma.c: It will have all the 440spe SoC specific functions.
ppc4xx_adma.h will have the declarations from 440spe-adma.c as non static.
adma.c will have common functions which are independent of SoC.

Please suggest.
Regards,

-Marri

^ permalink raw reply

* RE: [RFC] irq: Migrate powerpc virq subsystem into generic code
From: Benjamin Herrenschmidt @ 2010-10-02  1:02 UTC (permalink / raw)
  To: Lorenzo Pieralisi; +Cc: devicetree-discuss, linuxppc-dev
In-Reply-To: <000b01cb6166$588cebe0$09a6c3a0$@Pieralisi@arm.com>

On Fri, 2010-10-01 at 13:44 +0100, Lorenzo Pieralisi wrote:
> Hi Grant, Ben, all

Hi Lorenzo,

> I have a question on the PowerPC IRQ layer and how to use it
> for device drivers and device tree in general.
> 
> A device such as eth smsc911x (example) requires the platform_data 
> pointer to specify irq sense/level to programme the chip accordingly. 
> As agreed with Grant these pieces of information should be retrieved 
> from the device tree, from the interrupt-specifier.

Right.

> Now: the device driver should be interrupt-controller agnostic, so
> I cannot just retrieve the interrupts property at probe and decode it
> in order to get the interrupt sense/level bits (the driver has no clue 
> about the interrupt-controller irq flags encoding).

Indeed.

> I need to associate a irq_host to the interrupt controller node, with
> a proper xlate function to correctly decode the interrupt-specifier 
> and set the irq type accordingly (irq_create_of_mapping(),
> for now 1:1 on ARM, so useless from this standpoint).

Yes, basically -something- at some stage will translate the IRQ
"host" (let's call it "domain" instead, that was a bad naming choice
from me initially) and HW number within that domain to a newly allocated
linux "virq" number (the irq number manipulated generically in Linux).

The way that works is actually quite simple, but let me reply to you
first and then fill in the blanks...

> At platform device init time, of_irq_to_resource() is called to parse
> and map irqs; if we code the irq_host correctly for the ARM GIC for
> instance the xlate function gets called and irq type set accordingly
> (and maybe the function could set platform_device IRQ resource 
> flags as well ?)

Yes. It should basically all happen automatically from that standpoint
so the driver has nothing else to do but call something like
of_irq_to_resource(). This is a high level function tho, and if so
desired, lower level functions are also accessible if the driver or the
platform wish to "tweak" things a bit.

For example, if the device-tree misses something, or interrupts for a
given device are dynamically allocated in HW (such as MSIs), you can
also call irq_create_mapping() directly to allocate virq's and associate
them to host(domain)/hw_number pairs. This is typically done in MSI
backends.

In fact, cascaded controllers tend to basically just be device nodes
that are both an interrupt-parent and have an interrupt-parent &
interrupts property(ies).

So the cascaded controller does of_irq_to_resource() for example to
obtain its own "upstream" IRQ and request it.

The net result is you no longer have to create some "global" numbering
scheme with magic offsets and ranges for various cascaded or separate
controllers in a SoC setup. The HW numbering is local to a given domain
(typically a PIC but in some cases, it could extend across multiple
copies of the same PIC in SMP that share a global interrupt domain), and
the linux numbers are dynamically allocated.

> At driver dt probe, from the hwirq number defined in "interrupts"
> the driver retrieves the virq, hence sense/level flags and use them,
> or just use the platform_device IRQ resource flags if set properly
> by the OF layer. 

Right so generally drivers don't care about the details, they just use a
helper like of_irq_to_resource() that does it for them. But if they
want, they can call lower level things to parse the interrupts property,
and then establish a mapping etc... they can also override the flags,
that sort of thing.

> Correct ?

That's the idea ... minus bugs :-) We haven't been using interrupt
"resources" that much on powerpc in the past so we may have bugs setting
the flags there, but I suppose Grant is fixing all of that. We used to
just set the flags in the irq desc directly when doing the mapping of
the virq.

> If yes I will have a stab at it on a ARM platform with complex
> IRQ routing.
> 
> Thank you very much.

My pleasure,

Cheers,
Ben.

> Cheers,
> Lorenzo
> 
> > 
> >  arch/microblaze/kernel/setup.c |    2
> >  arch/powerpc/Kconfig           |    3
> >  arch/powerpc/include/asm/irq.h |  270 ----------------
> >  arch/powerpc/kernel/irq.c      |  659 --------------------------------
> > ------
> >  include/linux/virq.h           |  302 ++++++++++++++++++
> >  kernel/irq/Makefile            |    1
> >  kernel/irq/virq.c              |  687
> > ++++++++++++++++++++++++++++++++++++++++
> >  7 files changed, 995 insertions(+), 929 deletions(-)
> >  create mode 100644 include/linux/virq.h
> >  create mode 100644 kernel/irq/virq.c
> > 
> > diff --git a/arch/microblaze/kernel/setup.c
> > b/arch/microblaze/kernel/setup.c
> > index f5f7688..39cf20d 100644
> > --- a/arch/microblaze/kernel/setup.c
> > +++ b/arch/microblaze/kernel/setup.c
> > @@ -51,8 +51,6 @@ void __init setup_arch(char **cmdline_p)
> > 
> >  	unflatten_device_tree();
> > 
> > -	/* NOTE I think that this function is not necessary to call */
> > -	/* irq_early_init(); */
> >  	setup_cpuinfo();
> > 
> >  	microblaze_cache_init();
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index 631e5a0..cc06e59 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -146,6 +146,9 @@ config EARLY_PRINTK
> >  	bool
> >  	default y
> > 
> > +config VIRQ
> > +	def_bool y
> > +
> >  config COMPAT
> >  	bool
> >  	default y if PPC64
> > diff --git a/arch/powerpc/include/asm/irq.h
> > b/arch/powerpc/include/asm/irq.h
> > index 67ab5fb..6dea0cb 100644
> > --- a/arch/powerpc/include/asm/irq.h
> > +++ b/arch/powerpc/include/asm/irq.h
> > @@ -17,10 +17,6 @@
> >  #include <asm/atomic.h>
> > 
> > 
> > -/* Define a way to iterate across irqs. */
> > -#define for_each_irq(i) \
> > -	for ((i) = 0; (i) < NR_IRQS; ++(i))
> > -
> >  extern atomic_t ppc_n_lost_interrupts;
> > 
> >  /* This number is used when no interrupt has been assigned */
> > @@ -41,270 +37,6 @@ extern atomic_t ppc_n_lost_interrupts;
> >  /* Same thing, used by the generic IRQ code */
> >  #define NR_IRQS_LEGACY		NUM_ISA_INTERRUPTS
> > 
> > -/* This type is the placeholder for a hardware interrupt number. It
> > has to
> > - * be big enough to enclose whatever representation is used by a given
> > - * platform.
> > - */
> > -typedef unsigned long irq_hw_number_t;
> > -
> > -/* Interrupt controller "host" data structure. This could be defined
> > as a
> > - * irq domain controller. That is, it handles the mapping between
> > hardware
> > - * and virtual interrupt numbers for a given interrupt domain. The
> > host
> > - * structure is generally created by the PIC code for a given PIC
> > instance
> > - * (though a host can cover more than one PIC if they have a flat
> > number
> > - * model). It's the host callbacks that are responsible for setting
> > the
> > - * irq_chip on a given irq_desc after it's been mapped.
> > - *
> > - * The host code and data structures are fairly agnostic to the fact
> > that
> > - * we use an open firmware device-tree. We do have references to
> > struct
> > - * device_node in two places: in irq_find_host() to find the host
> > matching
> > - * a given interrupt controller node, and of course as an argument to
> > its
> > - * counterpart host->ops->match() callback. However, those are treated
> > as
> > - * generic pointers by the core and the fact that it's actually a
> > device-node
> > - * pointer is purely a convention between callers and implementation.
> > This
> > - * code could thus be used on other architectures by replacing those
> > two
> > - * by some sort of arch-specific void * "token" used to identify
> > interrupt
> > - * controllers.
> > - */
> > -struct irq_host;
> > -struct radix_tree_root;
> > -
> > -/* Functions below are provided by the host and called whenever a new
> > mapping
> > - * is created or an old mapping is disposed. The host can then proceed
> > to
> > - * whatever internal data structures management is required. It also
> > needs
> > - * to setup the irq_desc when returning from map().
> > - */
> > -struct irq_host_ops {
> > -	/* Match an interrupt controller device node to a host, returns
> > -	 * 1 on a match
> > -	 */
> > -	int (*match)(struct irq_host *h, struct device_node *node);
> > -
> > -	/* Create or update a mapping between a virtual irq number and a
> > hw
> > -	 * irq number. This is called only once for a given mapping.
> > -	 */
> > -	int (*map)(struct irq_host *h, unsigned int virq, irq_hw_number_t
> > hw);
> > -
> > -	/* Dispose of such a mapping */
> > -	void (*unmap)(struct irq_host *h, unsigned int virq);
> > -
> > -	/* Update of such a mapping  */
> > -	void (*remap)(struct irq_host *h, unsigned int virq,
> > irq_hw_number_t hw);
> > -
> > -	/* Translate device-tree interrupt specifier from raw format
> > coming
> > -	 * from the firmware to a irq_hw_number_t (interrupt line number)
> > and
> > -	 * type (sense) that can be passed to set_irq_type(). In the
> > absence
> > -	 * of this callback, irq_create_of_mapping() and
> > irq_of_parse_and_map()
> > -	 * will return the hw number in the first cell and IRQ_TYPE_NONE
> > for
> > -	 * the type (which amount to keeping whatever default value the
> > -	 * interrupt controller has for that line)
> > -	 */
> > -	int (*xlate)(struct irq_host *h, struct device_node *ctrler,
> > -		     const u32 *intspec, unsigned int intsize,
> > -		     irq_hw_number_t *out_hwirq, unsigned int *out_type);
> > -};
> > -
> > -struct irq_host {
> > -	struct list_head	link;
> > -
> > -	/* type of reverse mapping technique */
> > -	unsigned int		revmap_type;
> > -#define IRQ_HOST_MAP_LEGACY     0 /* legacy 8259, gets irqs 1..15 */
> > -#define IRQ_HOST_MAP_NOMAP	1 /* no fast reverse mapping */
> > -#define IRQ_HOST_MAP_LINEAR	2 /* linear map of interrupts */
> > -#define IRQ_HOST_MAP_TREE	3 /* radix tree */
> > -	union {
> > -		struct {
> > -			unsigned int size;
> > -			unsigned int *revmap;
> > -		} linear;
> > -		struct radix_tree_root tree;
> > -	} revmap_data;
> > -	struct irq_host_ops	*ops;
> > -	void			*host_data;
> > -	irq_hw_number_t		inval_irq;
> > -
> > -	/* Optional device node pointer */
> > -	struct device_node	*of_node;
> > -};
> > -
> > -/* The main irq map itself is an array of NR_IRQ entries containing
> > the
> > - * associate host and irq number. An entry with a host of NULL is
> > free.
> > - * An entry can be allocated if it's free, the allocator always then
> > sets
> > - * hwirq first to the host's invalid irq number and then fills ops.
> > - */
> > -struct irq_map_entry {
> > -	irq_hw_number_t	hwirq;
> > -	struct irq_host	*host;
> > -};
> > -
> > -extern struct irq_map_entry irq_map[NR_IRQS];
> > -
> > -extern irq_hw_number_t virq_to_hw(unsigned int virq);
> > -
> > -/**
> > - * irq_alloc_host - Allocate a new irq_host data structure
> > - * @of_node: optional device-tree node of the interrupt controller
> > - * @revmap_type: type of reverse mapping to use
> > - * @revmap_arg: for IRQ_HOST_MAP_LINEAR linear only: size of the map
> > - * @ops: map/unmap host callbacks
> > - * @inval_irq: provide a hw number in that host space that is always
> > invalid
> > - *
> > - * Allocates and initialize and irq_host structure. Note that in the
> > case of
> > - * IRQ_HOST_MAP_LEGACY, the map() callback will be called before this
> > returns
> > - * for all legacy interrupts except 0 (which is always the invalid irq
> > for
> > - * a legacy controller). For a IRQ_HOST_MAP_LINEAR, the map is
> > allocated by
> > - * this call as well. For a IRQ_HOST_MAP_TREE, the radix tree will be
> > allocated
> > - * later during boot automatically (the reverse mapping will use the
> > slow path
> > - * until that happens).
> > - */
> > -extern struct irq_host *irq_alloc_host(struct device_node *of_node,
> > -				       unsigned int revmap_type,
> > -				       unsigned int revmap_arg,
> > -				       struct irq_host_ops *ops,
> > -				       irq_hw_number_t inval_irq);
> > -
> > -
> > -/**
> > - * irq_find_host - Locates a host for a given device node
> > - * @node: device-tree node of the interrupt controller
> > - */
> > -extern struct irq_host *irq_find_host(struct device_node *node);
> > -
> > -
> > -/**
> > - * irq_set_default_host - Set a "default" host
> > - * @host: default host pointer
> > - *
> > - * For convenience, it's possible to set a "default" host that will be
> > used
> > - * whenever NULL is passed to irq_create_mapping(). It makes life
> > easier for
> > - * platforms that want to manipulate a few hard coded interrupt
> > numbers that
> > - * aren't properly represented in the device-tree.
> > - */
> > -extern void irq_set_default_host(struct irq_host *host);
> > -
> > -
> > -/**
> > - * irq_set_virq_count - Set the maximum number of virt irqs
> > - * @count: number of linux virtual irqs, capped with NR_IRQS
> > - *
> > - * This is mainly for use by platforms like iSeries who want to
> > program
> > - * the virtual irq number in the controller to avoid the reverse
> > mapping
> > - */
> > -extern void irq_set_virq_count(unsigned int count);
> > -
> > -
> > -/**
> > - * irq_create_mapping - Map a hardware interrupt into linux virq space
> > - * @host: host owning this hardware interrupt or NULL for default host
> > - * @hwirq: hardware irq number in that host space
> > - *
> > - * Only one mapping per hardware interrupt is permitted. Returns a
> > linux
> > - * virq number.
> > - * If the sense/trigger is to be specified, set_irq_type() should be
> > called
> > - * on the number returned from that call.
> > - */
> > -extern unsigned int irq_create_mapping(struct irq_host *host,
> > -				       irq_hw_number_t hwirq);
> > -
> > -
> > -/**
> > - * irq_dispose_mapping - Unmap an interrupt
> > - * @virq: linux virq number of the interrupt to unmap
> > - */
> > -extern void irq_dispose_mapping(unsigned int virq);
> > -
> > -/**
> > - * irq_find_mapping - Find a linux virq from an hw irq number.
> > - * @host: host owning this hardware interrupt
> > - * @hwirq: hardware irq number in that host space
> > - *
> > - * This is a slow path, for use by generic code. It's expected that an
> > - * irq controller implementation directly calls the appropriate low
> > level
> > - * mapping function.
> > - */
> > -extern unsigned int irq_find_mapping(struct irq_host *host,
> > -				     irq_hw_number_t hwirq);
> > -
> > -/**
> > - * irq_create_direct_mapping - Allocate a virq for direct mapping
> > - * @host: host to allocate the virq for or NULL for default host
> > - *
> > - * This routine is used for irq controllers which can choose the
> > hardware
> > - * interrupt numbers they generate. In such a case it's simplest to
> > use
> > - * the linux virq as the hardware interrupt number.
> > - */
> > -extern unsigned int irq_create_direct_mapping(struct irq_host *host);
> > -
> > -/**
> > - * irq_radix_revmap_insert - Insert a hw irq to linux virq number
> > mapping.
> > - * @host: host owning this hardware interrupt
> > - * @virq: linux irq number
> > - * @hwirq: hardware irq number in that host space
> > - *
> > - * This is for use by irq controllers that use a radix tree reverse
> > - * mapping for fast lookup.
> > - */
> > -extern void irq_radix_revmap_insert(struct irq_host *host, unsigned
> > int virq,
> > -				    irq_hw_number_t hwirq);
> > -
> > -/**
> > - * irq_radix_revmap_lookup - Find a linux virq from a hw irq number.
> > - * @host: host owning this hardware interrupt
> > - * @hwirq: hardware irq number in that host space
> > - *
> > - * This is a fast path, for use by irq controller code that uses radix
> > tree
> > - * revmaps
> > - */
> > -extern unsigned int irq_radix_revmap_lookup(struct irq_host *host,
> > -					    irq_hw_number_t hwirq);
> > -
> > -/**
> > - * irq_linear_revmap - Find a linux virq from a hw irq number.
> > - * @host: host owning this hardware interrupt
> > - * @hwirq: hardware irq number in that host space
> > - *
> > - * This is a fast path, for use by irq controller code that uses
> > linear
> > - * revmaps. It does fallback to the slow path if the revmap doesn't
> > exist
> > - * yet and will create the revmap entry with appropriate locking
> > - */
> > -
> > -extern unsigned int irq_linear_revmap(struct irq_host *host,
> > -				      irq_hw_number_t hwirq);
> > -
> > -
> > -
> > -/**
> > - * irq_alloc_virt - Allocate virtual irq numbers
> > - * @host: host owning these new virtual irqs
> > - * @count: number of consecutive numbers to allocate
> > - * @hint: pass a hint number, the allocator will try to use a 1:1
> > mapping
> > - *
> > - * This is a low level function that is used internally by
> > irq_create_mapping()
> > - * and that can be used by some irq controllers implementations for
> > things
> > - * like allocating ranges of numbers for MSIs. The revmaps are left
> > untouched.
> > - */
> > -extern unsigned int irq_alloc_virt(struct irq_host *host,
> > -				   unsigned int count,
> > -				   unsigned int hint);
> > -
> > -/**
> > - * irq_free_virt - Free virtual irq numbers
> > - * @virq: virtual irq number of the first interrupt to free
> > - * @count: number of interrupts to free
> > - *
> > - * This function is the opposite of irq_alloc_virt. It will not clear
> > reverse
> > - * maps, this should be done previously by unmap'ing the interrupt. In
> > fact,
> > - * all interrupts covered by the range being freed should have been
> > unmapped
> > - * prior to calling this.
> > - */
> > -extern void irq_free_virt(unsigned int virq, unsigned int count);
> > -
> > -/**
> > - * irq_early_init - Init irq remapping subsystem
> > - */
> > -extern void irq_early_init(void);
> > -
> >  static __inline__ int irq_canonicalize(int irq)
> >  {
> >  	return irq;
> > @@ -342,5 +74,7 @@ extern int call_handle_irq(int irq, void *p1,
> >  			   struct thread_info *tp, void *func);
> >  extern void do_IRQ(struct pt_regs *regs);
> > 
> > +#include <linux/virq.h>
> > +
> >  #endif /* _ASM_IRQ_H */
> >  #endif /* __KERNEL__ */
> > diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> > index 4a65386..86d8e42 100644
> > --- a/arch/powerpc/kernel/irq.c
> > +++ b/arch/powerpc/kernel/irq.c
> > @@ -523,553 +523,6 @@ void do_softirq(void)
> >  }
> > 
> > 
> > -/*
> > - * IRQ controller and virtual interrupts
> > - */
> > -
> > -static LIST_HEAD(irq_hosts);
> > -static DEFINE_RAW_SPINLOCK(irq_big_lock);
> > -static unsigned int revmap_trees_allocated;
> > -static DEFINE_MUTEX(revmap_trees_mutex);
> > -struct irq_map_entry irq_map[NR_IRQS];
> > -static unsigned int irq_virq_count = NR_IRQS;
> > -static struct irq_host *irq_default_host;
> > -
> > -irq_hw_number_t virq_to_hw(unsigned int virq)
> > -{
> > -	return irq_map[virq].hwirq;
> > -}
> > -EXPORT_SYMBOL_GPL(virq_to_hw);
> > -
> > -static int default_irq_host_match(struct irq_host *h, struct
> > device_node *np)
> > -{
> > -	return h->of_node != NULL && h->of_node == np;
> > -}
> > -
> > -struct irq_host *irq_alloc_host(struct device_node *of_node,
> > -				unsigned int revmap_type,
> > -				unsigned int revmap_arg,
> > -				struct irq_host_ops *ops,
> > -				irq_hw_number_t inval_irq)
> > -{
> > -	struct irq_host *host;
> > -	unsigned int size = sizeof(struct irq_host);
> > -	unsigned int i;
> > -	unsigned int *rmap;
> > -	unsigned long flags;
> > -
> > -	/* Allocate structure and revmap table if using linear mapping */
> > -	if (revmap_type == IRQ_HOST_MAP_LINEAR)
> > -		size += revmap_arg * sizeof(unsigned int);
> > -	host = zalloc_maybe_bootmem(size, GFP_KERNEL);
> > -	if (host == NULL)
> > -		return NULL;
> > -
> > -	/* Fill structure */
> > -	host->revmap_type = revmap_type;
> > -	host->inval_irq = inval_irq;
> > -	host->ops = ops;
> > -	host->of_node = of_node_get(of_node);
> > -
> > -	if (host->ops->match == NULL)
> > -		host->ops->match = default_irq_host_match;
> > -
> > -	raw_spin_lock_irqsave(&irq_big_lock, flags);
> > -
> > -	/* If it's a legacy controller, check for duplicates and
> > -	 * mark it as allocated (we use irq 0 host pointer for that
> > -	 */
> > -	if (revmap_type == IRQ_HOST_MAP_LEGACY) {
> > -		if (irq_map[0].host != NULL) {
> > -			raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > -			/* If we are early boot, we can't free the
> structure,
> > -			 * too bad...
> > -			 * this will be fixed once slab is made available
> > early
> > -			 * instead of the current cruft
> > -			 */
> > -			if (mem_init_done)
> > -				kfree(host);
> > -			return NULL;
> > -		}
> > -		irq_map[0].host = host;
> > -	}
> > -
> > -	list_add(&host->link, &irq_hosts);
> > -	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > -
> > -	/* Additional setups per revmap type */
> > -	switch(revmap_type) {
> > -	case IRQ_HOST_MAP_LEGACY:
> > -		/* 0 is always the invalid number for legacy */
> > -		host->inval_irq = 0;
> > -		/* setup us as the host for all legacy interrupts */
> > -		for (i = 1; i < NUM_ISA_INTERRUPTS; i++) {
> > -			irq_map[i].hwirq = i;
> > -			smp_wmb();
> > -			irq_map[i].host = host;
> > -			smp_wmb();
> > -
> > -			/* Clear norequest flags */
> > -			irq_to_desc(i)->status &= ~IRQ_NOREQUEST;
> > -
> > -			/* Legacy flags are left to default at this point,
> > -			 * one can then use irq_create_mapping() to
> > -			 * explicitly change them
> > -			 */
> > -			ops->map(host, i, i);
> > -		}
> > -		break;
> > -	case IRQ_HOST_MAP_LINEAR:
> > -		rmap = (unsigned int *)(host + 1);
> > -		for (i = 0; i < revmap_arg; i++)
> > -			rmap[i] = NO_IRQ;
> > -		host->revmap_data.linear.size = revmap_arg;
> > -		smp_wmb();
> > -		host->revmap_data.linear.revmap = rmap;
> > -		break;
> > -	default:
> > -		break;
> > -	}
> > -
> > -	pr_debug("irq: Allocated host of type %d @0x%p\n", revmap_type,
> > host);
> > -
> > -	return host;
> > -}
> > -
> > -struct irq_host *irq_find_host(struct device_node *node)
> > -{
> > -	struct irq_host *h, *found = NULL;
> > -	unsigned long flags;
> > -
> > -	/* We might want to match the legacy controller last since
> > -	 * it might potentially be set to match all interrupts in
> > -	 * the absence of a device node. This isn't a problem so far
> > -	 * yet though...
> > -	 */
> > -	raw_spin_lock_irqsave(&irq_big_lock, flags);
> > -	list_for_each_entry(h, &irq_hosts, link)
> > -		if (h->ops->match(h, node)) {
> > -			found = h;
> > -			break;
> > -		}
> > -	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > -	return found;
> > -}
> > -EXPORT_SYMBOL_GPL(irq_find_host);
> > -
> > -void irq_set_default_host(struct irq_host *host)
> > -{
> > -	pr_debug("irq: Default host set to @0x%p\n", host);
> > -
> > -	irq_default_host = host;
> > -}
> > -
> > -void irq_set_virq_count(unsigned int count)
> > -{
> > -	pr_debug("irq: Trying to set virq count to %d\n", count);
> > -
> > -	BUG_ON(count < NUM_ISA_INTERRUPTS);
> > -	if (count < NR_IRQS)
> > -		irq_virq_count = count;
> > -}
> > -
> > -static int irq_setup_virq(struct irq_host *host, unsigned int virq,
> > -			    irq_hw_number_t hwirq)
> > -{
> > -	struct irq_desc *desc;
> > -
> > -	desc = irq_to_desc_alloc_node(virq, 0);
> > -	if (!desc) {
> > -		pr_debug("irq: -> allocating desc failed\n");
> > -		goto error;
> > -	}
> > -
> > -	/* Clear IRQ_NOREQUEST flag */
> > -	desc->status &= ~IRQ_NOREQUEST;
> > -
> > -	/* map it */
> > -	smp_wmb();
> > -	irq_map[virq].hwirq = hwirq;
> > -	smp_mb();
> > -
> > -	if (host->ops->map(host, virq, hwirq)) {
> > -		pr_debug("irq: -> mapping failed, freeing\n");
> > -		goto error;
> > -	}
> > -
> > -	return 0;
> > -
> > -error:
> > -	irq_free_virt(virq, 1);
> > -	return -1;
> > -}
> > -
> > -unsigned int irq_create_direct_mapping(struct irq_host *host)
> > -{
> > -	unsigned int virq;
> > -
> > -	if (host == NULL)
> > -		host = irq_default_host;
> > -
> > -	BUG_ON(host == NULL);
> > -	WARN_ON(host->revmap_type != IRQ_HOST_MAP_NOMAP);
> > -
> > -	virq = irq_alloc_virt(host, 1, 0);
> > -	if (virq == NO_IRQ) {
> > -		pr_debug("irq: create_direct virq allocation failed\n");
> > -		return NO_IRQ;
> > -	}
> > -
> > -	pr_debug("irq: create_direct obtained virq %d\n", virq);
> > -
> > -	if (irq_setup_virq(host, virq, virq))
> > -		return NO_IRQ;
> > -
> > -	return virq;
> > -}
> > -
> > -unsigned int irq_create_mapping(struct irq_host *host,
> > -				irq_hw_number_t hwirq)
> > -{
> > -	unsigned int virq, hint;
> > -
> > -	pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", host, hwirq);
> > -
> > -	/* Look for default host if nececssary */
> > -	if (host == NULL)
> > -		host = irq_default_host;
> > -	if (host == NULL) {
> > -		printk(KERN_WARNING "irq_create_mapping called for"
> > -		       " NULL host, hwirq=%lx\n", hwirq);
> > -		WARN_ON(1);
> > -		return NO_IRQ;
> > -	}
> > -	pr_debug("irq: -> using host @%p\n", host);
> > -
> > -	/* Check if mapping already exist, if it does, call
> > -	 * host->ops->map() to update the flags
> > -	 */
> > -	virq = irq_find_mapping(host, hwirq);
> > -	if (virq != NO_IRQ) {
> > -		if (host->ops->remap)
> > -			host->ops->remap(host, virq, hwirq);
> > -		pr_debug("irq: -> existing mapping on virq %d\n", virq);
> > -		return virq;
> > -	}
> > -
> > -	/* Get a virtual interrupt number */
> > -	if (host->revmap_type == IRQ_HOST_MAP_LEGACY) {
> > -		/* Handle legacy */
> > -		virq = (unsigned int)hwirq;
> > -		if (virq == 0 || virq >= NUM_ISA_INTERRUPTS)
> > -			return NO_IRQ;
> > -		return virq;
> > -	} else {
> > -		/* Allocate a virtual interrupt number */
> > -		hint = hwirq % irq_virq_count;
> > -		virq = irq_alloc_virt(host, 1, hint);
> > -		if (virq == NO_IRQ) {
> > -			pr_debug("irq: -> virq allocation failed\n");
> > -			return NO_IRQ;
> > -		}
> > -	}
> > -
> > -	if (irq_setup_virq(host, virq, hwirq))
> > -		return NO_IRQ;
> > -
> > -	printk(KERN_DEBUG "irq: irq %lu on host %s mapped to virtual irq
> > %u\n",
> > -		hwirq, host->of_node ? host->of_node->full_name : "null",
> > virq);
> > -
> > -	return virq;
> > -}
> > -EXPORT_SYMBOL_GPL(irq_create_mapping);
> > -
> > -unsigned int irq_create_of_mapping(struct device_node *controller,
> > -				   const u32 *intspec, unsigned int intsize)
> > -{
> > -	struct irq_host *host;
> > -	irq_hw_number_t hwirq;
> > -	unsigned int type = IRQ_TYPE_NONE;
> > -	unsigned int virq;
> > -
> > -	if (controller == NULL)
> > -		host = irq_default_host;
> > -	else
> > -		host = irq_find_host(controller);
> > -	if (host == NULL) {
> > -		printk(KERN_WARNING "irq: no irq host found for %s !\n",
> > -		       controller->full_name);
> > -		return NO_IRQ;
> > -	}
> > -
> > -	/* If host has no translation, then we assume interrupt line */
> > -	if (host->ops->xlate == NULL)
> > -		hwirq = intspec[0];
> > -	else {
> > -		if (host->ops->xlate(host, controller, intspec, intsize,
> > -				     &hwirq, &type))
> > -			return NO_IRQ;
> > -	}
> > -
> > -	/* Create mapping */
> > -	virq = irq_create_mapping(host, hwirq);
> > -	if (virq == NO_IRQ)
> > -		return virq;
> > -
> > -	/* Set type if specified and different than the current one */
> > -	if (type != IRQ_TYPE_NONE &&
> > -	    type != (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
> > -		set_irq_type(virq, type);
> > -	return virq;
> > -}
> > -EXPORT_SYMBOL_GPL(irq_create_of_mapping);
> > -
> > -void irq_dispose_mapping(unsigned int virq)
> > -{
> > -	struct irq_host *host;
> > -	irq_hw_number_t hwirq;
> > -
> > -	if (virq == NO_IRQ)
> > -		return;
> > -
> > -	host = irq_map[virq].host;
> > -	WARN_ON (host == NULL);
> > -	if (host == NULL)
> > -		return;
> > -
> > -	/* Never unmap legacy interrupts */
> > -	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
> > -		return;
> > -
> > -	/* remove chip and handler */
> > -	set_irq_chip_and_handler(virq, NULL, NULL);
> > -
> > -	/* Make sure it's completed */
> > -	synchronize_irq(virq);
> > -
> > -	/* Tell the PIC about it */
> > -	if (host->ops->unmap)
> > -		host->ops->unmap(host, virq);
> > -	smp_mb();
> > -
> > -	/* Clear reverse map */
> > -	hwirq = irq_map[virq].hwirq;
> > -	switch(host->revmap_type) {
> > -	case IRQ_HOST_MAP_LINEAR:
> > -		if (hwirq < host->revmap_data.linear.size)
> > -			host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
> > -		break;
> > -	case IRQ_HOST_MAP_TREE:
> > -		/*
> > -		 * Check if radix tree allocated yet, if not then nothing
> > to
> > -		 * remove.
> > -		 */
> > -		smp_rmb();
> > -		if (revmap_trees_allocated < 1)
> > -			break;
> > -		mutex_lock(&revmap_trees_mutex);
> > -		radix_tree_delete(&host->revmap_data.tree, hwirq);
> > -		mutex_unlock(&revmap_trees_mutex);
> > -		break;
> > -	}
> > -
> > -	/* Destroy map */
> > -	smp_mb();
> > -	irq_map[virq].hwirq = host->inval_irq;
> > -
> > -	/* Set some flags */
> > -	irq_to_desc(virq)->status |= IRQ_NOREQUEST;
> > -
> > -	/* Free it */
> > -	irq_free_virt(virq, 1);
> > -}
> > -EXPORT_SYMBOL_GPL(irq_dispose_mapping);
> > -
> > -unsigned int irq_find_mapping(struct irq_host *host,
> > -			      irq_hw_number_t hwirq)
> > -{
> > -	unsigned int i;
> > -	unsigned int hint = hwirq % irq_virq_count;
> > -
> > -	/* Look for default host if nececssary */
> > -	if (host == NULL)
> > -		host = irq_default_host;
> > -	if (host == NULL)
> > -		return NO_IRQ;
> > -
> > -	/* legacy -> bail early */
> > -	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
> > -		return hwirq;
> > -
> > -	/* Slow path does a linear search of the map */
> > -	if (hint < NUM_ISA_INTERRUPTS)
> > -		hint = NUM_ISA_INTERRUPTS;
> > -	i = hint;
> > -	do  {
> > -		if (irq_map[i].host == host &&
> > -		    irq_map[i].hwirq == hwirq)
> > -			return i;
> > -		i++;
> > -		if (i >= irq_virq_count)
> > -			i = NUM_ISA_INTERRUPTS;
> > -	} while(i != hint);
> > -	return NO_IRQ;
> > -}
> > -EXPORT_SYMBOL_GPL(irq_find_mapping);
> > -
> > -
> > -unsigned int irq_radix_revmap_lookup(struct irq_host *host,
> > -				     irq_hw_number_t hwirq)
> > -{
> > -	struct irq_map_entry *ptr;
> > -	unsigned int virq;
> > -
> > -	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
> > -
> > -	/*
> > -	 * Check if the radix tree exists and has bee initialized.
> > -	 * If not, we fallback to slow mode
> > -	 */
> > -	if (revmap_trees_allocated < 2)
> > -		return irq_find_mapping(host, hwirq);
> > -
> > -	/* Now try to resolve */
> > -	/*
> > -	 * No rcu_read_lock(ing) needed, the ptr returned can't go under
> > us
> > -	 * as it's referencing an entry in the static irq_map table.
> > -	 */
> > -	ptr = radix_tree_lookup(&host->revmap_data.tree, hwirq);
> > -
> > -	/*
> > -	 * If found in radix tree, then fine.
> > -	 * Else fallback to linear lookup - this should not happen in
> > practice
> > -	 * as it means that we failed to insert the node in the radix
> > tree.
> > -	 */
> > -	if (ptr)
> > -		virq = ptr - irq_map;
> > -	else
> > -		virq = irq_find_mapping(host, hwirq);
> > -
> > -	return virq;
> > -}
> > -
> > -void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
> > -			     irq_hw_number_t hwirq)
> > -{
> > -
> > -	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
> > -
> > -	/*
> > -	 * Check if the radix tree exists yet.
> > -	 * If not, then the irq will be inserted into the tree when it
> > gets
> > -	 * initialized.
> > -	 */
> > -	smp_rmb();
> > -	if (revmap_trees_allocated < 1)
> > -		return;
> > -
> > -	if (virq != NO_IRQ) {
> > -		mutex_lock(&revmap_trees_mutex);
> > -		radix_tree_insert(&host->revmap_data.tree, hwirq,
> > -				  &irq_map[virq]);
> > -		mutex_unlock(&revmap_trees_mutex);
> > -	}
> > -}
> > -
> > -unsigned int irq_linear_revmap(struct irq_host *host,
> > -			       irq_hw_number_t hwirq)
> > -{
> > -	unsigned int *revmap;
> > -
> > -	WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR);
> > -
> > -	/* Check revmap bounds */
> > -	if (unlikely(hwirq >= host->revmap_data.linear.size))
> > -		return irq_find_mapping(host, hwirq);
> > -
> > -	/* Check if revmap was allocated */
> > -	revmap = host->revmap_data.linear.revmap;
> > -	if (unlikely(revmap == NULL))
> > -		return irq_find_mapping(host, hwirq);
> > -
> > -	/* Fill up revmap with slow path if no mapping found */
> > -	if (unlikely(revmap[hwirq] == NO_IRQ))
> > -		revmap[hwirq] = irq_find_mapping(host, hwirq);
> > -
> > -	return revmap[hwirq];
> > -}
> > -
> > -unsigned int irq_alloc_virt(struct irq_host *host,
> > -			    unsigned int count,
> > -			    unsigned int hint)
> > -{
> > -	unsigned long flags;
> > -	unsigned int i, j, found = NO_IRQ;
> > -
> > -	if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS))
> > -		return NO_IRQ;
> > -
> > -	raw_spin_lock_irqsave(&irq_big_lock, flags);
> > -
> > -	/* Use hint for 1 interrupt if any */
> > -	if (count == 1 && hint >= NUM_ISA_INTERRUPTS &&
> > -	    hint < irq_virq_count && irq_map[hint].host == NULL) {
> > -		found = hint;
> > -		goto hint_found;
> > -	}
> > -
> > -	/* Look for count consecutive numbers in the allocatable
> > -	 * (non-legacy) space
> > -	 */
> > -	for (i = NUM_ISA_INTERRUPTS, j = 0; i < irq_virq_count; i++) {
> > -		if (irq_map[i].host != NULL)
> > -			j = 0;
> > -		else
> > -			j++;
> > -
> > -		if (j == count) {
> > -			found = i - count + 1;
> > -			break;
> > -		}
> > -	}
> > -	if (found == NO_IRQ) {
> > -		raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > -		return NO_IRQ;
> > -	}
> > - hint_found:
> > -	for (i = found; i < (found + count); i++) {
> > -		irq_map[i].hwirq = host->inval_irq;
> > -		smp_wmb();
> > -		irq_map[i].host = host;
> > -	}
> > -	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > -	return found;
> > -}
> > -
> > -void irq_free_virt(unsigned int virq, unsigned int count)
> > -{
> > -	unsigned long flags;
> > -	unsigned int i;
> > -
> > -	WARN_ON (virq < NUM_ISA_INTERRUPTS);
> > -	WARN_ON (count == 0 || (virq + count) > irq_virq_count);
> > -
> > -	raw_spin_lock_irqsave(&irq_big_lock, flags);
> > -	for (i = virq; i < (virq + count); i++) {
> > -		struct irq_host *host;
> > -
> > -		if (i < NUM_ISA_INTERRUPTS ||
> > -		    (virq + count) > irq_virq_count)
> > -			continue;
> > -
> > -		host = irq_map[i].host;
> > -		irq_map[i].hwirq = host->inval_irq;
> > -		smp_wmb();
> > -		irq_map[i].host = NULL;
> > -	}
> > -	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > -}
> > -
> >  int arch_early_irq_init(void)
> >  {
> >  	struct irq_desc *desc;
> > @@ -1090,118 +543,6 @@ int arch_init_chip_data(struct irq_desc *desc,
> > int node)
> >  	return 0;
> >  }
> > 
> > -/* We need to create the radix trees late */
> > -static int irq_late_init(void)
> > -{
> > -	struct irq_host *h;
> > -	unsigned int i;
> > -
> > -	/*
> > -	 * No mutual exclusion with respect to accessors of the tree is
> > needed
> > -	 * here as the synchronization is done via the state variable
> > -	 * revmap_trees_allocated.
> > -	 */
> > -	list_for_each_entry(h, &irq_hosts, link) {
> > -		if (h->revmap_type == IRQ_HOST_MAP_TREE)
> > -			INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL);
> > -	}
> > -
> > -	/*
> > -	 * Make sure the radix trees inits are visible before setting
> > -	 * the flag
> > -	 */
> > -	smp_wmb();
> > -	revmap_trees_allocated = 1;
> > -
> > -	/*
> > -	 * Insert the reverse mapping for those interrupts already
> > present
> > -	 * in irq_map[].
> > -	 */
> > -	mutex_lock(&revmap_trees_mutex);
> > -	for (i = 0; i < irq_virq_count; i++) {
> > -		if (irq_map[i].host &&
> > -		    (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE))
> > -			radix_tree_insert(&irq_map[i].host->revmap_data.tree,
> > -					  irq_map[i].hwirq, &irq_map[i]);
> > -	}
> > -	mutex_unlock(&revmap_trees_mutex);
> > -
> > -	/*
> > -	 * Make sure the radix trees insertions are visible before
> > setting
> > -	 * the flag
> > -	 */
> > -	smp_wmb();
> > -	revmap_trees_allocated = 2;
> > -
> > -	return 0;
> > -}
> > -arch_initcall(irq_late_init);
> > -
> > -#ifdef CONFIG_VIRQ_DEBUG
> > -static int virq_debug_show(struct seq_file *m, void *private)
> > -{
> > -	unsigned long flags;
> > -	struct irq_desc *desc;
> > -	const char *p;
> > -	char none[] = "none";
> > -	int i;
> > -
> > -	seq_printf(m, "%-5s  %-7s  %-15s  %s\n", "virq", "hwirq",
> > -		      "chip name", "host name");
> > -
> > -	for (i = 1; i < nr_irqs; i++) {
> > -		desc = irq_to_desc(i);
> > -		if (!desc)
> > -			continue;
> > -
> > -		raw_spin_lock_irqsave(&desc->lock, flags);
> > -
> > -		if (desc->action && desc->action->handler) {
> > -			seq_printf(m, "%5d  ", i);
> > -			seq_printf(m, "0x%05lx  ", virq_to_hw(i));
> > -
> > -			if (desc->chip && desc->chip->name)
> > -				p = desc->chip->name;
> > -			else
> > -				p = none;
> > -			seq_printf(m, "%-15s  ", p);
> > -
> > -			if (irq_map[i].host && irq_map[i].host->of_node)
> > -				p = irq_map[i].host->of_node->full_name;
> > -			else
> > -				p = none;
> > -			seq_printf(m, "%s\n", p);
> > -		}
> > -
> > -		raw_spin_unlock_irqrestore(&desc->lock, flags);
> > -	}
> > -
> > -	return 0;
> > -}
> > -
> > -static int virq_debug_open(struct inode *inode, struct file *file)
> > -{
> > -	return single_open(file, virq_debug_show, inode->i_private);
> > -}
> > -
> > -static const struct file_operations virq_debug_fops = {
> > -	.open = virq_debug_open,
> > -	.read = seq_read,
> > -	.llseek = seq_lseek,
> > -	.release = single_release,
> > -};
> > -
> > -static int __init irq_debugfs_init(void)
> > -{
> > -	if (debugfs_create_file("virq_mapping", S_IRUGO,
> > powerpc_debugfs_root,
> > -				 NULL, &virq_debug_fops) == NULL)
> > -		return -ENOMEM;
> > -
> > -	return 0;
> > -}
> > -__initcall(irq_debugfs_init);
> > -#endif /* CONFIG_VIRQ_DEBUG */
> > -
> >  #ifdef CONFIG_PPC64
> >  static int __init setup_noirqdistrib(char *str)
> >  {
> > diff --git a/include/linux/virq.h b/include/linux/virq.h
> > new file mode 100644
> > index 0000000..06035ef
> > --- /dev/null
> > +++ b/include/linux/virq.h
> > @@ -0,0 +1,302 @@
> > +/*
> > + * Virtual IRQ infrastructure
> > + *
> > + * Virtual IRQs provide support for dynamically allocating ranges of
> > IRQ
> > + * numbers for use by interrupt controllers.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of version 2 of the GNU General Public
> > + * License as published by the Free Software Foundation.
> > + */
> > +
> > +
> > +#ifdef __KERNEL__
> > +#ifndef _LINUX_VIRQ_H
> > +#define _LINUX_VIRQ_H
> > +
> > +#include <asm/irq.h>
> > +
> > +#ifdef CONFIG_VIRQ
> > +
> > +/* Define a way to iterate across irqs. */
> > +#define for_each_irq(i) \
> > +	for ((i) = 0; (i) < NR_IRQS; ++(i))
> > +
> > +/* This type is the placeholder for a hardware interrupt number. It
> > has to
> > + * be big enough to enclose whatever representation is used by a given
> > + * platform.
> > + */
> > +typedef unsigned long irq_hw_number_t;
> > +
> > +/* Interrupt controller "host" data structure. This could be defined
> > as a
> > + * irq domain controller. That is, it handles the mapping between
> > hardware
> > + * and virtual interrupt numbers for a given interrupt domain. The
> > host
> > + * structure is generally created by the PIC code for a given PIC
> > instance
> > + * (though a host can cover more than one PIC if they have a flat
> > number
> > + * model). It's the host callbacks that are responsible for setting
> > the
> > + * irq_chip on a given irq_desc after it's been mapped.
> > + *
> > + * The host code and data structures are fairly agnostic to the fact
> > that
> > + * we use an open firmware device-tree. We do have references to
> > struct
> > + * device_node in two places: in irq_find_host() to find the host
> > matching
> > + * a given interrupt controller node, and of course as an argument to
> > its
> > + * counterpart host->ops->match() callback. However, those are treated
> > as
> > + * generic pointers by the core and the fact that it's actually a
> > device-node
> > + * pointer is purely a convention between callers and implementation.
> > This
> > + * code could thus be used on other architectures by replacing those
> > two
> > + * by some sort of arch-specific void * "token" used to identify
> > interrupt
> > + * controllers.
> > + */
> > +struct irq_host;
> > +struct radix_tree_root;
> > +struct device_node;
> > +
> > +/**
> > + * struct irq_host_ops - operations for managing per-domain hw irq
> > numbers
> > + *
> > + * Functions below are provided by the host and called whenever a new
> > mapping
> > + * is created or an old mapping is disposed. The host can then proceed
> > to
> > + * whatever internal data structures management is required. It also
> > needs
> > + * to setup the irq_desc when returning from map().
> > + */
> > +struct irq_host_ops {
> > +	/* Match an interrupt controller device node to a host, returns
> > +	 * 1 on a match
> > +	 */
> > +	int (*match)(struct irq_host *h, struct device_node *node);
> > +
> > +	/* Create or update a mapping between a virtual irq number and a
> > hw
> > +	 * irq number. This is called only once for a given mapping.
> > +	 */
> > +	int (*map)(struct irq_host *h, unsigned int virq, irq_hw_number_t
> > hw);
> > +
> > +	/* Dispose of such a mapping */
> > +	void (*unmap)(struct irq_host *h, unsigned int virq);
> > +
> > +	/* Update of such a mapping  */
> > +	void (*remap)(struct irq_host *h, unsigned int virq,
> > irq_hw_number_t hw);
> > +
> > +	/* Translate device-tree interrupt specifier from raw format
> > coming
> > +	 * from the firmware to a irq_hw_number_t (interrupt line number)
> > and
> > +	 * type (sense) that can be passed to set_irq_type(). In the
> > absence
> > +	 * of this callback, irq_create_of_mapping() and
> > irq_of_parse_and_map()
> > +	 * will return the hw number in the first cell and IRQ_TYPE_NONE
> > for
> > +	 * the type (which amount to keeping whatever default value the
> > +	 * interrupt controller has for that line)
> > +	 */
> > +	int (*xlate)(struct irq_host *h, struct device_node *ctrler,
> > +		     const u32 *intspec, unsigned int intsize,
> > +		     irq_hw_number_t *out_hwirq, unsigned int *out_type);
> > +};
> > +
> > +/**
> > + * struct irq_host - a single irq domain. maps hw irq numbers to Linux
> > irq.
> > + * @link: entry in global irq_host list
> > + * @revmap_type: Method of reverse mapping hwirq to Linux irq number
> > + * @revmap_data: reverse map data
> > + * @ops: irq domain operations (documented above)
> > + * @host_data: irq controller driver data; core does not touch this
> > pointer
> > + * @inval_irq: hw irq number used for unassigned virqs
> > + * @of_node: Optional pointer to the irq controllers device tree node.
> > + *
> > + * One irq_host is allocated for each range (domain) of Linux irq
> > numbers
> > + * allocated.  Typically, one irq_host is allocated per controller,
> > but it
> > + * is perfectly valid to manage multiple controllers with a single
> > irq_host
> > + * instance if need be.
> > + */
> > +struct irq_host {
> > +	struct list_head	link;
> > +
> > +	/* type of reverse mapping technique */
> > +	unsigned int		revmap_type;
> > +#define IRQ_HOST_MAP_LEGACY     0 /* legacy 8259, gets irqs 1..15 */
> > +#define IRQ_HOST_MAP_NOMAP	1 /* no fast reverse mapping */
> > +#define IRQ_HOST_MAP_LINEAR	2 /* linear map of interrupts */
> > +#define IRQ_HOST_MAP_TREE	3 /* radix tree */
> > +	union {
> > +		struct {
> > +			unsigned int size;
> > +			unsigned int *revmap;
> > +		} linear;
> > +		struct radix_tree_root tree;
> > +	} revmap_data;
> > +	struct irq_host_ops	*ops;
> > +	void			*host_data;
> > +	irq_hw_number_t		inval_irq;
> > +
> > +	/* Optional device node pointer */
> > +	struct device_node	*of_node;
> > +};
> > +
> > +/**
> > + * irq_alloc_host() - Allocate a new irq_host data structure
> > + * @of_node: optional device-tree node of the interrupt controller
> > + * @revmap_type: type of reverse mapping to use
> > + * @revmap_arg: for IRQ_HOST_MAP_LINEAR linear only: size of the map
> > + * @ops: map/unmap host callbacks
> > + * @inval_irq: provide a hw number in that host space that is always
> > invalid
> > + *
> > + * Allocates and initializes an irq_host structure. Note that in the
> > case of
> > + * IRQ_HOST_MAP_LEGACY, the map() callback will be called before this
> > returns
> > + * for all legacy interrupts except 0 (which is always the invalid irq
> > for
> > + * a legacy controller). For a IRQ_HOST_MAP_LINEAR, the map is
> > allocated by
> > + * this call as well. For a IRQ_HOST_MAP_TREE, the radix tree will be
> > allocated
> > + * later during boot automatically (the reverse mapping will use the
> > slow path
> > + * until that happens).
> > + */
> > +extern struct irq_host *irq_alloc_host(struct device_node *of_node,
> > +				       unsigned int revmap_type,
> > +				       unsigned int revmap_arg,
> > +				       struct irq_host_ops *ops,
> > +				       irq_hw_number_t inval_irq);
> > +
> > +/* The main irq map itself is an array of NR_IRQ entries containing
> > the
> > + * associate host and irq number. An entry with a host of NULL is
> > free.
> > + * An entry can be allocated if it's free, the allocator always then
> > sets
> > + * hwirq first to the host's invalid irq number and then fills host.
> > + */
> > +struct irq_map_entry {
> > +	irq_hw_number_t	hwirq;
> > +	struct irq_host	*host;
> > +};
> > +extern struct irq_map_entry irq_map[NR_IRQS];
> > +
> > +extern irq_hw_number_t virq_to_hw(unsigned int virq);
> > +
> > +/**
> > + * irq_find_host - Locates a host for a given device node
> > + * @node: device-tree node of the interrupt controller
> > + */
> > +extern struct irq_host *irq_find_host(struct device_node *node);
> > +
> > +/**
> > + * irq_set_default_host - Set a "default" host
> > + * @host: default host pointer
> > + *
> > + * For convenience, it's possible to set a "default" host that will be
> > used
> > + * whenever NULL is passed to irq_create_mapping(). It makes life
> > easier for
> > + * platforms that want to manipulate a few hard coded interrupt
> > numbers that
> > + * aren't properly represented in the device-tree.
> > + */
> > +extern void irq_set_default_host(struct irq_host *host);
> > +
> > +/**
> > + * irq_set_virq_count - Set the maximum number of virt irqs
> > + * @count: number of linux virtual irqs, capped with NR_IRQS
> > + *
> > + * This is mainly for use by platforms like iSeries who want to
> > program
> > + * the virtual irq number in the controller to avoid the reverse
> > mapping
> > + */
> > +extern void irq_set_virq_count(unsigned int count);
> > +
> > +/**
> > + * irq_create_mapping - Map a hardware interrupt into linux virq space
> > + * @host: host owning this hardware interrupt or NULL for default host
> > + * @hwirq: hardware irq number in that host space
> > + *
> > + * Only one mapping per hardware interrupt is permitted. Returns a
> > linux
> > + * virq number.
> > + * If the sense/trigger is to be specified, set_irq_type() should be
> > called
> > + * on the number returned from that call.
> > + */
> > +extern unsigned int irq_create_mapping(struct irq_host *host,
> > +				       irq_hw_number_t hwirq);
> > +
> > +/**
> > + * irq_dispose_mapping - Unmap an interrupt
> > + * @virq: linux virq number of the interrupt to unmap
> > + */
> > +extern void irq_dispose_mapping(unsigned int virq);
> > +
> > +/**
> > + * irq_find_mapping - Find a linux virq from an hw irq number.
> > + * @host: host owning this hardware interrupt
> > + * @hwirq: hardware irq number in that host space
> > + *
> > + * This is a slow path, for use by generic code. It's expected that an
> > + * irq controller implementation directly calls the appropriate low
> > level
> > + * mapping function.
> > + */
> > +extern unsigned int irq_find_mapping(struct irq_host *host,
> > +				     irq_hw_number_t hwirq);
> > +
> > +/**
> > + * irq_create_direct_mapping - Allocate a virq for direct mapping
> > + * @host: host to allocate the virq for or NULL for default host
> > + *
> > + * This routine is used for irq controllers which can choose the
> > hardware
> > + * interrupt numbers they generate. In such a case it's simplest to
> > use
> > + * the linux virq as the hardware interrupt number.
> > + */
> > +extern unsigned int irq_create_direct_mapping(struct irq_host *host);
> > +
> > +/**
> > + * irq_radix_revmap_insert - Insert a hw irq to linux virq number
> > mapping.
> > + * @host: host owning this hardware interrupt
> > + * @virq: linux irq number
> > + * @hwirq: hardware irq number in that host space
> > + *
> > + * This is for use by irq controllers that use a radix tree reverse
> > + * mapping for fast lookup.
> > + */
> > +extern void irq_radix_revmap_insert(struct irq_host *host, unsigned
> > int virq,
> > +				    irq_hw_number_t hwirq);
> > +
> > +/**
> > + * irq_radix_revmap_lookup - Find a linux virq from a hw irq number.
> > + * @host: host owning this hardware interrupt
> > + * @hwirq: hardware irq number in that host space
> > + *
> > + * This is a fast path, for use by irq controller code that uses radix
> > tree
> > + * revmaps
> > + */
> > +extern unsigned int irq_radix_revmap_lookup(struct irq_host *host,
> > +					    irq_hw_number_t hwirq);
> > +
> > +/**
> > + * irq_linear_revmap - Find a linux virq from a hw irq number.
> > + * @host: host owning this hardware interrupt
> > + * @hwirq: hardware irq number in that host space
> > + *
> > + * This is a fast path, for use by irq controller code that uses
> > linear
> > + * revmaps. It does fallback to the slow path if the revmap doesn't
> > exist
> > + * yet and will create the revmap entry with appropriate locking
> > + */
> > +
> > +extern unsigned int irq_linear_revmap(struct irq_host *host,
> > +				      irq_hw_number_t hwirq);
> > +
> > +
> > +
> > +/**
> > + * irq_alloc_virt - Allocate virtual irq numbers
> > + * @host: host owning these new virtual irqs
> > + * @count: number of consecutive numbers to allocate
> > + * @hint: pass a hint number, the allocator will try to use a 1:1
> > mapping
> > + *
> > + * This is a low level function that is used internally by
> > irq_create_mapping()
> > + * and that can be used by some irq controllers implementations for
> > things
> > + * like allocating ranges of numbers for MSIs. The revmaps are left
> > untouched.
> > + */
> > +extern unsigned int irq_alloc_virt(struct irq_host *host,
> > +				   unsigned int count,
> > +				   unsigned int hint);
> > +
> > +/**
> > + * irq_free_virt - Free virtual irq numbers
> > + * @virq: virtual irq number of the first interrupt to free
> > + * @count: number of interrupts to free
> > + *
> > + * This function is the opposite of irq_alloc_virt. It will not clear
> > reverse
> > + * maps, this should be done previously by unmap'ing the interrupt. In
> > fact,
> > + * all interrupts covered by the range being freed should have been
> > unmapped
> > + * prior to calling this.
> > + */
> > +extern void irq_free_virt(unsigned int virq, unsigned int count);
> > +
> > +
> > +#endif /* CONFIG_VIRQ */
> > +
> > +#endif /* _LINUX_VIRQ_H */
> > +#endif /* __KERNEL__ */
> > +
> > diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
> > index 7d04780..f5207dc 100644
> > --- a/kernel/irq/Makefile
> > +++ b/kernel/irq/Makefile
> > @@ -1,5 +1,6 @@
> > 
> >  obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
> > +obj-$(CONFIG_VIRQ) += virq.o
> >  obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
> >  obj-$(CONFIG_PROC_FS) += proc.o
> >  obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
> > diff --git a/kernel/irq/virq.c b/kernel/irq/virq.c
> > new file mode 100644
> > index 0000000..b3c0db3
> > --- /dev/null
> > +++ b/kernel/irq/virq.c
> > @@ -0,0 +1,687 @@
> > +/*
> > + * Mapping support from per-controller hw irq numbers to linux irqs
> > + *
> > + *  Derived from arch/i386/kernel/irq.c
> > + *    Copyright (C) 1992 Linus Torvalds
> > + *  Adapted from arch/i386 by Gary Thomas
> > + *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
> > + *  Updated and modified by Cort Dougan <cort@fsmlabs.com>
> > + *    Copyright (C) 1996-2001 Cort Dougan
> > + *  Adapted for Power Macintosh by Paul Mackerras
> > + *    Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
> > + *  Generalized for virtual irq mapping on all platforms by Grant
> > Likely
> > + *    Copyright (C) 2010 Secret Lab Technologies Ltd.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License
> > + * as published by the Free Software Foundation; either version
> > + * 2 of the License, or (at your option) any later version.
> > + */
> > +
> > +#include <linux/interrupt.h>
> > +#include <linux/irq.h>
> > +#include <linux/module.h>
> > +#include <linux/mutex.h>
> > +#include <linux/slab.h>
> > +#include <linux/radix-tree.h>
> > +#include <linux/virq.h>
> > +#include <linux/of_irq.h>
> > +
> > +/*
> > + * IRQ controller and virtual interrupts
> > + */
> > +static LIST_HEAD(irq_hosts);
> > +static DEFINE_RAW_SPINLOCK(irq_big_lock);
> > +static unsigned int revmap_trees_allocated;
> > +static DEFINE_MUTEX(revmap_trees_mutex);
> > +struct irq_map_entry irq_map[NR_IRQS];
> > +static unsigned int irq_virq_count = NR_IRQS;
> > +static struct irq_host *irq_default_host;
> > +
> > +irq_hw_number_t virq_to_hw(unsigned int virq)
> > +{
> > +	return irq_map[virq].hwirq;
> > +}
> > +EXPORT_SYMBOL_GPL(virq_to_hw);
> > +
> > +static int default_irq_host_match(struct irq_host *h, struct
> > device_node *np)
> > +{
> > +	return h->of_node != NULL && h->of_node == np;
> > +}
> > +
> > +struct irq_host *irq_alloc_host(struct device_node *of_node,
> > +				unsigned int revmap_type,
> > +				unsigned int revmap_arg,
> > +				struct irq_host_ops *ops,
> > +				irq_hw_number_t inval_irq)
> > +{
> > +	struct irq_host *host;
> > +	unsigned int size = sizeof(struct irq_host);
> > +	unsigned int i;
> > +	unsigned int *rmap;
> > +	unsigned long flags;
> > +
> > +	/* Allocate structure and revmap table if using linear mapping */
> > +	if (revmap_type == IRQ_HOST_MAP_LINEAR)
> > +		size += revmap_arg * sizeof(unsigned int);
> > +	host = zalloc_maybe_bootmem(size, GFP_KERNEL);
> > +	if (host == NULL)
> > +		return NULL;
> > +
> > +	/* Fill structure */
> > +	host->revmap_type = revmap_type;
> > +	host->inval_irq = inval_irq;
> > +	host->ops = ops;
> > +	host->of_node = of_node_get(of_node);
> > +
> > +	if (host->ops->match == NULL)
> > +		host->ops->match = default_irq_host_match;
> > +
> > +	raw_spin_lock_irqsave(&irq_big_lock, flags);
> > +
> > +	/* If it's a legacy controller, check for duplicates and
> > +	 * mark it as allocated (we use irq 0 host pointer for that)
> > +	 */
> > +	if (revmap_type == IRQ_HOST_MAP_LEGACY) {
> > +		if (irq_map[0].host != NULL) {
> > +			raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > +			/* If we are early boot, we can't free the structure,
> > +			 * too bad...
> > +			 * this will be fixed once slab is made available early
> > +			 * instead of the current cruft
> > +			 */
> > +			if (mem_init_done)
> > +				kfree(host);
> > +			return NULL;
> > +		}
> > +		irq_map[0].host = host;
> > +	}
> > +
> > +	list_add(&host->link, &irq_hosts);
> > +	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > +
> > +	/* Additional setups per revmap type */
> > +	switch(revmap_type) {
> > +	case IRQ_HOST_MAP_LEGACY:
> > +		/* 0 is always the invalid number for legacy */
> > +		host->inval_irq = 0;
> > +		/* setup us as the host for all legacy interrupts */
> > +		for (i = 1; i < NUM_ISA_INTERRUPTS; i++) {
> > +			irq_map[i].hwirq = i;
> > +			smp_wmb();
> > +			irq_map[i].host = host;
> > +			smp_wmb();
> > +
> > +			/* Clear norequest flags */
> > +			irq_to_desc(i)->status &= ~IRQ_NOREQUEST;
> > +
> > +			/* Legacy flags are left to default at this point,
> > +			 * one can then use irq_create_mapping() to
> > +			 * explicitly change them
> > +			 */
> > +			ops->map(host, i, i);
> > +		}
> > +		break;
> > +	case IRQ_HOST_MAP_LINEAR:
> > +		rmap = (unsigned int *)(host + 1);
> > +		for (i = 0; i < revmap_arg; i++)
> > +			rmap[i] = NO_IRQ;
> > +		host->revmap_data.linear.size = revmap_arg;
> > +		smp_wmb();
> > +		host->revmap_data.linear.revmap = rmap;
> > +		break;
> > +	default:
> > +		break;
> > +	}
> > +
> > +	pr_debug("irq: Allocated host of type %d @0x%p\n", revmap_type,
> > host);
> > +
> > +	return host;
> > +}
> > +
> > +struct irq_host *irq_find_host(struct device_node *node)
> > +{
> > +	struct irq_host *h, *found = NULL;
> > +	unsigned long flags;
> > +
> > +	/* We might want to match the legacy controller last since
> > +	 * it might potentially be set to match all interrupts in
> > +	 * the absence of a device node. This isn't a problem so far
> > +	 * yet though...
> > +	 */
> > +	raw_spin_lock_irqsave(&irq_big_lock, flags);
> > +	list_for_each_entry(h, &irq_hosts, link)
> > +		if (h->ops->match(h, node)) {
> > +			found = h;
> > +			break;
> > +		}
> > +	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > +	return found;
> > +}
> > +EXPORT_SYMBOL_GPL(irq_find_host);
> > +
> > +void irq_set_default_host(struct irq_host *host)
> > +{
> > +	pr_debug("irq: Default host set to @0x%p\n", host);
> > +
> > +	irq_default_host = host;
> > +}
> > +
> > +void irq_set_virq_count(unsigned int count)
> > +{
> > +	pr_debug("irq: Trying to set virq count to %d\n", count);
> > +
> > +	BUG_ON(count < NUM_ISA_INTERRUPTS);
> > +	if (count < NR_IRQS)
> > +		irq_virq_count = count;
> > +}
> > +
> > +static int irq_setup_virq(struct irq_host *host, unsigned int virq,
> > +			    irq_hw_number_t hwirq)
> > +{
> > +	struct irq_desc *desc;
> > +
> > +	desc = irq_to_desc_alloc_node(virq, 0);
> > +	if (!desc) {
> > +		pr_debug("irq: -> allocating desc failed\n");
> > +		goto error;
> > +	}
> > +
> > +	/* Clear IRQ_NOREQUEST flag */
> > +	desc->status &= ~IRQ_NOREQUEST;
> > +
> > +	/* map it */
> > +	smp_wmb();
> > +	irq_map[virq].hwirq = hwirq;
> > +	smp_mb();
> > +
> > +	if (host->ops->map(host, virq, hwirq)) {
> > +		pr_debug("irq: -> mapping failed, freeing\n");
> > +		goto error;
> > +	}
> > +
> > +	return 0;
> > +
> > +error:
> > +	irq_free_virt(virq, 1);
> > +	return -1;
> > +}
> > +
> > +unsigned int irq_create_direct_mapping(struct irq_host *host)
> > +{
> > +	unsigned int virq;
> > +
> > +	if (host == NULL)
> > +		host = irq_default_host;
> > +
> > +	BUG_ON(host == NULL);
> > +	WARN_ON(host->revmap_type != IRQ_HOST_MAP_NOMAP);
> > +
> > +	virq = irq_alloc_virt(host, 1, 0);
> > +	if (virq == NO_IRQ) {
> > +		pr_debug("irq: create_direct virq allocation failed\n");
> > +		return NO_IRQ;
> > +	}
> > +
> > +	pr_debug("irq: create_direct obtained virq %d\n", virq);
> > +
> > +	if (irq_setup_virq(host, virq, virq))
> > +		return NO_IRQ;
> > +
> > +	return virq;
> > +}
> > +
> > +unsigned int irq_create_mapping(struct irq_host *host,
> > +				irq_hw_number_t hwirq)
> > +{
> > +	unsigned int virq, hint;
> > +
> > +	pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", host, hwirq);
> > +
> > +	/* Look for default host if necessary */
> > +	if (host == NULL)
> > +		host = irq_default_host;
> > +	if (host == NULL) {
> > +		printk(KERN_WARNING "irq_create_mapping called for"
> > +		       " NULL host, hwirq=%lx\n", hwirq);
> > +		WARN_ON(1);
> > +		return NO_IRQ;
> > +	}
> > +	pr_debug("irq: -> using host @%p\n", host);
> > +
> > +	/* Check if mapping already exist, if it does, call
> > +	 * host->ops->map() to update the flags
> > +	 */
> > +	virq = irq_find_mapping(host, hwirq);
> > +	if (virq != NO_IRQ) {
> > +		if (host->ops->remap)
> > +			host->ops->remap(host, virq, hwirq);
> > +		pr_debug("irq: -> existing mapping on virq %d\n", virq);
> > +		return virq;
> > +	}
> > +
> > +	/* Get a virtual interrupt number */
> > +	if (host->revmap_type == IRQ_HOST_MAP_LEGACY) {
> > +		/* Handle legacy */
> > +		virq = (unsigned int)hwirq;
> > +		if (virq == 0 || virq >= NUM_ISA_INTERRUPTS)
> > +			return NO_IRQ;
> > +		return virq;
> > +	} else {
> > +		/* Allocate a virtual interrupt number */
> > +		hint = hwirq % irq_virq_count;
> > +		virq = irq_alloc_virt(host, 1, hint);
> > +		if (virq == NO_IRQ) {
> > +			pr_debug("irq: -> virq allocation failed\n");
> > +			return NO_IRQ;
> > +		}
> > +	}
> > +
> > +	if (irq_setup_virq(host, virq, hwirq))
> > +		return NO_IRQ;
> > +
> > +	printk(KERN_DEBUG "irq: irq %lu on host %s mapped to virtual irq
> > %u\n",
> > +		hwirq, host->of_node ? host->of_node->full_name : "null",
> > virq);
> > +
> > +	return virq;
> > +}
> > +EXPORT_SYMBOL_GPL(irq_create_mapping);
> > +
> > +unsigned int irq_create_of_mapping(struct device_node *controller,
> > +				   const u32 *intspec, unsigned int intsize)
> > +{
> > +	struct irq_host *host;
> > +	irq_hw_number_t hwirq;
> > +	unsigned int type = IRQ_TYPE_NONE;
> > +	unsigned int virq;
> > +
> > +	if (controller == NULL)
> > +		host = irq_default_host;
> > +	else
> > +		host = irq_find_host(controller);
> > +	if (host == NULL) {
> > +		printk(KERN_WARNING "irq: no irq host found for %s !\n",
> > +		       controller->full_name);
> > +		return NO_IRQ;
> > +	}
> > +
> > +	/* If host has no translation, then we assume interrupt line */
> > +	if (host->ops->xlate == NULL)
> > +		hwirq = intspec[0];
> > +	else {
> > +		if (host->ops->xlate(host, controller, intspec, intsize,
> > +				     &hwirq, &type))
> > +			return NO_IRQ;
> > +	}
> > +
> > +	/* Create mapping */
> > +	virq = irq_create_mapping(host, hwirq);
> > +	if (virq == NO_IRQ)
> > +		return virq;
> > +
> > +	/* Set type if specified and different than the current one */
> > +	if (type != IRQ_TYPE_NONE &&
> > +	    type != (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
> > +		set_irq_type(virq, type);
> > +	return virq;
> > +}
> > +EXPORT_SYMBOL_GPL(irq_create_of_mapping);
> > +
> > +void irq_dispose_mapping(unsigned int virq)
> > +{
> > +	struct irq_host *host;
> > +	irq_hw_number_t hwirq;
> > +
> > +	if (virq == NO_IRQ)
> > +		return;
> > +
> > +	host = irq_map[virq].host;
> > +	WARN_ON (host == NULL);
> > +	if (host == NULL)
> > +		return;
> > +
> > +	/* Never unmap legacy interrupts */
> > +	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
> > +		return;
> > +
> > +	/* remove chip and handler */
> > +	set_irq_chip_and_handler(virq, NULL, NULL);
> > +
> > +	/* Make sure it's completed */
> > +	synchronize_irq(virq);
> > +
> > +	/* Tell the PIC about it */
> > +	if (host->ops->unmap)
> > +		host->ops->unmap(host, virq);
> > +	smp_mb();
> > +
> > +	/* Clear reverse map */
> > +	hwirq = irq_map[virq].hwirq;
> > +	switch(host->revmap_type) {
> > +	case IRQ_HOST_MAP_LINEAR:
> > +		if (hwirq < host->revmap_data.linear.size)
> > +			host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
> > +		break;
> > +	case IRQ_HOST_MAP_TREE:
> > +		/*
> > +		 * Check if radix tree allocated yet, if not then nothing
> > to
> > +		 * remove.
> > +		 */
> > +		smp_rmb();
> > +		if (revmap_trees_allocated < 1)
> > +			break;
> > +		mutex_lock(&revmap_trees_mutex);
> > +		radix_tree_delete(&host->revmap_data.tree, hwirq);
> > +		mutex_unlock(&revmap_trees_mutex);
> > +		break;
> > +	}
> > +
> > +	/* Destroy map */
> > +	smp_mb();
> > +	irq_map[virq].hwirq = host->inval_irq;
> > +
> > +	/* Set some flags */
> > +	irq_to_desc(virq)->status |= IRQ_NOREQUEST;
> > +
> > +	/* Free it */
> > +	irq_free_virt(virq, 1);
> > +}
> > +EXPORT_SYMBOL_GPL(irq_dispose_mapping);
> > +
> > +unsigned int irq_find_mapping(struct irq_host *host,
> > +			      irq_hw_number_t hwirq)
> > +{
> > +	unsigned int i;
> > +	unsigned int hint = hwirq % irq_virq_count;
> > +
> > +	/* Look for default host if nececssary */
> > +	if (host == NULL)
> > +		host = irq_default_host;
> > +	if (host == NULL)
> > +		return NO_IRQ;
> > +
> > +	/* legacy -> bail early */
> > +	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
> > +		return hwirq;
> > +
> > +	/* Slow path does a linear search of the map */
> > +	if (hint < NUM_ISA_INTERRUPTS)
> > +		hint = NUM_ISA_INTERRUPTS;
> > +	i = hint;
> > +	do  {
> > +		if (irq_map[i].host == host &&
> > +		    irq_map[i].hwirq == hwirq)
> > +			return i;
> > +		i++;
> > +		if (i >= irq_virq_count)
> > +			i = NUM_ISA_INTERRUPTS;
> > +	} while(i != hint);
> > +	return NO_IRQ;
> > +}
> > +EXPORT_SYMBOL_GPL(irq_find_mapping);
> > +
> > +
> > +unsigned int irq_radix_revmap_lookup(struct irq_host *host,
> > +				     irq_hw_number_t hwirq)
> > +{
> > +	struct irq_map_entry *ptr;
> > +	unsigned int virq;
> > +
> > +	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
> > +
> > +	/*
> > +	 * Check if the radix tree exists and has bee initialized.
> > +	 * If not, we fallback to slow mode
> > +	 */
> > +	if (revmap_trees_allocated < 2)
> > +		return irq_find_mapping(host, hwirq);
> > +
> > +	/* Now try to resolve */
> > +	/*
> > +	 * No rcu_read_lock(ing) needed, the ptr returned can't go under
> > us
> > +	 * as it's referencing an entry in the static irq_map table.
> > +	 */
> > +	ptr = radix_tree_lookup(&host->revmap_data.tree, hwirq);
> > +
> > +	/*
> > +	 * If found in radix tree, then fine.
> > +	 * Else fallback to linear lookup - this should not happen in
> > practice
> > +	 * as it means that we failed to insert the node in the radix
> > tree.
> > +	 */
> > +	if (ptr)
> > +		virq = ptr - irq_map;
> > +	else
> > +		virq = irq_find_mapping(host, hwirq);
> > +
> > +	return virq;
> > +}
> > +
> > +void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq,
> > +			     irq_hw_number_t hwirq)
> > +{
> > +
> > +	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
> > +
> > +	/*
> > +	 * Check if the radix tree exists yet.
> > +	 * If not, then the irq will be inserted into the tree when it
> > gets
> > +	 * initialized.
> > +	 */
> > +	smp_rmb();
> > +	if (revmap_trees_allocated < 1)
> > +		return;
> > +
> > +	if (virq != NO_IRQ) {
> > +		mutex_lock(&revmap_trees_mutex);
> > +		radix_tree_insert(&host->revmap_data.tree, hwirq,
> > +				  &irq_map[virq]);
> > +		mutex_unlock(&revmap_trees_mutex);
> > +	}
> > +}
> > +
> > +unsigned int irq_linear_revmap(struct irq_host *host,
> > +			       irq_hw_number_t hwirq)
> > +{
> > +	unsigned int *revmap;
> > +
> > +	WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR);
> > +
> > +	/* Check revmap bounds */
> > +	if (unlikely(hwirq >= host->revmap_data.linear.size))
> > +		return irq_find_mapping(host, hwirq);
> > +
> > +	/* Check if revmap was allocated */
> > +	revmap = host->revmap_data.linear.revmap;
> > +	if (unlikely(revmap == NULL))
> > +		return irq_find_mapping(host, hwirq);
> > +
> > +	/* Fill up revmap with slow path if no mapping found */
> > +	if (unlikely(revmap[hwirq] == NO_IRQ))
> > +		revmap[hwirq] = irq_find_mapping(host, hwirq);
> > +
> > +	return revmap[hwirq];
> > +}
> > +
> > +unsigned int irq_alloc_virt(struct irq_host *host,
> > +			    unsigned int count,
> > +			    unsigned int hint)
> > +{
> > +	unsigned long flags;
> > +	unsigned int i, j, found = NO_IRQ;
> > +
> > +	if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS))
> > +		return NO_IRQ;
> > +
> > +	raw_spin_lock_irqsave(&irq_big_lock, flags);
> > +
> > +	/* Use hint for 1 interrupt if any */
> > +	if (count == 1 && hint >= NUM_ISA_INTERRUPTS &&
> > +	    hint < irq_virq_count && irq_map[hint].host == NULL) {
> > +		found = hint;
> > +		goto hint_found;
> > +	}
> > +
> > +	/* Look for count consecutive numbers in the allocatable
> > +	 * (non-legacy) space
> > +	 */
> > +	for (i = NUM_ISA_INTERRUPTS, j = 0; i < irq_virq_count; i++) {
> > +		if (irq_map[i].host != NULL)
> > +			j = 0;
> > +		else
> > +			j++;
> > +
> > +		if (j == count) {
> > +			found = i - count + 1;
> > +			break;
> > +		}
> > +	}
> > +	if (found == NO_IRQ) {
> > +		raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > +		return NO_IRQ;
> > +	}
> > + hint_found:
> > +	for (i = found; i < (found + count); i++) {
> > +		irq_map[i].hwirq = host->inval_irq;
> > +		smp_wmb();
> > +		irq_map[i].host = host;
> > +	}
> > +	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > +	return found;
> > +}
> > +
> > +void irq_free_virt(unsigned int virq, unsigned int count)
> > +{
> > +	unsigned long flags;
> > +	unsigned int i;
> > +
> > +	WARN_ON (virq < NUM_ISA_INTERRUPTS);
> > +	WARN_ON (count == 0 || (virq + count) > irq_virq_count);
> > +
> > +	raw_spin_lock_irqsave(&irq_big_lock, flags);
> > +	for (i = virq; i < (virq + count); i++) {
> > +		struct irq_host *host;
> > +
> > +		if (i < NUM_ISA_INTERRUPTS ||
> > +		    (virq + count) > irq_virq_count)
> > +			continue;
> > +
> > +		host = irq_map[i].host;
> > +		irq_map[i].hwirq = host->inval_irq;
> > +		smp_wmb();
> > +		irq_map[i].host = NULL;
> > +	}
> > +	raw_spin_unlock_irqrestore(&irq_big_lock, flags);
> > +}
> > +
> > +/* We need to create the radix trees late */
> > +static int irq_late_init(void)
> > +{
> > +	struct irq_host *h;
> > +	unsigned int i;
> > +
> > +	/*
> > +	 * No mutual exclusion with respect to accessors of the tree is
> > needed
> > +	 * here as the synchronization is done via the state variable
> > +	 * revmap_trees_allocated.
> > +	 */
> > +	list_for_each_entry(h, &irq_hosts, link) {
> > +		if (h->revmap_type == IRQ_HOST_MAP_TREE)
> > +			INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL);
> > +	}
> > +
> > +	/*
> > +	 * Make sure the radix trees inits are visible before setting
> > +	 * the flag
> > +	 */
> > +	smp_wmb();
> > +	revmap_trees_allocated = 1;
> > +
> > +	/*
> > +	 * Insert the reverse mapping for those interrupts already
> > present
> > +	 * in irq_map[].
> > +	 */
> > +	mutex_lock(&revmap_trees_mutex);
> > +	for (i = 0; i < irq_virq_count; i++) {
> > +		if (irq_map[i].host &&
> > +		    (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE))
> > +
> radix_tree_insert(&irq_map[i].host->revmap_data.tree,
> > +					  irq_map[i].hwirq, &irq_map[i]);
> > +	}
> > +	mutex_unlock(&revmap_trees_mutex);
> > +
> > +	/*
> > +	 * Make sure the radix trees insertions are visible before
> > setting
> > +	 * the flag
> > +	 */
> > +	smp_wmb();
> > +	revmap_trees_allocated = 2;
> > +
> > +	return 0;
> > +}
> > +arch_initcall(irq_late_init);
> > +
> > +#ifdef CONFIG_VIRQ_DEBUG
> > +static int virq_debug_show(struct seq_file *m, void *private)
> > +{
> > +	unsigned long flags;
> > +	struct irq_desc *desc;
> > +	const char *p;
> > +	char none[] = "none";
> > +	int i;
> > +
> > +	seq_printf(m, "%-5s  %-7s  %-15s  %s\n", "virq", "hwirq",
> > +		      "chip name", "host name");
> > +
> > +	for (i = 1; i < nr_irqs; i++) {
> > +		desc = irq_to_desc(i);
> > +		if (!desc)
> > +			continue;
> > +
> > +		raw_spin_lock_irqsave(&desc->lock, flags);
> > +
> > +		if (desc->action && desc->action->handler) {
> > +			seq_printf(m, "%5d  ", i);
> > +			seq_printf(m, "0x%05lx  ", virq_to_hw(i));
> > +
> > +			if (desc->chip && desc->chip->name)
> > +				p = desc->chip->name;
> > +			else
> > +				p = none;
> > +			seq_printf(m, "%-15s  ", p);
> > +
> > +			if (irq_map[i].host && irq_map[i].host->of_node)
> > +				p = irq_map[i].host->of_node->full_name;
> > +			else
> > +				p = none;
> > +			seq_printf(m, "%s\n", p);
> > +		}
> > +
> > +		raw_spin_unlock_irqrestore(&desc->lock, flags);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int virq_debug_open(struct inode *inode, struct file *file)
> > +{
> > +	return single_open(file, virq_debug_show, inode->i_private);
> > +}
> > +
> > +static const struct file_operations virq_debug_fops = {
> > +	.open = virq_debug_open,
> > +	.read = seq_read,
> > +	.llseek = seq_lseek,
> > +	.release = single_release,
> > +};
> > +
> > +static int __init irq_debugfs_init(void)
> > +{
> > +	if (debugfs_create_file("virq_mapping", S_IRUGO,
> > powerpc_debugfs_root,
> > +				 NULL, &virq_debug_fops) == NULL)
> > +		return -ENOMEM;
> > +
> > +	return 0;
> > +}
> > +__initcall(irq_debugfs_init);
> > +#endif /* CONFIG_VIRQ_DEBUG */
> > +
> > 
> > _______________________________________________
> > devicetree-discuss mailing list
> > devicetree-discuss@lists.ozlabs.org
> > https://lists.ozlabs.org/listinfo/devicetree-discuss
> 

^ permalink raw reply

* Re: [patch 1/1] powerpc: enable ARCH_DMA_ADDR_T_64BIT with ARCH_PHYS_ADDR_T_64BIT
From: Josh Boyer @ 2010-10-02  1:31 UTC (permalink / raw)
  To: akpm; +Cc: fujita.tomonori, linuxppc-dev
In-Reply-To: <201010012112.o91LCtEk020866@imap1.linux-foundation.org>

On Fri, Oct 1, 2010 at 5:12 PM,  <akpm@linux-foundation.org> wrote:
> From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
>
> Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> ---
>
>  arch/powerpc/Kconfig |    3 +++
>  1 file changed, 3 insertions(+)
>
> diff -puN arch/powerpc/Kconfig~powerpc-enable-arch_dma_addr_t_64bit-with-arch_phys_addr_t_64bit arch/powerpc/Kconfig
> --- a/arch/powerpc/Kconfig~powerpc-enable-arch_dma_addr_t_64bit-with-arch_phys_addr_t_64bit
> +++ a/arch/powerpc/Kconfig
> @@ -16,6 +16,9 @@ config WORD_SIZE
>  config ARCH_PHYS_ADDR_T_64BIT
>         def_bool PPC64 || PHYS_64BIT
>
> +config ARCH_DMA_ADDR_T_64BIT
> +       def_bool ARCH_PHYS_ADDR_T_64BIT
> +

I seem to have missed what this is about entirely.  Is there some
place I can look that describes what that is supposed to do?  The PPC
4xx boards set PHYS_ADDR_T_64BIT because the MMU uses 36 bit
addressing, but the CPU is only 32 bits.  I want to make sure this DMA
thing isn't going to cause problems.

josh

^ permalink raw reply

* Re: [PATCH v6 0/8] ptp: IEEE 1588 hardware clock support
From: M. Warner Losh @ 2010-10-02  1:44 UTC (permalink / raw)
  To: cl
  Cc: richardcochran, alan, peterz, johnstul, devicetree-discuss,
	linuxppc-dev, linux-kernel, christian, netdev, tglx, linux-api,
	giometti, davem, linux-arm-kernel, khc
In-Reply-To: <alpine.DEB.2.00.1009271035110.9258@router.home>

In message: <alpine.DEB.2.00.1009271035110.9258@router.home>
            Christoph Lameter <cl@linux.com> writes:
: On Thu, 23 Sep 2010, Christian Riesch wrote:
: 
: > > > It implies clock tuning in userspace for a potential sub microsecond
: > > > accurate clock. The clock accuracy will be limited by user space
: > > > latencies and noise. You wont be able to discipline the system clock
: > > > accurately.
: > >
: > > Noise matters, latency doesn't.
: >
: > Well put! That's why we need hardware support for PTP timestamping to reduce
: > the noise, but get along well with the clock servo that is steering the PHC in
: > user space.
: 
: Even if I buy into the catch phrase above: User space is subject to noise
: that the in kernel code is not. If you do the tuning over long intervals
: then it hopefully averages out but it still causes jitter effects that
: affects the degree of accuracy (or sync) that you can reach. And the noise
: varies with the load on the system.

Please see the earlier posts in this thread about why this doesn't
matter as much as you might think.  What matters is the measurements
(which are done in hardware and the results buffered), not the latency
in processing those messages through your servo.  This is due to the
fact that the errors that even long latencies introduce are
proportional to the change in fractional frequency[*] of the clock being
steered.  This change is usually on the order of a part per million.
Even 10ms of latency would mean that you're introducing on the
order of sub-nanoseconds of phase error that will be measured in the
next cycle and steered out.

That's why latency doesn't matter.  Do you have other math to show
that it does?

Warner

[*] abs(1 - (clock_freq_old / clock_freq_new)) where clock_freq_old is
the old estimate of the clock and clock_freq_new is the new frequency
estimate of the clock.  Second to second, these change on the order of
a part per million or less...

^ permalink raw reply

* Re: [patch 1/1] powerpc: enable ARCH_DMA_ADDR_T_64BIT with ARCH_PHYS_ADDR_T_64BIT
From: Benjamin Herrenschmidt @ 2010-10-02  3:20 UTC (permalink / raw)
  To: Josh Boyer; +Cc: fujita.tomonori, linuxppc-dev, akpm
In-Reply-To: <AANLkTikrgu5LrYfveax_EavObThNw2M4ukVB3ka1y0Hj@mail.gmail.com>

On Fri, 2010-10-01 at 21:31 -0400, Josh Boyer wrote:
> > +config ARCH_DMA_ADDR_T_64BIT
> > +       def_bool ARCH_PHYS_ADDR_T_64BIT
> > +
> 
> I seemed to have missed what this is about entirely.  Is there some
> place I can look that describes what that is supposed to do?  The PPC
> 4xx boards set PHYS_ADDR_T_64BIT because the MMU uses 36 bit
> addressing, but the CPU is only 32 bits.  I want to make sure this DMA
> thing isn't going to cause problems. 

Yes, we need to test a bit. Our dma_addr_t has remained 32-bit so far
because despite the fact that we've had routinely to deal with >32-bit
physical addresses for MMIO, physical memory support has been
constrained afaik to 32-bit.

We might be better off keeping this as something individual platforms
can select...

Cheers,
Ben.
 

^ permalink raw reply

* [PATCH] usb: gadget: fsl_udc_core: Fix error path
From: Rahul Ruikar @ 2010-10-02  5:41 UTC (permalink / raw)
  To: Li Yang, David Brownell, Greg Kroah-Hartman, Dinh Nguyen,
	Anton Vorontsov
  Cc: Rahul Ruikar, linux-usb, linuxppc-dev, linux-kernel, Rahul Ruikar

call put_device() when device_register() fails.

Signed-off-by: Rahul Ruikar <raul.ruikar@gmail.com>
---
 drivers/usb/gadget/fsl_udc_core.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c
index 08a9a62..491fc7a 100644
--- a/drivers/usb/gadget/fsl_udc_core.c
+++ b/drivers/usb/gadget/fsl_udc_core.c
@@ -2340,8 +2340,10 @@ static int __init fsl_udc_probe(struct platform_device *pdev)
 	udc_controller->gadget.dev.release = fsl_udc_release;
 	udc_controller->gadget.dev.parent = &pdev->dev;
 	ret = device_register(&udc_controller->gadget.dev);
-	if (ret < 0)
+	if (ret < 0) {
+		put_device(&udc_controller->gadget.dev);
 		goto err_free_irq;
+	}
 
 	/* setup QH and epctrl for ep0 */
 	ep0_setup(udc_controller);
-- 
1.7.2.3

^ permalink raw reply related

* Re: Serial RapidIO Maintaintance read causes lock up
From: Bastiaan Nijkamp @ 2010-10-02  7:20 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <AANLkTikSaBR4vEikhGp0fsY3FnZCJQT2D-vw9=Kh4UHn@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 6487 bytes --]

Hi,

It seems I forgot to include the relevant TLB entries in U-Boot and the
Device Tree in the e-mail, so here they are:

The TLB entries in U-Boot:

/*
 * TLB 3: 256M Non-cacheable, guarded
 * 0xc0000000 256M Rapid IO MEM First half
 */
SET_TLB_ENTRY(1, CONFIG_SYS_RIO_MEM_VIRT, CONFIG_SYS_RIO_MEM_PHYS,
      MAS3_SX|MAS3_SW|MAS3_SR, MAS2_I|MAS2_G,
      0, 3, BOOKE_PAGESZ_256M, 1),

/*
 * TLB 4: 256M Non-cacheable, guarded
 * 0xd0000000 256M Rapid IO MEM Second half
 */
SET_TLB_ENTRY(1, CONFIG_SYS_RIO_MEM_VIRT + 0x10000000,
CONFIG_SYS_RIO_MEM_PHYS + 0x10000000,
      MAS3_SX|MAS3_SW|MAS3_SR, MAS2_I|MAS2_G,
      0, 4, BOOKE_PAGESZ_256M, 1),


And the device tree entry:

 rapidio0:rapidio@c0000 {
           #address-cells = <1>;
           #size-cells = <1>;
           compatible = "fsl,rapidio-delta";
           reg = <0xc0000 0x20000>;
           ranges = <0x0 0xc0000000 0x20000000>;
           interrupt-parent = <&mpic>;
           /* err_irq bell_outb_irq bell_inb_irq
                   msg1_tx_irq msg1_rx_irq msg2_tx_irq msg2_rx_irq */
           interrupts = <0x30 0x2 0x31 0x2 0x32 0x2 0x35 0x2 0x36 0x2 0x37
0x2 0x38 0x2>;
  };

Regards,
Bastiaan Nijkamp

2010/10/2 Bastiaan Nijkamp <bastiaan.nijkamp@gmail.com>

> Hi,
>
> We are currently evaluating Serial RapidIO on two WindRiver SBC8548 boards
> that use a Freescale PowerQUICC III processor (MPC8548E rev. 2). We are
> running U-Boot version 2010.09 as bootloader and are using kernel version
> 2.6.35.6 stable.
>
> We have consulted multiple resources to collect all the requirements for
> a successful RapidIO connection (LAW, TLB, Registers) and we seem to have
> configured everything correctly. However, as soon as the board that is
> configured as the host starts the enumeration process, the system locks up.
> It locks in such a manner that we cannot use a JTAG interface to read any of
> the registers.  We have also added a breakpoint just before the command that
> causes the lock up, to make sure the registers are correctly set at that
> point, and it seems they are.
>
> We have triple-checked everything that we could possibly think of and
> everything seems to be configured as required but the system keeps
> locking-up so there must be something that we are missing. I really hope
> that someone could point us in the right direction. The lock-up occurs when
> __fsl_read_rio_config is called by fsl_rio_config_read in fsl-rio.c.
>
> The LAW and TLB entries we have added to U-Boot are as follows:
>
> #define CONFIG_RIO 1
> #define CONFIG_SYS_RIO_MEM_VIRT 0xc0000000 /* base address */
> #define CONFIG_SYS_RIO_MEM_BUS 0xc0000000 /* base address */
> #define CONFIG_SYS_RIO_MEM_PHYS 0xc0000000
> #define CONFIG_SYS_RIO_MEM_SIZE 0x20000000 /* 512M */
>
> SET_LAW(CONFIG_SYS_RIO_MEM_PHYS, LAW_SIZE_512M, LAW_TRGT_IF_RIO),
>
> -------------
>
> Here is the kernel log:
>
> Using SBC8548 machine description
> Memory CAM mapping: 256 Mb, residual: 0Mb
> Linux version 2.6.35.6 (dl704@lxws006) (gcc version 4.1.2 (Wind River
> Linux Sourcery G++ 4.1-91)) #7 We
> d Sep 29 13:27:18 CEST 2010
> bootconsole [udbg0] enabled
> setup_arch: bootmem
> sbc8548_setup_arch()
> arch: exit
> Zone PFN ranges:
>  DMA      0x00000000 -> 0x00010000
>  Normal   empty
> Movable zone start PFN for each node
> early_node_map[1] active PFN ranges
>    0: 0x00000000 -> 0x00010000
> MMU: Allocated 1088 bytes of context maps for 255 contexts
> Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 65024
> Kernel command line: root=/dev/nfs rw nfsroot=192.168.100.21:/thales/target/rfs/sbc8548_wrlinux4
> ip=192
> .168.100.151:192.168.100.21:192.168.100.21:255.255.255.0:sbc8548_1:eth0:off
> console=ttyS0,115200 riohdid=1
> PID hash table entries: 1024 (order: 0, 4096 bytes)
> Dentry cache hash table entries: 32768 (order: 5, 131072 bytes)
> Inode-cache hash table entries: 16384 (order: 4, 65536 bytes)
> Memory: 256884k/262144k available (2712k kernel code, 5260k reserved, 112k
> data, 77k bss, 144k init)
> Kernel virtual memory layout:
>  * 0xfffdf000..0xfffff000  : fixmap
>  * 0xfc7f9000..0xfe000000  : early ioremap
>  * 0xd1000000..0xfc7f9000  : vmalloc & ioremap
> Hierarchical RCU implementation.
>        RCU-based detection of stalled CPUs is disabled.
>        Verbose stalled-CPUs detection is disabled.
> NR_IRQS:512 nr_irqs:512
> mpic: Setting up MPIC " OpenPIC  " version 1.2 at e0040000, max 1 CPUs
> mpic: ISU size: 80, shift: 7, mask: 7f
> mpic: Initializing for 80 sources
> clocksource: timebase mult[50cede6] shift[22] registered
> pid_max: default: 32768 minimum: 301
> Mount-cache hash table entries: 512
> NET: Registered protocol family 16
>
> PCI: Probing PCI hardware
> bio: create slab <bio-0> at 0
> vgaarb: loaded
> Switching to clocksource timebase
> NET: Registered protocol family 2
> IP route cache hash table entries: 2048 (order: 1, 8192 bytes)
> TCP established hash table entries: 8192 (order: 4, 65536 bytes)
> TCP bind hash table entries: 8192 (order: 3, 32768 bytes)
> TCP: Hash tables configured (established 8192 bind 8192)
> TCP reno registered
> UDP hash table entries: 256 (order: 0, 4096 bytes)
> UDP-Lite hash table entries: 256 (order: 0, 4096 bytes)
> NET: Registered protocol family 1
> RPC: Registered udp transport module.
> RPC: Registered tcp transport module.
> RPC: Registered tcp NFSv4.1 backchannel transport module.
> Setting up RapidIO peer-to-peer network /soc8548@e0000000/rapidio@c0000
> fsl-of-rio e00c0000.rapidio: Of-device full name /soc8548@e0000000
> /rapidio@c0000
> fsl-of-rio e00c0000.rapidio: Regs: [mem 0xe00c0000-0xe00dffff]
> fsl-of-rio e00c0000.rapidio: LAW start 0x00000000c0000000, size
> 0x0000000020000000.
> fsl-of-rio e00c0000.rapidio: pwirq: 48, bellirq: 50, txirq: 53, rxirq 54
> fsl-of-rio e00c0000.rapidio: DeviceID is 0x1
> fsl-of-rio e00c0000.rapidio: Configured as HOST
> fsl-of-rio e00c0000.rapidio: RapidIO PHY type: serial
> fsl-of-rio e00c0000.rapidio: Hardware port width: 4
> fsl-of-rio e00c0000.rapidio: Training connection status: Four-lane
> fsl-of-rio e00c0000.rapidio: RapidIO Common Transport System size: 256
> RIO: enumerate master port 0, RIO0 mport
> fsl_rio_config_read: index 0 destid 255 hopcount 0 offset 00000068 len 4
> fsl_rio_config_read: Passed IS_ALIGNED.
> fsl_rio_config_read: Passed 'out_be32_1'
> fsl_rio_config_read: Passed 'out_be32_2'
> fsl_rio_config_read: len is 4
> fsl_rio_config_read: about to trigger '__fsl_read_rio_config'
>
> Regards,
>  Bastiaan Nijkamp
>

[-- Attachment #2: Type: text/html, Size: 8642 bytes --]

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox