LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] powerpc/85xx: dts - add ranges property for SEC
From: Po Liu @ 2013-02-19  0:29 UTC (permalink / raw)
  To: linuxppc-dev, Kumar Gala; +Cc: Po Liu

This facilitates getting the physical address of the SEC node.

Signed-off-by: Liu po <po.liu@freescale.com>
---
 arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi
index d4c9d5d..ffadcb5 100644
--- a/arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi
@@ -36,6 +36,7 @@ crypto@30000 {
 	compatible = "fsl,sec-v4.4", "fsl,sec-v4.0";
 	#address-cells = <1>;
 	#size-cells = <1>;
+	ranges		 = <0x0 0x30000 0x10000>;
 	reg		 = <0x30000 0x10000>;
 	interrupts	 = <58 2 0 0>;
 
-- 
1.6.4

^ permalink raw reply related

* Re: [PATCH v5 00/45] CPU hotplug: stop_machine()-free CPU hotplug
From: Steven Rostedt @ 2013-02-18 19:53 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: linux-doc, peterz, fweisbec, linux-kernel, walken, mingo,
	linux-arch, Russell King - ARM Linux, xiaoguangrong, wangyun,
	paulmck, nikunj, linux-pm, Rusty Russell, rjw, namhyung, tglx,
	linux-arm-kernel, netdev, oleg, sbw, Srivatsa S. Bhat, tj, akpm,
	linuxppc-dev
In-Reply-To: <CAKfTPtC4hbXXWO=_zsGYryM9hpWaGFLbiY+jiLf=koLa4xGJOQ@mail.gmail.com>

On Mon, 2013-02-18 at 17:50 +0100, Vincent Guittot wrote:

> yes for sure.
> The problem is more linked to cpuidle and function tracer.
> 
> cpu hotplug and function tracer work when cpuidle is disable.
> cpu hotplug and cpuidle works if i don't enable function tracer.
> my platform is dead as soon as I enable function tracer if cpuidle is
> enabled. It looks like some notrace are missing in my platform driver
> but we haven't completely fix the issue yet
> 

You can bisect to find out exactly what function is the problem:

 cat /debug/tracing/available_filter_functions > t
 
f(t) {
 num=`wc -l t`
 sed -ne "1,${num}p" t > t1
 let num=num+1
 sed -ne "${num},$p" t > t2

 cat t1 > /debug/tracing/set_ftrace_filter
 # note this may take a long time to finish

 echo function > /debug/tracing/current_tracer

 <failed? bisect f(t1), if not bisect f(t2)>
}

-- Steve

^ permalink raw reply

* Re: [PATCH v5 00/45] CPU hotplug: stop_machine()-free CPU hotplug
From: Steven Rostedt @ 2013-02-18 19:53 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: linux-doc, peterz, fweisbec, linux-kernel, walken, mingo,
	linux-arch, Russell King - ARM Linux, xiaoguangrong, wangyun,
	paulmck, nikunj, linux-pm, Rusty Russell, rjw, namhyung, tglx,
	linux-arm-kernel, netdev, oleg, sbw, Srivatsa S. Bhat, tj, akpm,
	linuxppc-dev
In-Reply-To: <CAKfTPtC4hbXXWO=_zsGYryM9hpWaGFLbiY+jiLf=koLa4xGJOQ@mail.gmail.com>

On Mon, 2013-02-18 at 17:50 +0100, Vincent Guittot wrote:

> yes for sure.
> The problem is more linked to cpuidle and function tracer.
> 
> cpu hotplug and function tracer work when cpuidle is disable.
> cpu hotplug and cpuidle works if i don't enable function tracer.
> my platform is dead as soon as I enable function tracer if cpuidle is
> enabled. It looks like some notrace are missing in my platform driver
> but we haven't completely fix the issue yet
> 

You can bisect to find out exactly what function is the problem:

 cat /debug/tracing/available_filter_functions > t
 
f(t) {
 num=`wc -l t`
 sed -ne "1,${num}p" t > t1
 let num=num+1
 sed -ne "${num},$p" t > t2

 cat t1 > /debug/tracing/set_ftrace_filter
 # note this may take a long time to finish

 echo function > /debug/tracing/current_tracer

 <failed? bisect f(t1), if not bisect f(t2)>
}

^ permalink raw reply

* Re: [PATCH][UPSTEAM] powerpc/mpic: add irq_set_wake support
From: Kumar Gala @ 2013-02-18 19:43 UTC (permalink / raw)
  To: Wang Dongsheng; +Cc: linuxppc-dev
In-Reply-To: <1359601823-9861-1-git-send-email-dongsheng.wang@freescale.com>


On Jan 30, 2013, at 9:10 PM, Wang Dongsheng wrote:

> Add irq_set_wake support. Just add IRQF_NO_SUSPEND to =
desc->action->flag.
> So the wake up interrupt will not be disable in suspend_device_irqs.
>=20
> Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
> ---
> arch/powerpc/sysdev/mpic.c |   15 +++++++++++++++
> 1 files changed, 15 insertions(+), 0 deletions(-)

Why are we doing this globally for all interrupts?  Don't we only have =
some specific interrupts that wake us up?

Also, I'm guessing the wake behavior for interrupts is FSL specific so =
should not apply to ALL users of MPIC.

- k

>=20
> diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
> index 9c6e535..2ed0220 100644
> --- a/arch/powerpc/sysdev/mpic.c
> +++ b/arch/powerpc/sysdev/mpic.c
> @@ -920,6 +920,18 @@ int mpic_set_irq_type(struct irq_data *d, =
unsigned int flow_type)
> 	return IRQ_SET_MASK_OK_NOCOPY;
> }
>=20
> +static int mpic_irq_set_wake(struct irq_data *d, unsigned int on)
> +{
> +	struct irq_desc *desc =3D container_of(d, struct irq_desc, =
irq_data);
> +
> +	if (on)
> +		desc->action->flags |=3D IRQF_NO_SUSPEND;
> +	else
> +		desc->action->flags &=3D ~IRQF_NO_SUSPEND;
> +
> +	return 0;
> +}
> +
> void mpic_set_vector(unsigned int virq, unsigned int vector)
> {
> 	struct mpic *mpic =3D mpic_from_irq(virq);
> @@ -957,6 +969,7 @@ static struct irq_chip mpic_irq_chip =3D {
> 	.irq_unmask	=3D mpic_unmask_irq,
> 	.irq_eoi	=3D mpic_end_irq,
> 	.irq_set_type	=3D mpic_set_irq_type,
> +	.irq_set_wake	=3D mpic_irq_set_wake,
> };
>=20
> #ifdef CONFIG_SMP
> @@ -971,6 +984,7 @@ static struct irq_chip mpic_tm_chip =3D {
> 	.irq_mask	=3D mpic_mask_tm,
> 	.irq_unmask	=3D mpic_unmask_tm,
> 	.irq_eoi	=3D mpic_end_irq,
> +	.irq_set_wake	=3D mpic_irq_set_wake,
> };
>=20
> #ifdef CONFIG_MPIC_U3_HT_IRQS
> @@ -981,6 +995,7 @@ static struct irq_chip mpic_irq_ht_chip =3D {
> 	.irq_unmask	=3D mpic_unmask_ht_irq,
> 	.irq_eoi	=3D mpic_end_ht_irq,
> 	.irq_set_type	=3D mpic_set_irq_type,
> +	.irq_set_wake	=3D mpic_irq_set_wake,
> };
> #endif /* CONFIG_MPIC_U3_HT_IRQS */
>=20
> --
> 1.7.5.1
>=20
>=20
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* Re: [PATCH v6 08/46] CPU hotplug: Provide APIs to prevent CPU offline from atomic context
From: Srivatsa S. Bhat @ 2013-02-18 18:50 UTC (permalink / raw)
  To: Michel Lespinasse
  Cc: linux-doc, peterz, fweisbec, linux-kernel, mingo, linux-arch,
	linux, xiaoguangrong, wangyun, paulmck, nikunj, linux-pm, rusty,
	rostedt, rjw, namhyung, tglx, linux-arm-kernel, netdev, oleg,
	vincent.guittot, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <CANN689FJPLfz06MFRJ9T+BuToejAzjYP_yrw_H4iGi1Kb3p2-g@mail.gmail.com>

On 02/18/2013 10:51 PM, Michel Lespinasse wrote:
> On Tue, Feb 19, 2013 at 12:43 AM, Srivatsa S. Bhat
> <srivatsa.bhat@linux.vnet.ibm.com> wrote:
>> On 02/18/2013 09:53 PM, Michel Lespinasse wrote:
>>> I am wondering though, if you could take care of recursive uses in
>>> get/put_online_cpus_atomic() instead of doing it as a property of your
>>> rwlock:
>>>
>>> get_online_cpus_atomic()
>>> {
>>>     unsigned long flags;
>>>     local_irq_save(flags);
>>>     if (this_cpu_inc_return(hotplug_recusion_count) == 1)
>>>         percpu_read_lock_irqsafe(&hotplug_pcpu_rwlock);
>>>     local_irq_restore(flags);
>>> }
>>>
>>> Once again, the idea there is to avoid baking the reader side
>>> recursive properties into your rwlock itself, so that it won't be
>>> impossible to implement reader/writer fairness into your rwlock in the
>>> future (which may be not be very important for the hotplug use, but
>>> could be when other uses get introduced).
>>
>> Hmm, your proposal above looks good to me, at first glance.
>> (Sorry, I had mistaken your earlier mails to mean that you were against
>> recursive reader-side, while you actually meant that you didn't like
>> implementing the recursive reader-side logic using the recursive property
>> of rwlocks).
> 
> To be honest, I was replying as I went through the series, so I hadn't
> digested your hotplug use case yet :)
> 
> But yes - I don't like having the rwlock itself be recursive, but I do
> understand that you have a legitimate requirement for
> get_online_cpus_atomic() to be recursive. This IMO points to the
> direction I suggested, of explicitly handling the recusion in
> get_online_cpus_atomic() so that the underlying rwlock doesn't have to
> support recursive reader side itself.
> 
> (And this would work for the idea of making writers own the reader
> side as well - you can do it with the hotplug_recursion_count instead
> of with the underlying rwlock).
> 
>> While your idea above looks good, it might introduce more complexity
>> in the unlock path, since this would allow nesting of heterogeneous readers
>> (ie., if hotplug_recursion_count == 1, you don't know whether you need to
>> simply decrement the counter or unlock the rwlock as well).
> 
> Well, I think the idea doesn't make the underlying rwlock more
> complex, since you could in principle keep your existing
> percpu_read_lock_irqsafe implementation as is and just remove the
> recursive behavior from its documentation.
> 
> Now ideally if we're adding a bit of complexity in
> get_online_cpus_atomic() it'd be nice if we could remove some from
> percpu_read_lock_irqsafe, but I haven't thought about that deeply
> either. I think you'd still want to have the equivalent of a percpu
> reader_refcnt, except it could now be a bool instead of an int, and
> percpu_read_lock_irqsafe would still set it to back to 0/false after
> acquiring the global read side if a writer is signaled. Basically your
> existing percpu_read_lock_irqsafe code should still work, and we could
> remove just the parts that were only there to deal with the recursive
> property.
> 

But, the whole intention behind removing the parts depending on the
recursive property of rwlocks would be to make it easier to make rwlocks
fair (going forward) right? Then, that won't work for CPU hotplug, because,
just like we have a legitimate reason to have recursive
get_online_cpus_atomic(), we also have a legitimate reason to have
unfairness in locking (i.e., for deadlock-safety). So we simply can't
afford to make the locking fair - we'll end up in too many deadlock
possibilities, as hinted in the changelog of patch 1.

(Remember, we are replacing preempt_disable(), which had absolutely no
special nesting rules or locking implications. That is why we are forced
to provide maximum locking flexibility and safety against new/previously
non-existent deadlocks, in the new synchronization scheme).

So the only long-term solution I can think of is to decouple
percpu-rwlocks and rwlock_t (like what Tejun suggested) by implementing
our own unfair locking scheme inside. What do you think?

Regards,
Srivatsa S. Bhat

^ permalink raw reply

* Re: [PATCH v6 04/46] percpu_rwlock: Implement the core design of Per-CPU Reader-Writer Locks
From: Srivatsa S. Bhat @ 2013-02-18 18:14 UTC (permalink / raw)
  To: Michel Lespinasse
  Cc: linux-doc, peterz, fweisbec, linux-kernel, mingo, linux-arch,
	linux, xiaoguangrong, wangyun, paulmck, nikunj, linux-pm, rusty,
	rostedt, rjw, namhyung, tglx, linux-arm-kernel, netdev, oleg,
	vincent.guittot, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <CANN689FSxOz+0Cu7EG_yKD8ZE1OpT4kyT+ybLfXSqaifodJRpw@mail.gmail.com>

On 02/18/2013 11:37 PM, Michel Lespinasse wrote:
> On Tue, Feb 19, 2013 at 1:56 AM, Srivatsa S. Bhat
> <srivatsa.bhat@linux.vnet.ibm.com> wrote:
>> On 02/18/2013 09:51 PM, Srivatsa S. Bhat wrote:
>>> On 02/18/2013 09:15 PM, Michel Lespinasse wrote:
>>>> I don't see anything preventing a race with the corresponding code in
>>>> percpu_write_unlock() that sets writer_signal back to false. Did I
>>>> miss something here ? It seems to me we don't have any guarantee that
>>>> all writer signals will be set to true at the end of the loop...
>>>
>>> Ah, thanks for pointing that out! IIRC Oleg had pointed this issue in the last
>>> version, but back then, I hadn't fully understood what he meant. Your
>>> explanation made it clear. I'll work on fixing this.
>>
>> We can fix this by using the simple patch (untested) shown below.
>> The alternative would be to acquire the rwlock for write, update the
>> ->writer_signal values, release the lock, wait for readers to switch,
>> again acquire the rwlock for write with interrupts disabled etc... which
>> makes it kinda messy, IMHO. So I prefer the simple version shown below.
> 
> Looks good.
> 
> Another alternative would be to make writer_signal an atomic integer
> instead of a bool. That way writers can increment it before locking
> and decrement it while unlocking.
> 

Yep, that would also do. But the spinlock version looks simpler - no need
to check if the atomic counter is non-zero, no need to explicitly spin in
a tight-loop etc.

> To reduce the number of atomic ops during writer lock/unlock, the
> writer_signal could also be a global read_mostly variable (I don't see
> any downsides to that compared to having it percpu - or is it because
> you wanted all the fastpath state to be in one single cacheline ?)
> 

Yes, we (Oleg and I) debated for a while about global vs percpu, and then
finally decided to go with percpu to have cache benefits.

Regards,
Srivatsa S. Bhat

^ permalink raw reply

* Re: [PATCH v6 04/46] percpu_rwlock: Implement the core design of Per-CPU Reader-Writer Locks
From: Michel Lespinasse @ 2013-02-18 18:07 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: linux-doc, peterz, fweisbec, linux-kernel, namhyung, mingo,
	linux-arch, linux, xiaoguangrong, wangyun, paulmck, nikunj,
	linux-pm, rusty, rostedt, rjw, vincent.guittot, tglx,
	linux-arm-kernel, netdev, oleg, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <51226B46.9080707@linux.vnet.ibm.com>

On Tue, Feb 19, 2013 at 1:56 AM, Srivatsa S. Bhat
<srivatsa.bhat@linux.vnet.ibm.com> wrote:
> On 02/18/2013 09:51 PM, Srivatsa S. Bhat wrote:
>> On 02/18/2013 09:15 PM, Michel Lespinasse wrote:
>>> I don't see anything preventing a race with the corresponding code in
>>> percpu_write_unlock() that sets writer_signal back to false. Did I
>>> miss something here ? It seems to me we don't have any guarantee that
>>> all writer signals will be set to true at the end of the loop...
>>
>> Ah, thanks for pointing that out! IIRC Oleg had pointed this issue in the last
>> version, but back then, I hadn't fully understood what he meant. Your
>> explanation made it clear. I'll work on fixing this.
>
> We can fix this by using the simple patch (untested) shown below.
> The alternative would be to acquire the rwlock for write, update the
> ->writer_signal values, release the lock, wait for readers to switch,
> again acquire the rwlock for write with interrupts disabled etc... which
> makes it kinda messy, IMHO. So I prefer the simple version shown below.

Looks good.

Another alternative would be to make writer_signal an atomic integer
instead of a bool. That way writers can increment it before locking
and decrement it while unlocking.

To reduce the number of atomic ops during writer lock/unlock, the
writer_signal could also be a global read_mostly variable (I don't see
any downsides to that compared to having it percpu - or is it because
you wanted all the fastpath state to be in one single cacheline ?)

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.

^ permalink raw reply

* Re: [PATCH v6 04/46] percpu_rwlock: Implement the core design of Per-CPU Reader-Writer Locks
From: Srivatsa S. Bhat @ 2013-02-18 17:56 UTC (permalink / raw)
  To: Michel Lespinasse
  Cc: linux-doc, peterz, fweisbec, linux-kernel, namhyung, mingo,
	linux-arch, linux, xiaoguangrong, wangyun, paulmck, nikunj,
	linux-pm, rusty, rostedt, rjw, vincent.guittot, tglx,
	linux-arm-kernel, netdev, oleg, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <5122551E.1080703@linux.vnet.ibm.com>

On 02/18/2013 09:51 PM, Srivatsa S. Bhat wrote:
> Hi Michel,
> 
> On 02/18/2013 09:15 PM, Michel Lespinasse wrote:
>> Hi Srivasta,
>>
>> I admit not having followed in detail the threads about the previous
>> iteration, so some of my comments may have been discussed already
>> before - apologies if that is the case.
>>
>> On Mon, Feb 18, 2013 at 8:38 PM, Srivatsa S. Bhat
>> <srivatsa.bhat@linux.vnet.ibm.com> wrote:
>>> Reader-writer locks and per-cpu counters are recursive, so they can be
>>> used in a nested fashion in the reader-path, which makes per-CPU rwlocks also
>>> recursive. Also, this design of switching the synchronization scheme ensures
>>> that you can safely nest and use these locks in a very flexible manner.
[...]
>>>  void percpu_write_lock(struct percpu_rwlock *pcpu_rwlock)
>>>  {
>>> +       unsigned int cpu;
>>> +
>>> +       /*
>>> +        * Tell all readers that a writer is becoming active, so that they
>>> +        * start switching over to the global rwlock.
>>> +        */
>>> +       for_each_possible_cpu(cpu)
>>> +               per_cpu_ptr(pcpu_rwlock->rw_state, cpu)->writer_signal = true;
>>
>> I don't see anything preventing a race with the corresponding code in
>> percpu_write_unlock() that sets writer_signal back to false. Did I
>> miss something here ? It seems to me we don't have any guarantee that
>> all writer signals will be set to true at the end of the loop...
>>
> 
> Ah, thanks for pointing that out! IIRC Oleg had pointed this issue in the last
> version, but back then, I hadn't fully understood what he meant. Your
> explanation made it clear. I'll work on fixing this.
> 

We can fix this by using the simple patch (untested) shown below.
The alternative would be to acquire the rwlock for write, update the
->writer_signal values, release the lock, wait for readers to switch,
again acquire the rwlock for write with interrupts disabled etc... which
makes it kinda messy, IMHO. So I prefer the simple version shown below.


diff --git a/lib/percpu-rwlock.c b/lib/percpu-rwlock.c
index bf95e40..64ccd3f 100644
--- a/lib/percpu-rwlock.c
+++ b/lib/percpu-rwlock.c
@@ -50,6 +50,12 @@
 	(__this_cpu_read((pcpu_rwlock)->rw_state->writer_signal))
 
 
+/*
+ * Spinlock to synchronize access to the writer's data-structures
+ * (->writer_signal) from multiple writers.
+ */
+static DEFINE_SPINLOCK(writer_side_lock);
+
 int __percpu_init_rwlock(struct percpu_rwlock *pcpu_rwlock,
 			 const char *name, struct lock_class_key *rwlock_key)
 {
@@ -191,6 +197,8 @@ void percpu_write_lock_irqsave(struct percpu_rwlock *pcpu_rwlock,
 {
 	unsigned int cpu;
 
+	spin_lock(&writer_side_lock);
+
 	/*
 	 * Tell all readers that a writer is becoming active, so that they
 	 * start switching over to the global rwlock.
@@ -252,5 +260,6 @@ void percpu_write_unlock_irqrestore(struct percpu_rwlock *pcpu_rwlock,
 		per_cpu_ptr(pcpu_rwlock->rw_state, cpu)->writer_signal = false;
 
 	write_unlock_irqrestore(&pcpu_rwlock->global_rwlock, *flags);
+	spin_unlock(&writer_side_lock);
 }

^ permalink raw reply related

* Re: [PATCH v6 08/46] CPU hotplug: Provide APIs to prevent CPU offline from atomic context
From: Michel Lespinasse @ 2013-02-18 17:21 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: linux-doc, peterz, fweisbec, linux-kernel, mingo, linux-arch,
	linux, xiaoguangrong, wangyun, paulmck, nikunj, linux-pm, rusty,
	rostedt, rjw, namhyung, tglx, linux-arm-kernel, netdev, oleg,
	vincent.guittot, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <51225A36.40600@linux.vnet.ibm.com>

On Tue, Feb 19, 2013 at 12:43 AM, Srivatsa S. Bhat
<srivatsa.bhat@linux.vnet.ibm.com> wrote:
> On 02/18/2013 09:53 PM, Michel Lespinasse wrote:
>> I am wondering though, if you could take care of recursive uses in
>> get/put_online_cpus_atomic() instead of doing it as a property of your
>> rwlock:
>>
>> get_online_cpus_atomic()
>> {
>>     unsigned long flags;
>>     local_irq_save(flags);
>>     if (this_cpu_inc_return(hotplug_recusion_count) == 1)
>>         percpu_read_lock_irqsafe(&hotplug_pcpu_rwlock);
>>     local_irq_restore(flags);
>> }
>>
>> Once again, the idea there is to avoid baking the reader side
>> recursive properties into your rwlock itself, so that it won't be
>> impossible to implement reader/writer fairness into your rwlock in the
>> future (which may be not be very important for the hotplug use, but
>> could be when other uses get introduced).
>
> Hmm, your proposal above looks good to me, at first glance.
> (Sorry, I had mistaken your earlier mails to mean that you were against
> recursive reader-side, while you actually meant that you didn't like
> implementing the recursive reader-side logic using the recursive property
> of rwlocks).

To be honest, I was replying as I went through the series, so I hadn't
digested your hotplug use case yet :)

But yes - I don't like having the rwlock itself be recursive, but I do
understand that you have a legitimate requirement for
get_online_cpus_atomic() to be recursive. This IMO points to the
direction I suggested, of explicitly handling the recusion in
get_online_cpus_atomic() so that the underlying rwlock doesn't have to
support recursive reader side itself.

(And this would work for the idea of making writers own the reader
side as well - you can do it with the hotplug_recursion_count instead
of with the underlying rwlock).

> While your idea above looks good, it might introduce more complexity
> in the unlock path, since this would allow nesting of heterogeneous readers
> (ie., if hotplug_recursion_count == 1, you don't know whether you need to
> simply decrement the counter or unlock the rwlock as well).

Well, I think the idea doesn't make the underlying rwlock more
complex, since you could in principle keep your existing
percpu_read_lock_irqsafe implementation as is and just remove the
recursive behavior from its documentation.

Now ideally if we're adding a bit of complexity in
get_online_cpus_atomic() it'd be nice if we could remove some from
percpu_read_lock_irqsafe, but I haven't thought about that deeply
either. I think you'd still want to have the equivalent of a percpu
reader_refcnt, except it could now be a bool instead of an int, and
percpu_read_lock_irqsafe would still set it to back to 0/false after
acquiring the global read side if a writer is signaled. Basically your
existing percpu_read_lock_irqsafe code should still work, and we could
remove just the parts that were only there to deal with the recursive
property.

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.

^ permalink raw reply

* Accumulating PPC_FEATURE_ARCH bits
From: Richard Henderson @ 2013-02-18 17:06 UTC (permalink / raw)
  To: linuxppc-dev

If one wants to test the PPC_FEATURE_ARCH_ISA_2_0[56] bits, one has to 
be careful that if 2_06 is set, 2_05 won't be.  This seems illogical to 
me: given that 2.06 includes all of the 2.05 instructions, it would make 
most sense if both bits were set in the hwcap.

Now, obviously I have to code around existing practice, but going 
forward this isn't tennable.  When a Power 2.07 ISA is released, I'd 
rather not have existing programs degrade because suddenly the 2_06 bit 
is no longer set.

Thanks,

r~

^ permalink raw reply

* Re: [PATCH v5 00/45] CPU hotplug: stop_machine()-free CPU hotplug
From: Vincent Guittot @ 2013-02-18 16:50 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-doc, peterz, fweisbec, linux-kernel, walken, mingo,
	linux-arch, Russell King - ARM Linux, xiaoguangrong, wangyun,
	paulmck, nikunj, linux-pm, Rusty Russell, rjw, namhyung, tglx,
	linux-arm-kernel, netdev, oleg, sbw, Srivatsa S. Bhat, tj, akpm,
	linuxppc-dev
In-Reply-To: <1361201422.23152.154.camel@gandalf.local.home>

On 18 February 2013 16:30, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Mon, 2013-02-18 at 11:58 +0100, Vincent Guittot wrote:
>
>> My tests have been done without cpuidle because i have some issues
>> with function tracer and cpuidle
>>
>> But the cpu hotplug and cpuidle work well when I run the tests without
>> enabling the function tracer
>>
>
> I know suspend and resume has issues with function tracing (because it
> makes things like calling smp_processor_id() crash the system), but I'm
> unaware of issues with hotplug itself. Could be some of the same issues.
>
> Can you give me more details, I'll try to investigate it.

yes for sure.
The problem is more linked to cpuidle and function tracer.

cpu hotplug and function tracer work when cpuidle is disable.
cpu hotplug and cpuidle works if i don't enable function tracer.
my platform is dead as soon as I enable function tracer if cpuidle is
enabled. It looks like some notrace are missing in my platform driver
but we haven't completely fix the issue yet

Vincent

>
> Thanks,
>
> -- Steve
>
>

^ permalink raw reply

* Re: [PATCH v6 04/46] percpu_rwlock: Implement the core design of Per-CPU Reader-Writer Locks
From: Srivatsa S. Bhat @ 2013-02-18 16:46 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-doc, peterz, fweisbec, linux-kernel, Michel Lespinasse,
	mingo, linux-arch, linux, xiaoguangrong, wangyun, paulmck, nikunj,
	linux-pm, rusty, rjw, namhyung, tglx, linux-arm-kernel, netdev,
	oleg, vincent.guittot, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <1361205087.23152.159.camel@gandalf.local.home>

On 02/18/2013 10:01 PM, Steven Rostedt wrote:
> On Mon, 2013-02-18 at 21:51 +0530, Srivatsa S. Bhat wrote:
>> Hi Michel,
> 
>> Yes.. I don't think we can avoid that. Moreover, since we _want_ unfair
>> reader/writer semantics to allow flexible locking rules and guarantee
>> deadlock-safety, having a recursive reader side is not even an issue, IMHO.
> 
> Recursive unfair reader lock may guarantee deadlock-safety, but
> remember, it adds a higher probability of live-locking the write_lock.
> Which is another argument to keep this separate to cpu hotplug only.
> 

True.
 
Regards,
Srivatsa S. Bhat

^ permalink raw reply

* Re: [PATCH v6 08/46] CPU hotplug: Provide APIs to prevent CPU offline from atomic context
From: Srivatsa S. Bhat @ 2013-02-18 16:43 UTC (permalink / raw)
  To: Michel Lespinasse
  Cc: linux-doc, peterz, fweisbec, linux-kernel, mingo, linux-arch,
	linux, xiaoguangrong, wangyun, paulmck, nikunj, linux-pm, rusty,
	rostedt, rjw, namhyung, tglx, linux-arm-kernel, netdev, oleg,
	vincent.guittot, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <CANN689HhVxDv+3Bn9UAxOMzj0egTvVp_c7oAmQ2nnQ8Xjn_aMA@mail.gmail.com>

On 02/18/2013 09:53 PM, Michel Lespinasse wrote:
> On Mon, Feb 18, 2013 at 8:39 PM, Srivatsa S. Bhat
> <srivatsa.bhat@linux.vnet.ibm.com> wrote:
>> Some important design requirements and considerations:
>> -----------------------------------------------------
[...]
>> +/*
>> + * Invoked by atomic hotplug reader (a task which wants to prevent
>> + * CPU offline, but which can't afford to sleep), to prevent CPUs from
>> + * going offline. So, you can call this function from atomic contexts
>> + * (including interrupt handlers).
>> + *
>> + * Note: This does NOT prevent CPUs from coming online! It only prevents
>> + * CPUs from going offline.
>> + *
>> + * You can call this function recursively.
>> + *
>> + * Returns with preemption disabled (but interrupts remain as they are;
>> + * they are not disabled).
>> + */
>> +void get_online_cpus_atomic(void)
>> +{
>> +       percpu_read_lock_irqsafe(&hotplug_pcpu_rwlock);
>> +}
>> +EXPORT_SYMBOL_GPL(get_online_cpus_atomic);
>> +
>> +void put_online_cpus_atomic(void)
>> +{
>> +       percpu_read_unlock_irqsafe(&hotplug_pcpu_rwlock);
>> +}
>> +EXPORT_SYMBOL_GPL(put_online_cpus_atomic);
> 
> So, you made it clear why you want a recursive read side here.
> 
> I am wondering though, if you could take care of recursive uses in
> get/put_online_cpus_atomic() instead of doing it as a property of your
> rwlock:
> 
> get_online_cpus_atomic()
> {
>     unsigned long flags;
>     local_irq_save(flags);
>     if (this_cpu_inc_return(hotplug_recusion_count) == 1)
>         percpu_read_lock_irqsafe(&hotplug_pcpu_rwlock);
>     local_irq_restore(flags);
> }
> 
> Once again, the idea there is to avoid baking the reader side
> recursive properties into your rwlock itself, so that it won't be
> impossible to implement reader/writer fairness into your rwlock in the
> future (which may be not be very important for the hotplug use, but
> could be when other uses get introduced).
> 

Hmm, your proposal above looks good to me, at first glance.
(Sorry, I had mistaken your earlier mails to mean that you were against
recursive reader-side, while you actually meant that you didn't like
implementing the recursive reader-side logic using the recursive property
of rwlocks).

While your idea above looks good, it might introduce more complexity
in the unlock path, since this would allow nesting of heterogeneous readers
(ie., if hotplug_recursion_count == 1, you don't know whether you need to
simply decrement the counter or unlock the rwlock as well).

But I'll give this some more thought to see if we can implement this
without making it too complex. Thank you!

Regards,
Srivatsa S. Bhat

^ permalink raw reply

* Re: [PATCH v6 07/46] percpu_rwlock: Allow writers to be readers, and add lockdep annotations
From: Srivatsa S. Bhat @ 2013-02-18 16:31 UTC (permalink / raw)
  To: Michel Lespinasse
  Cc: linux-doc, peterz, fweisbec, linux-kernel, namhyung, mingo,
	linux-arch, linux, xiaoguangrong, wangyun, paulmck, nikunj,
	linux-pm, rusty, rostedt, rjw, vincent.guittot, tglx,
	linux-arm-kernel, netdev, oleg, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <CANN689Ec4g7jMapv8ChAEom+gEDQ8RDVLa07CKwJ==YO+MUUrA@mail.gmail.com>

On 02/18/2013 09:21 PM, Michel Lespinasse wrote:
> On Mon, Feb 18, 2013 at 8:39 PM, Srivatsa S. Bhat
> <srivatsa.bhat@linux.vnet.ibm.com> wrote:
>> @@ -200,6 +217,16 @@ void percpu_write_lock_irqsave(struct percpu_rwlock *pcpu_rwlock,
>>
>>         smp_mb(); /* Complete the wait-for-readers, before taking the lock */
>>         write_lock_irqsave(&pcpu_rwlock->global_rwlock, *flags);
>> +
>> +       /*
>> +        * It is desirable to allow the writer to acquire the percpu-rwlock
>> +        * for read (if necessary), without deadlocking or getting complaints
>> +        * from lockdep. To achieve that, just increment the reader_refcnt of
>> +        * this CPU - that way, any attempt by the writer to acquire the
>> +        * percpu-rwlock for read, will get treated as a case of nested percpu
>> +        * reader, which is safe, from a locking perspective.
>> +        */
>> +       this_cpu_inc(pcpu_rwlock->rw_state->reader_refcnt);
> 
> I find this quite disgusting, but once again this may be because I
> don't like unfair recursive rwlocks.
> 

:-)

> In my opinion, the alternative of explicitly not taking the read lock
> when one already has the write lock sounds *much* nicer.

I don't seem to recall any strong reasons to do it this way, so I don't have
any strong opinions on doing it this way. But one of the things to note is that,
in the CPU Hotplug case, the readers are *way* more hotter than the writer.
So avoiding extra checks/'if' conditions/memory barriers in the reader-side
is very welcome. (If we slow down the read-side, we get a performance hit
even when *not* doing hotplug!). Considering this, the logic used in this
patchset seems better, IMHO.

Regards,
Srivatsa S. Bhat

^ permalink raw reply

* Re: [PATCH v6 04/46] percpu_rwlock: Implement the core design of Per-CPU Reader-Writer Locks
From: Steven Rostedt @ 2013-02-18 16:31 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: linux-doc, peterz, fweisbec, linux-kernel, Michel Lespinasse,
	mingo, linux-arch, linux, xiaoguangrong, wangyun, paulmck, nikunj,
	linux-pm, rusty, rjw, namhyung, tglx, linux-arm-kernel, netdev,
	oleg, vincent.guittot, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <5122551E.1080703@linux.vnet.ibm.com>

On Mon, 2013-02-18 at 21:51 +0530, Srivatsa S. Bhat wrote:
> Hi Michel,

> Yes.. I don't think we can avoid that. Moreover, since we _want_ unfair
> reader/writer semantics to allow flexible locking rules and guarantee
> deadlock-safety, having a recursive reader side is not even an issue, IMHO.

Recursive unfair reader lock may guarantee deadlock-safety, but
remember, it adds a higher probability of live-locking the write_lock.
Which is another argument to keep this separate to cpu hotplug only.

-- Steve

^ permalink raw reply

* Re: [PATCH v6 08/46] CPU hotplug: Provide APIs to prevent CPU offline from atomic context
From: Michel Lespinasse @ 2013-02-18 16:23 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: linux-doc, peterz, fweisbec, linux-kernel, mingo, linux-arch,
	linux, xiaoguangrong, wangyun, paulmck, nikunj, linux-pm, rusty,
	rostedt, rjw, namhyung, tglx, linux-arm-kernel, netdev, oleg,
	vincent.guittot, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <20130218123920.26245.56709.stgit@srivatsabhat.in.ibm.com>

On Mon, Feb 18, 2013 at 8:39 PM, Srivatsa S. Bhat
<srivatsa.bhat@linux.vnet.ibm.com> wrote:
> Some important design requirements and considerations:
> -----------------------------------------------------
>
> 1. Scalable synchronization at the reader-side, especially in the fast-path
>
>    Any synchronization at the atomic hotplug readers side must be highly
>    scalable - avoid global single-holder locks/counters etc. Because, these
>    paths currently use the extremely fast preempt_disable(); our replacement
>    to preempt_disable() should not become ridiculously costly and also should
>    not serialize the readers among themselves needlessly.
>
>    At a minimum, the new APIs must be extremely fast at the reader side
>    atleast in the fast-path, when no CPU offline writers are active.
>
> 2. preempt_disable() was recursive. The replacement should also be recursive.
>
> 3. No (new) lock-ordering restrictions
>
>    preempt_disable() was super-flexible. It didn't impose any ordering
>    restrictions or rules for nesting. Our replacement should also be equally
>    flexible and usable.
>
> 4. No deadlock possibilities
>
>    Regular per-cpu locking is not the way to go if we want to have relaxed
>    rules for lock-ordering. Because, we can end up in circular-locking
>    dependencies as explained in https://lkml.org/lkml/2012/12/6/290
>
>    So, avoid the usual per-cpu locking schemes (per-cpu locks/per-cpu atomic
>    counters with spin-on-contention etc) as much as possible, to avoid
>    numerous deadlock possibilities from creeping in.
>
>
> Implementation of the design:
> ----------------------------
>
> We use per-CPU reader-writer locks for synchronization because:
>
>   a. They are quite fast and scalable in the fast-path (when no writers are
>      active), since they use fast per-cpu counters in those paths.
>
>   b. They are recursive at the reader side.
>
>   c. They provide a good amount of safety against deadlocks; they don't
>      spring new deadlock possibilities on us from out of nowhere. As a
>      result, they have relaxed locking rules and are quite flexible, and
>      thus are best suited for replacing usages of preempt_disable() or
>      local_irq_disable() at the reader side.
>
> Together, these satisfy all the requirements mentioned above.

Thanks for this detailed design explanation.

> +/*
> + * Invoked by atomic hotplug reader (a task which wants to prevent
> + * CPU offline, but which can't afford to sleep), to prevent CPUs from
> + * going offline. So, you can call this function from atomic contexts
> + * (including interrupt handlers).
> + *
> + * Note: This does NOT prevent CPUs from coming online! It only prevents
> + * CPUs from going offline.
> + *
> + * You can call this function recursively.
> + *
> + * Returns with preemption disabled (but interrupts remain as they are;
> + * they are not disabled).
> + */
> +void get_online_cpus_atomic(void)
> +{
> +       percpu_read_lock_irqsafe(&hotplug_pcpu_rwlock);
> +}
> +EXPORT_SYMBOL_GPL(get_online_cpus_atomic);
> +
> +void put_online_cpus_atomic(void)
> +{
> +       percpu_read_unlock_irqsafe(&hotplug_pcpu_rwlock);
> +}
> +EXPORT_SYMBOL_GPL(put_online_cpus_atomic);

So, you made it clear why you want a recursive read side here.

I am wondering though, if you could take care of recursive uses in
get/put_online_cpus_atomic() instead of doing it as a property of your
rwlock:

get_online_cpus_atomic()
{
    unsigned long flags;
    local_irq_save(flags);
    if (this_cpu_inc_return(hotplug_recusion_count) == 1)
        percpu_read_lock_irqsafe(&hotplug_pcpu_rwlock);
    local_irq_restore(flags);
}

Once again, the idea there is to avoid baking the reader side
recursive properties into your rwlock itself, so that it won't be
impossible to implement reader/writer fairness into your rwlock in the
future (which may be not be very important for the hotplug use, but
could be when other uses get introduced).

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.

^ permalink raw reply

* Re: [PATCH v6 04/46] percpu_rwlock: Implement the core design of Per-CPU Reader-Writer Locks
From: Srivatsa S. Bhat @ 2013-02-18 16:21 UTC (permalink / raw)
  To: Michel Lespinasse
  Cc: linux-doc, peterz, fweisbec, linux-kernel, mingo, linux-arch,
	linux, xiaoguangrong, wangyun, paulmck, nikunj, linux-pm, rusty,
	rostedt, rjw, namhyung, tglx, linux-arm-kernel, netdev, oleg,
	vincent.guittot, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <CANN689F9S7c1M8+cEpz3tsxGF34+NTRBLvxgPUOtbvav5u+RRA@mail.gmail.com>

Hi Michel,

On 02/18/2013 09:15 PM, Michel Lespinasse wrote:
> Hi Srivasta,
> 
> I admit not having followed in detail the threads about the previous
> iteration, so some of my comments may have been discussed already
> before - apologies if that is the case.
> 
> On Mon, Feb 18, 2013 at 8:38 PM, Srivatsa S. Bhat
> <srivatsa.bhat@linux.vnet.ibm.com> wrote:
>> Reader-writer locks and per-cpu counters are recursive, so they can be
>> used in a nested fashion in the reader-path, which makes per-CPU rwlocks also
>> recursive. Also, this design of switching the synchronization scheme ensures
>> that you can safely nest and use these locks in a very flexible manner.
> 
> I like the general idea of switching between per-cpu and global
> rwlocks as needed; however I dislike unfair locks, and especially
> unfair recursive rwlocks.
> 
> If you look at rwlock_t, the main reason we haven't been able to
> implement reader/writer fairness there is because tasklist_lock makes
> use of the recursive nature of the rwlock_t read side. I'm worried
> about introducing more lock usages that would make use of the same
> property for your proposed lock.
> 
> I am fine with your proposal not implementing reader/writer fairness
> from day 1, but I am worried about your proposal having a recursive
> reader side. Or, to put it another way: if your proposal didn't have a
> recursive reader side, and rwlock_t could somehow be changed to
> implement reader/writer fairness, then this property could
> automatically propagate into your proposed rwlock; but if anyone makes
> use of the recursive nature of your proposal then implementing
> reader/writer fairness later won't be as easy.
>

Actually, we don't want reader/writer fairness in this particular case.
We want deadlock safety - and in this particular case, this is guaranteed
by the unfair nature of rwlocks today.

I understand that you want to make rwlocks fair. So, I am thinking of
going ahead with Tejun's proposal - implementing our own unfair locking
scheme inside percpu-rwlocks using atomic ops or something like that, and
being completely independent of rwlock_t. That way, you can easily go
ahead with making rwlocks fair without fear of breaking CPU hotplug.
However I would much prefer making that change to percpu-rwlocks as a
separate patchset, after this patchset goes in, so that we can also see
how well this unfair logic performs in practice.

And regarding recursive reader side,... the way I see it, having a
recursive reader side is a primary requirement in this case. The reason is
that the existing reader side (with stop_machine) uses preempt_disable(),
which is recursive. So our replacement also has to be recursive.
 
> I see that the very next change in this series is talking about
> acquiring the read side from interrupts, so it does look like you're
> planning to make use of the recursive nature of the read side.

Yes.. I don't think we can avoid that. Moreover, since we _want_ unfair
reader/writer semantics to allow flexible locking rules and guarantee
deadlock-safety, having a recursive reader side is not even an issue, IMHO.

> I kinda
> wish you didn't, as this is exactly replicating the design of
> tasklist_lock which is IMO problematic. Your prior proposal of
> disabling interrupts during the read side had other disadvantages, but
> I think it was nice that it didn't rely on having a recursive read
> side.
> 

We can have readers from non-interrupt contexts too, which depend on the
recursive property...

>> +#define reader_yet_to_switch(pcpu_rwlock, cpu)                             \
>> +       (ACCESS_ONCE(per_cpu_ptr((pcpu_rwlock)->rw_state, cpu)->reader_refcnt))
>> +
>> +#define reader_percpu_nesting_depth(pcpu_rwlock)                 \
>> +       (__this_cpu_read((pcpu_rwlock)->rw_state->reader_refcnt))
>> +
>> +#define reader_uses_percpu_refcnt(pcpu_rwlock)                         \
>> +                               reader_percpu_nesting_depth(pcpu_rwlock)
>> +
>> +#define reader_nested_percpu(pcpu_rwlock)                              \
>> +                       (reader_percpu_nesting_depth(pcpu_rwlock) > 1)
>> +
>> +#define writer_active(pcpu_rwlock)                                     \
>> +       (__this_cpu_read((pcpu_rwlock)->rw_state->writer_signal))
> 
> I'm personally not a fan of such one-line shorthand functions - I
> think they tend to make the code harder to read instead of easier, as
> one constantly has to refer to them to understand what's actually
> going on.
> 

I got rid of most of the helper functions in this version. But I would rather
prefer retaining the above ones, because they are unwieldy and long. And IMHO
the short-hand names are pretty descriptive, so you might not actually need
to keep referring to their implementations all the time.

>>  void percpu_write_lock(struct percpu_rwlock *pcpu_rwlock)
>>  {
>> +       unsigned int cpu;
>> +
>> +       /*
>> +        * Tell all readers that a writer is becoming active, so that they
>> +        * start switching over to the global rwlock.
>> +        */
>> +       for_each_possible_cpu(cpu)
>> +               per_cpu_ptr(pcpu_rwlock->rw_state, cpu)->writer_signal = true;
> 
> I don't see anything preventing a race with the corresponding code in
> percpu_write_unlock() that sets writer_signal back to false. Did I
> miss something here ? It seems to me we don't have any guarantee that
> all writer signals will be set to true at the end of the loop...
> 

Ah, thanks for pointing that out! IIRC Oleg had pointed this issue in the last
version, but back then, I hadn't fully understood what he meant. Your
explanation made it clear. I'll work on fixing this.

Thanks a lot for your review Michel!

Regards,
Srivatsa S. Bhat

^ permalink raw reply

* Re: [PATCH v6 07/46] percpu_rwlock: Allow writers to be readers, and add lockdep annotations
From: Michel Lespinasse @ 2013-02-18 15:51 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: linux-doc, peterz, fweisbec, linux-kernel, mingo, linux-arch,
	linux, xiaoguangrong, wangyun, paulmck, nikunj, linux-pm, rusty,
	rostedt, rjw, namhyung, tglx, linux-arm-kernel, netdev, oleg,
	vincent.guittot, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <20130218123913.26245.7713.stgit@srivatsabhat.in.ibm.com>

On Mon, Feb 18, 2013 at 8:39 PM, Srivatsa S. Bhat
<srivatsa.bhat@linux.vnet.ibm.com> wrote:
> @@ -200,6 +217,16 @@ void percpu_write_lock_irqsave(struct percpu_rwlock *pcpu_rwlock,
>
>         smp_mb(); /* Complete the wait-for-readers, before taking the lock */
>         write_lock_irqsave(&pcpu_rwlock->global_rwlock, *flags);
> +
> +       /*
> +        * It is desirable to allow the writer to acquire the percpu-rwlock
> +        * for read (if necessary), without deadlocking or getting complaints
> +        * from lockdep. To achieve that, just increment the reader_refcnt of
> +        * this CPU - that way, any attempt by the writer to acquire the
> +        * percpu-rwlock for read, will get treated as a case of nested percpu
> +        * reader, which is safe, from a locking perspective.
> +        */
> +       this_cpu_inc(pcpu_rwlock->rw_state->reader_refcnt);

I find this quite disgusting, but once again this may be because I
don't like unfair recursive rwlocks.

In my opinion, the alternative of explicitly not taking the read lock
when one already has the write lock sounds *much* nicer.

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.

^ permalink raw reply

* Re: [PATCH v6 04/46] percpu_rwlock: Implement the core design of Per-CPU Reader-Writer Locks
From: Michel Lespinasse @ 2013-02-18 15:45 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: linux-doc, peterz, fweisbec, linux-kernel, mingo, linux-arch,
	linux, xiaoguangrong, wangyun, paulmck, nikunj, linux-pm, rusty,
	rostedt, rjw, namhyung, tglx, linux-arm-kernel, netdev, oleg,
	vincent.guittot, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <20130218123856.26245.46705.stgit@srivatsabhat.in.ibm.com>

Hi Srivasta,

I admit not having followed in detail the threads about the previous
iteration, so some of my comments may have been discussed already
before - apologies if that is the case.

On Mon, Feb 18, 2013 at 8:38 PM, Srivatsa S. Bhat
<srivatsa.bhat@linux.vnet.ibm.com> wrote:
> Reader-writer locks and per-cpu counters are recursive, so they can be
> used in a nested fashion in the reader-path, which makes per-CPU rwlocks also
> recursive. Also, this design of switching the synchronization scheme ensures
> that you can safely nest and use these locks in a very flexible manner.

I like the general idea of switching between per-cpu and global
rwlocks as needed; however I dislike unfair locks, and especially
unfair recursive rwlocks.

If you look at rwlock_t, the main reason we haven't been able to
implement reader/writer fairness there is because tasklist_lock makes
use of the recursive nature of the rwlock_t read side. I'm worried
about introducing more lock usages that would make use of the same
property for your proposed lock.

I am fine with your proposal not implementing reader/writer fairness
from day 1, but I am worried about your proposal having a recursive
reader side. Or, to put it another way: if your proposal didn't have a
recursive reader side, and rwlock_t could somehow be changed to
implement reader/writer fairness, then this property could
automatically propagate into your proposed rwlock; but if anyone makes
use of the recursive nature of your proposal then implementing
reader/writer fairness later won't be as easy.

I see that the very next change in this series is talking about
acquiring the read side from interrupts, so it does look like you're
planning to make use of the recursive nature of the read side. I kinda
wish you didn't, as this is exactly replicating the design of
tasklist_lock which is IMO problematic. Your prior proposal of
disabling interrupts during the read side had other disadvantages, but
I think it was nice that it didn't rely on having a recursive read
side.

> +#define reader_yet_to_switch(pcpu_rwlock, cpu)                             \
> +       (ACCESS_ONCE(per_cpu_ptr((pcpu_rwlock)->rw_state, cpu)->reader_refcnt))
> +
> +#define reader_percpu_nesting_depth(pcpu_rwlock)                 \
> +       (__this_cpu_read((pcpu_rwlock)->rw_state->reader_refcnt))
> +
> +#define reader_uses_percpu_refcnt(pcpu_rwlock)                         \
> +                               reader_percpu_nesting_depth(pcpu_rwlock)
> +
> +#define reader_nested_percpu(pcpu_rwlock)                              \
> +                       (reader_percpu_nesting_depth(pcpu_rwlock) > 1)
> +
> +#define writer_active(pcpu_rwlock)                                     \
> +       (__this_cpu_read((pcpu_rwlock)->rw_state->writer_signal))

I'm personally not a fan of such one-line shorthand functions - I
think they tend to make the code harder to read instead of easier, as
one constantly has to refer to them to understand what's actually
going on.

>  void percpu_write_lock(struct percpu_rwlock *pcpu_rwlock)
>  {
> +       unsigned int cpu;
> +
> +       /*
> +        * Tell all readers that a writer is becoming active, so that they
> +        * start switching over to the global rwlock.
> +        */
> +       for_each_possible_cpu(cpu)
> +               per_cpu_ptr(pcpu_rwlock->rw_state, cpu)->writer_signal = true;

I don't see anything preventing a race with the corresponding code in
percpu_write_unlock() that sets writer_signal back to false. Did I
miss something here ? It seems to me we don't have any guarantee that
all writer signals will be set to true at the end of the loop...

-- 
Michel "Walken" Lespinasse
A program is never fully debugged until the last user dies.

^ permalink raw reply

* Re: [PATCH v5 00/45] CPU hotplug: stop_machine()-free CPU hotplug
From: Steven Rostedt @ 2013-02-18 15:30 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: linux-doc, peterz, fweisbec, linux-kernel, walken, mingo,
	linux-arch, Russell King - ARM Linux, xiaoguangrong, wangyun,
	paulmck, nikunj, linux-pm, Rusty Russell, rjw, namhyung, tglx,
	linux-arm-kernel, netdev, oleg, sbw, Srivatsa S. Bhat, tj, akpm,
	linuxppc-dev
In-Reply-To: <CAKfTPtA1aTseD6Gntkf_zbCEbfZzt3P2-P1un6iF5_1TdVLjRQ@mail.gmail.com>

On Mon, 2013-02-18 at 11:58 +0100, Vincent Guittot wrote:

> My tests have been done without cpuidle because i have some issues
> with function tracer and cpuidle
> 
> But the cpu hotplug and cpuidle work well when I run the tests without
> enabling the function tracer
> 

I know suspend and resume has issues with function tracing (because it
makes things like calling smp_processor_id() crash the system), but I'm
unaware of issues with hotplug itself. Could be some of the same issues.

Can you give me more details, I'll try to investigate it.

Thanks,

-- Steve

^ permalink raw reply

* Re: [PATCH v6 33/46] cris/smp: Use get/put_online_cpus_atomic() to prevent CPU offline
From: Jesper Nilsson @ 2013-02-18 13:07 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: linux-doc, peterz, fweisbec, linux-kernel, walken, mingo,
	linux-arch, linux, xiaoguangrong, wangyun, paulmck, nikunj,
	linux-pm, rusty, rostedt, rjw, namhyung, tglx, linux-arm-kernel,
	netdev, oleg, vincent.guittot, sbw, tj, akpm, linuxppc-dev
In-Reply-To: <20130218124254.26245.4577.stgit@srivatsabhat.in.ibm.com>

On Mon, Feb 18, 2013 at 06:12:54PM +0530, Srivatsa S. Bhat wrote:
> Once stop_machine() is gone from the CPU offline path, we won't be able to
> depend on preempt_disable() or local_irq_disable() to prevent CPUs from
> going offline from under us.
> 
> Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
> while invoking from atomic context.
> 
> Cc: Mikael Starvik <starvik@axis.com>

> Cc: linux-cris-kernel@axis.com
> Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>

Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>

/^JN - Jesper Nilsson
-- 
               Jesper Nilsson -- jesper.nilsson@axis.com

^ permalink raw reply

* [PATCH 6/6 v8] iommu/fsl: Freescale PAMU driver and IOMMU API implementation.
From: Varun Sethi @ 2013-02-18 12:52 UTC (permalink / raw)
  To: iommu, linuxppc-dev, linux-kernel, scottwood, joro, stuart.yoder
  Cc: Varun Sethi
In-Reply-To: <1361191939-21260-1-git-send-email-Varun.Sethi@freescale.com>

Following is a brief description of the PAMU hardware:
PAMU determines what action to take and whether to authorize the action on
the basis of the memory address, a Logical IO Device Number (LIODN), and
PAACT table (logically) indexed by LIODN and address. Hardware devices which
need to access memory must provide an LIODN in addition to the memory address.

Peripheral Access Authorization and Control Tables (PAACTs) are the primary
data structures used by PAMU. A PAACT is a table of peripheral access
authorization and control entries (PAACE).Each PAACE defines the range of
I/O bus address space that is accessible by the LIOD and the associated access
capabilities.

There are two types of PAACTs: primary PAACT (PPAACT) and secondary PAACT 
(SPAACT).A given physical I/O device may be able to act as one or more
independent logical I/O devices (LIODs). Each such logical I/O device is
assigned an identifier called logical I/O device number (LIODN). A LIODN is
allocated a contiguous portion of the I/O bus address space called the DSA window
for performing DSA operations. The DSA window may optionally be divided into
multiple sub-windows, each of which may be used to map to a region in system
storage space. The first sub-window is referred to as the primary sub-window
and the remaining are called secondary sub-windows.

This patch provides the PAMU driver (fsl_pamu.c) and the corresponding IOMMU
API implementation (fsl_pamu_domain.c). The PAMU hardware driver (fsl_pamu.c)
has been derived from the work done by Ashish Kalra and Timur Tabi.


Signed-off-by: Timur Tabi

Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
---
changes in v8:
- implemented the new API for window based IOMMUs.
changes in v7:
- Set max_subwidows in the geometry attribute.
- Add checking for maximum supported LIODN value.
- Use upper_32_bits and lower_32_bits macros while
  intializing PAMU data structures.
changes in v6:
- Simplified complex conditional statements.
- Fixed indentation issues.
- Added comments for IOMMU API implementation.
changes in v5:
- Addressed comments from Timur.
changes in v4:
- Addressed comments from Timur and Scott.
changes in v3:
- Addressed comments by Kumar Gala
- dynamic fspi allocation
- fixed alignment check in map and unmap
 drivers/iommu/Kconfig           |    8 +
 drivers/iommu/Makefile          |    1 +
 drivers/iommu/fsl_pamu.c        | 1260 +++++++++++++++++++++++++++++++++++++++
 drivers/iommu/fsl_pamu.h        |  398 ++++++++++++
 drivers/iommu/fsl_pamu_domain.c | 1135 +++++++++++++++++++++++++++++++++++
 drivers/iommu/fsl_pamu_domain.h |   89 +++
 6 files changed, 2891 insertions(+), 0 deletions(-)
 create mode 100644 drivers/iommu/fsl_pamu.c
 create mode 100644 drivers/iommu/fsl_pamu.h
 create mode 100644 drivers/iommu/fsl_pamu_domain.c
 create mode 100644 drivers/iommu/fsl_pamu_domain.h

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 3f0b15a1..a7dedcb 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -17,6 +17,14 @@ config OF_IOMMU
        def_bool y
        depends on OF
 
+config FSL_PAMU
+	bool "Freescale IOMMU support"
+	depends on PPC_E500MC
+	select IOMMU_API
+	select GENERIC_ALLOCATOR
+	help
+	  Freescale PAMU support.
+
 # MSM IOMMU support
 config MSM_IOMMU
 	bool "MSM IOMMU Support"
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index f66b816..4bc664e 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
 obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
 obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
+obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
new file mode 100644
index 0000000..55f90d1
--- /dev/null
+++ b/drivers/iommu/fsl_pamu.c
@@ -0,0 +1,1260 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ */
+
+#define pr_fmt(fmt)    "fsl-pamu: %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/iommu.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/of_platform.h>
+#include <linux/bootmem.h>
+#include <linux/genalloc.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+#include <asm/fsl_guts.h>
+
+#include "fsl_pamu.h"
+
+/* define indexes for each operation mapping scenario */
+#define OMI_QMAN        0x00
+#define OMI_FMAN        0x01
+#define OMI_QMAN_PRIV   0x02
+#define OMI_CAAM        0x03
+
+/* Handling access violations */
+#define make64(high, low) (((u64)(high) << 32) | (low))
+
+struct pamu_isr_data {
+	void __iomem *pamu_reg_base;	/* Base address of PAMU regs*/
+	unsigned int count;		/* The number of PAMUs */
+};
+
+static struct paace *ppaact;
+static struct paace *spaact;
+static struct ome *omt;
+
+/* maximum subwindows permitted per liodn */
+unsigned int max_subwindow_count;
+/* Number of SPAACT entries */
+unsigned long max_subwins;
+
+/* Pool for fspi allocation */
+struct gen_pool *spaace_pool;
+
+static unsigned long get_spaact_size(void)
+{
+	return max_subwins * sizeof(struct paace);
+}
+
+/** 
+ * pamu_get_ppaace() - Return the primary PACCE
+ * @liodn: liodn PAACT index for desired PAACE
+ *
+ * Returns the ppace pointer upon success else return
+ * null.
+ */
+static struct paace *pamu_get_ppaace(int liodn)
+{
+	if (!ppaact || liodn > PAACE_NUMBER_ENTRIES) {
+		pr_err("PPAACT doesn't exist\n");
+		return NULL;
+	}
+
+	return &ppaact[liodn];
+}
+
+/** 
+ * pamu_enable_liodn() - Set valid bit of PACCE
+ * @liodn: liodn PAACT index for desired PAACE
+ *
+ * Returns 0 upon success else error code < 0 returned
+ */
+int pamu_enable_liodn(int liodn)
+{
+	struct paace *ppaace;
+
+	ppaace = pamu_get_ppaace(liodn);
+	if (!ppaace) {
+		pr_err("Invalid primary paace entry\n");
+		return -ENOENT;
+	}
+
+	if (!get_bf(ppaace->addr_bitfields, PPAACE_AF_WSE)) {
+		pr_err("liodn %d not configured\n", liodn);
+		return -EINVAL;
+	}
+
+	/* Ensure that all other stores to the ppaace complete first */
+	mb();
+
+	ppaace->addr_bitfields |= PAACE_V_VALID;
+	mb();
+
+	return 0;
+}
+
+/** 
+ * pamu_disable_liodn() - Clears valid bit of PACCE
+ * @liodn: liodn PAACT index for desired PAACE
+ *
+ * Returns 0 upon success else error code < 0 returned
+ */
+int pamu_disable_liodn(int liodn)
+{
+	struct paace *ppaace;
+
+	ppaace = pamu_get_ppaace(liodn);
+	if (!ppaace) {
+		pr_err("Invalid primary paace entry\n");
+		return -ENOENT;
+	}
+
+	set_bf(ppaace->addr_bitfields, PAACE_AF_V, PAACE_V_INVALID);
+	mb();
+
+	return 0;
+}
+
+/* Derive the window size encoding for a particular PAACE entry */
+static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size)
+{
+	/* Bug if not a power of 2 */
+	BUG_ON((addrspace_size & (addrspace_size - 1)));
+
+	/* window size is 2^(WSE+1) bytes */
+	return __ffs(addrspace_size >> PAMU_PAGE_SHIFT) + PAMU_PAGE_SHIFT - 1;
+}
+
+/* Derive the PAACE window count encoding for the subwindow count */
+static unsigned int map_subwindow_cnt_to_wce(u32 subwindow_cnt)
+{
+       /* window count is 2^(WCE+1) bytes */
+       return __ffs(subwindow_cnt) - 1;
+}
+
+/* 
+ * Set the PAACE type as primary and set the coherency required domain
+ * attribute
+ */
+static void pamu_setup_default_xfer_to_host_ppaace(struct paace *ppaace)
+{
+	set_bf(ppaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_PRIMARY);
+
+	set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR,
+	       PAACE_M_COHERENCE_REQ);
+}
+
+/*
+ * Set the PAACE type as secondary and set the coherency required domain
+ * attribute.
+ */
+static void pamu_setup_default_xfer_to_host_spaace(struct paace *spaace)
+{
+	set_bf(spaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_SECONDARY);
+	set_bf(spaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR,
+	       PAACE_M_COHERENCE_REQ);
+}
+
+/*
+ * Return the spaace (corresponding to the secondary window index)
+ * for a particular ppaace.
+ */
+static struct paace *pamu_get_spaace(struct paace *paace, u32 wnum)
+{
+	u32 subwin_cnt;
+	struct paace *spaace = NULL;
+
+	subwin_cnt = 1UL << (get_bf(paace->impl_attr, PAACE_IA_WCE) + 1);
+
+	if (wnum < subwin_cnt)
+		spaace = &spaact[paace->fspi + wnum];
+	else
+		pr_err("secondary paace out of bounds\n");
+
+	return spaace;
+}
+
+/**
+ * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves subwindows
+ *                                required for primary PAACE in the secondary
+ *                                PAACE table.
+ * @subwin_cnt: Number of subwindows to be reserved.
+ *
+ * A PPAACE entry may have a number of associated subwindows. A subwindow
+ * corresponds to a SPAACE entry in the SPAACT table. Each PAACE entry stores
+ * the index (fspi) of the first SPAACE entry in the SPAACT table. This
+ * function returns the index of the first SPAACE entry. The remaining
+ * SPAACE entries are reserved contiguously from that index.
+ *
+ * Returns a valid fspi index in the range of 0 - max_subwins on success.
+ * If no SPAACE entry is available or the allocator can not reserve the required
+ * number of contiguous entries function returns ULONG_MAX indicating a failure.
+ *
+*/
+static unsigned long pamu_get_fspi_and_allocate(u32 subwin_cnt)
+{
+	unsigned long spaace_addr;
+
+	spaace_addr = gen_pool_alloc(spaace_pool, subwin_cnt * sizeof(struct paace));
+	if (!spaace_addr)
+		return ULONG_MAX;
+
+	return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace));
+}
+
+/* Release the subwindows reserved for a particular LIODN */
+void pamu_free_subwins(int liodn)
+{
+	struct paace *ppaace;
+	u32 subwin_cnt, size;
+
+	ppaace = pamu_get_ppaace(liodn);
+	if (!ppaace) {
+		pr_err("Invalid liodn entry\n");
+		return;
+	}
+
+	if (get_bf(ppaace->addr_bitfields, PPAACE_AF_MW)) {
+		subwin_cnt = 1UL << (get_bf(ppaace->impl_attr, PAACE_IA_WCE) + 1);
+		size = (subwin_cnt - 1) * sizeof(struct paace);
+		gen_pool_free(spaace_pool, (unsigned long)&spaact[ppaace->fspi], size);
+		set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0);
+	}
+}
+
+/* 
+ * Function used for updating stash destination for the coressponding
+ * LIODN.
+ */
+int  pamu_update_paace_stash(int liodn, u32 subwin, u32 value)
+{
+	struct paace *paace;
+
+	paace = pamu_get_ppaace(liodn);
+	if (!paace) {
+		pr_err("Invalid liodn entry\n");
+		return -ENOENT;
+	}
+	if (subwin) {
+		paace = pamu_get_spaace(paace, subwin - 1);
+		if (!paace) {
+			return -ENOENT;
+		}
+	}
+	set_bf(paace->impl_attr, PAACE_IA_CID, value);
+
+	mb();
+
+	return 0;
+}
+
+/* Disable a subwindow corresponding to the LIODN */
+int pamu_disable_spaace(int liodn, u32 subwin)
+{
+	struct paace *paace;
+
+	paace = pamu_get_ppaace(liodn);
+	if (!paace) {
+		pr_err("Invalid liodn entry\n");
+		return -ENOENT;
+	}
+	if (subwin) {
+		paace = pamu_get_spaace(paace, subwin - 1);
+		if (!paace) {
+			return -ENOENT;
+		}
+		set_bf(paace->addr_bitfields, PAACE_AF_V,
+			 PAACE_V_INVALID);
+	} else {
+		set_bf(paace->addr_bitfields, PAACE_AF_AP,
+			 PAACE_AP_PERMS_DENIED);
+	}
+
+	mb();
+
+	return 0;
+}
+
+
+/** 
+ * pamu_config_paace() - Sets up PPAACE entry for specified liodn
+ *
+ * @liodn: Logical IO device number
+ * @win_addr: starting address of DSA window
+ * @win-size: size of DSA window
+ * @omi: Operation mapping index -- if ~omi == 0 then omi not defined
+ * @rpn: real (true physical) page number
+ * @stashid: cache stash id for associated cpu -- if ~stashid == 0 then
+ *	     stashid not defined
+ * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then
+ *	     snoopid not defined
+ * @subwin_cnt: number of sub-windows
+ * @prot: window permissions
+ *
+ * Returns 0 upon success else error code < 0 returned
+ */
+int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size,
+		       u32 omi, unsigned long rpn, u32 snoopid, u32 stashid,
+		       u32 subwin_cnt, int prot)
+{
+	struct paace *ppaace;
+	unsigned long fspi;
+
+	if ((win_size & (win_size - 1)) || win_size < PAMU_PAGE_SIZE) {
+		pr_err("window size too small or not a power of two %llx\n", win_size);
+		return -EINVAL;
+	}
+
+	if (win_addr & (win_size - 1)) {
+		pr_err("window address is not aligned with window size\n");
+		return -EINVAL;
+	}
+
+	ppaace = pamu_get_ppaace(liodn);
+	if (!ppaace) {
+		return -ENOENT;
+	}
+
+	/* window size is 2^(WSE+1) bytes */
+	set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE,
+           map_addrspace_size_to_wse(win_size));
+
+	pamu_setup_default_xfer_to_host_ppaace(ppaace);
+
+	ppaace->wbah = win_addr >> (PAMU_PAGE_SHIFT + 20);
+	set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL,
+	       (win_addr >> PAMU_PAGE_SHIFT));
+
+	/* set up operation mapping if it's configured */
+	if (omi < OME_NUMBER_ENTRIES) {
+		set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED);
+		ppaace->op_encode.index_ot.omi = omi;
+	} else if (~omi != 0) {
+		pr_err("bad operation mapping index: %d\n", omi);
+		return -EINVAL;
+	}
+
+	/* configure stash id */
+	if (~stashid != 0)
+		set_bf(ppaace->impl_attr, PAACE_IA_CID, stashid);
+
+	/* configure snoop id */
+	if (~snoopid != 0)
+		ppaace->domain_attr.to_host.snpid = snoopid;
+
+	if (subwin_cnt) {
+		/* The first entry is in the primary PAACE instead */
+		fspi = pamu_get_fspi_and_allocate(subwin_cnt - 1);
+		if (fspi == ULONG_MAX) {
+			pr_err("spaace indexes exhausted\n");
+			return -EINVAL;
+		}
+
+		/* window count is 2^(WCE+1) bytes */
+		set_bf(ppaace->impl_attr, PAACE_IA_WCE,
+		       map_subwindow_cnt_to_wce(subwin_cnt));
+		set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0x1);
+		ppaace->fspi = fspi;
+	} else {
+		set_bf(ppaace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE);
+		ppaace->twbah = rpn >> 20;
+		set_bf(ppaace->win_bitfields, PAACE_WIN_TWBAL, rpn);
+		set_bf(ppaace->addr_bitfields, PAACE_AF_AP, prot);
+		set_bf(ppaace->impl_attr, PAACE_IA_WCE, 0);
+		set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0);
+	}
+	mb();
+
+	return 0;
+}
+
+/**
+ * pamu_config_spaace() - Sets up SPAACE entry for specified subwindow
+ *
+ * @liodn:  Logical IO device number
+ * @subwin_cnt:  number of sub-windows associated with dma-window
+ * @subwin_addr: starting address of subwindow
+ * @subwin_size: size of subwindow
+ * @omi: Operation mapping index
+ * @rpn: real (true physical) page number
+ * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then
+ *			  snoopid not defined
+ * @stashid: cache stash id for associated cpu
+ * @enable: enable/disable subwindow after reconfiguration
+ * @prot: sub window permissions
+ *
+ * Returns 0 upon success else error code < 0 returned
+ */
+int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin_addr,
+		       phys_addr_t subwin_size, u32 omi, unsigned long rpn,
+		       u32 snoopid, u32 stashid, int enable, int prot)
+{
+	struct paace *paace;
+
+	/* setup sub-windows */
+	if (!subwin_cnt) {
+		pr_err("Invalid subwindow count\n");
+		return -EINVAL;
+	}
+
+	paace = pamu_get_ppaace(liodn);
+	if (subwin_addr > 0 && subwin_addr < subwin_cnt && paace) {
+		paace = pamu_get_spaace(paace, subwin_addr - 1);
+
+		if (paace && !(paace->addr_bitfields & PAACE_V_VALID)) {
+			pamu_setup_default_xfer_to_host_spaace(paace);
+			set_bf(paace->addr_bitfields, SPAACE_AF_LIODN, liodn);
+		}
+	}
+
+	if (!paace) {
+		pr_err("Invalid liodn entry\n");
+		return -ENOENT;
+	}
+
+	if (!enable && prot == PAACE_AP_PERMS_DENIED) {
+		if (subwin_addr > 0)
+			set_bf(paace->addr_bitfields, PAACE_AF_V,
+				 PAACE_V_INVALID);
+		else
+			set_bf(paace->addr_bitfields, PAACE_AF_AP,
+				 prot);
+		mb();
+		return 0;
+	}
+
+	if (subwin_size & (subwin_size - 1) || subwin_size < PAMU_PAGE_SIZE) {
+		pr_err("subwindow size out of range, or not a power of 2\n");
+		return -EINVAL;
+	}
+
+	if (rpn == ULONG_MAX) {
+		pr_err("real page number out of range\n");
+		return -EINVAL;
+	}
+
+	/* window size is 2^(WSE+1) bytes */
+	set_bf(paace->win_bitfields, PAACE_WIN_SWSE,
+	       map_addrspace_size_to_wse(subwin_size));
+
+	set_bf(paace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE);
+	paace->twbah = rpn >> 20;
+	set_bf(paace->win_bitfields, PAACE_WIN_TWBAL, rpn);
+	set_bf(paace->addr_bitfields, PAACE_AF_AP, prot);
+
+	/* configure snoop id */
+	if (~snoopid != 0)
+		paace->domain_attr.to_host.snpid = snoopid;
+
+	/* set up operation mapping if it's configured */
+	if (omi < OME_NUMBER_ENTRIES) {
+		set_bf(paace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED);
+		paace->op_encode.index_ot.omi = omi;
+	} else if (~omi != 0) {
+		pr_err("bad operation mapping index: %d\n", omi);
+		return -EINVAL;
+	}
+
+	if (~stashid != 0)
+		set_bf(paace->impl_attr, PAACE_IA_CID, stashid);
+
+	smp_wmb();
+
+	if (enable)
+		paace->addr_bitfields |= PAACE_V_VALID;
+
+	mb();
+
+	return 0;
+}
+
+/**
+* get_ome_index() - Returns the index in the operation mapping table
+*                   for device.
+* @*omi_index: pointer for storing the index value
+*
+*/
+void get_ome_index(u32 *omi_index, struct device *dev)
+{
+	if (of_device_is_compatible(dev->of_node, "fsl,qman-portal"))
+		*omi_index = OMI_QMAN;
+	if (of_device_is_compatible(dev->of_node, "fsl,qman"))
+		*omi_index = OMI_QMAN_PRIV;
+}
+
+/**
+ * get_stash_id - Returns stash destination id corresponding to a
+ *                cache type and vcpu.
+ * @stash_dest_hint: L1, L2 or L3
+ * @vcpu: vpcu target for a particular cache type.  
+ *
+ * Returs stash on success or ~(u32)0 on failure.
+ *
+ */
+u32 get_stash_id(u32 stash_dest_hint, u32 vcpu)
+{
+	const u32 *prop;
+	struct device_node *node;
+	u32 cache_level;
+	int len;
+
+	/* Fastpath, exit early if L3/CPC cache is target for stashing */
+	if (stash_dest_hint == IOMMU_ATTR_CACHE_L3) {
+		node = of_find_compatible_node(NULL, NULL,
+				"fsl,p4080-l3-cache-controller");
+		if (node) {
+			prop = of_get_property(node, "cache-stash-id", 0);
+			if (!prop) {
+				pr_err("missing cache-stash-id at %s\n", node->full_name);
+				of_node_put(node);
+				return ~(u32)0;
+			}
+			of_node_put(node);
+			return be32_to_cpup(prop);
+		}
+		return ~(u32)0;
+	}
+
+	for_each_node_by_type(node, "cpu") {
+		prop = of_get_property(node, "reg", &len);
+		if (be32_to_cpup(prop) == vcpu)
+			break;
+	}
+
+	/* find the hwnode that represents the cache */
+	for (cache_level = IOMMU_ATTR_CACHE_L1; cache_level < IOMMU_ATTR_CACHE_L3; cache_level++) {
+		if (stash_dest_hint == cache_level) {
+			prop = of_get_property(node, "cache-stash-id", 0);
+			if (!prop) {
+				pr_err("missing cache-stash-id at %s\n", node->full_name);
+				of_node_put(node);
+				return ~(u32)0;
+			}
+			of_node_put(node);
+			return be32_to_cpup(prop);
+		}
+
+		prop = of_get_property(node, "next-level-cache", 0);
+		if (!prop) {
+			pr_err("can't find next-level-cache at %s\n",
+			          node->full_name);
+			of_node_put(node);
+			return ~(u32)0;  /* can't traverse any further */
+		}
+		of_node_put(node);
+
+		/* advance to next node in cache hierarchy */
+		node = of_find_node_by_phandle(*prop);
+		if (!node) {
+			pr_err("Invalid node for cache hierarchy %s\n",
+				node->full_name);
+			return ~(u32)0;
+		}
+	}
+
+	pr_err("stash dest not found for %d on vcpu %d\n",
+	          stash_dest_hint, vcpu);
+	return ~(u32)0;
+}
+
+/* Identify if the PAACT table entry belongs to QMAN, BMAN or QMAN Portal */
+#define QMAN_PAACE 1
+#define QMAN_PORTAL_PAACE 2
+#define BMAN_PAACE 3
+
+/**
+ * Setup operation mapping and stash destinations for QMAN and QMAN portal.
+ * Memory accesses to QMAN and BMAN private memory need not be coherent, so
+ * clear the PAACE entry coherency attribute for them.
+ */
+static void setup_qbman_paace(struct paace *ppaace, int  paace_type)
+{
+	switch(paace_type) {
+		case QMAN_PAACE:
+			set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED);
+			ppaace->op_encode.index_ot.omi = OMI_QMAN_PRIV;
+			/* setup QMAN Private data stashing for the L3 cache */
+			set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(IOMMU_ATTR_CACHE_L3, 0));
+			set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR,
+			       0);
+			break;
+		case QMAN_PORTAL_PAACE:
+			set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED);
+			ppaace->op_encode.index_ot.omi = OMI_QMAN;
+			/*Set DQRR and Frame stashing for the L3 cache */
+			set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(IOMMU_ATTR_CACHE_L3, 0));
+			break;
+		case BMAN_PAACE:
+			set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR,
+			       0);
+			break;
+	}
+}
+
+/**
+ * Setup the operation mapping table for various devices. This is a static
+ * table where each table index corresponds to a particular device. PAMU uses
+ * this table to translate device transaction to appropriate corenet
+ * transaction.
+ */
+static void __init setup_omt(struct ome *omt)
+{
+	struct ome *ome;
+
+	/* Configure OMI_QMAN */
+	ome = &omt[OMI_QMAN];
+
+	ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READ;
+	ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA;
+	ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE;
+	ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSAO;
+
+	ome->moe[IOE_DIRECT0_IDX] = EOE_VALID | EOE_LDEC;
+	ome->moe[IOE_DIRECT1_IDX] = EOE_VALID | EOE_LDECPE;
+
+	/* Configure OMI_FMAN */
+	ome = &omt[OMI_FMAN];
+	ome->moe[IOE_READ_IDX]  = EOE_VALID | EOE_READI;
+	ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE;
+
+	/* Configure OMI_QMAN private */
+	ome = &omt[OMI_QMAN_PRIV];
+	ome->moe[IOE_READ_IDX]  = EOE_VALID | EOE_READ;
+	ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE;
+	ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA;
+	ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSA;
+
+	/* Configure OMI_CAAM */
+	ome = &omt[OMI_CAAM];
+	ome->moe[IOE_READ_IDX]  = EOE_VALID | EOE_READI;
+	ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE;
+}
+
+/*
+ * Get the maximum number of PAACT table entries
+ * and subwindows supported by PAMU
+ */
+static void get_pamu_cap_values(unsigned long pamu_reg_base)
+{
+	u32 pc_val;
+
+	pc_val = in_be32((u32 *)(pamu_reg_base + PAMU_PC3));
+	/* Maximum number of subwindows per liodn */
+	max_subwindow_count = 1 << (1 + PAMU_PC3_MWCE(pc_val));
+	/* Total number of SPACCT entries */
+	max_subwins = PAACE_NUMBER_ENTRIES * max_subwindow_count;
+}
+
+/* Setup PAMU registers pointing to PAACT, SPAACT and OMT */
+int setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size,
+	           phys_addr_t ppaact_phys, phys_addr_t spaact_phys,
+		   phys_addr_t omt_phys)
+{
+	u32 *pc;
+	struct pamu_mmap_regs *pamu_regs;
+
+	pc = (u32 *) (pamu_reg_base + PAMU_PC);
+	pamu_regs = (struct pamu_mmap_regs *)
+		(pamu_reg_base + PAMU_MMAP_REGS_BASE);
+
+	/* set up pointers to corenet control blocks */
+
+	out_be32(&pamu_regs->ppbah, upper_32_bits(ppaact_phys));
+	out_be32(&pamu_regs->ppbal, lower_32_bits(ppaact_phys));
+	ppaact_phys = ppaact_phys + PAACT_SIZE;
+	out_be32(&pamu_regs->pplah, upper_32_bits(ppaact_phys));
+	out_be32(&pamu_regs->pplal, lower_32_bits(ppaact_phys));
+
+	out_be32(&pamu_regs->spbah, upper_32_bits(spaact_phys));
+	out_be32(&pamu_regs->spbal, lower_32_bits(spaact_phys));
+	spaact_phys = spaact_phys + get_spaact_size();
+	out_be32(&pamu_regs->splah, upper_32_bits(spaact_phys));
+	out_be32(&pamu_regs->splal, lower_32_bits(spaact_phys));
+
+	out_be32(&pamu_regs->obah, upper_32_bits(omt_phys));
+	out_be32(&pamu_regs->obal, lower_32_bits(omt_phys));
+	omt_phys = omt_phys + OMT_SIZE;
+	out_be32(&pamu_regs->olah, upper_32_bits(omt_phys));
+	out_be32(&pamu_regs->olal, lower_32_bits(omt_phys));
+
+	/*
+	 * set PAMU enable bit,
+	 * allow ppaact & omt to be cached
+	 * & enable PAMU access violation interrupts.
+	 */
+
+	out_be32((u32 *)(pamu_reg_base + PAMU_PICS),
+			PAMU_ACCESS_VIOLATION_ENABLE);
+	out_be32(pc, PAMU_PC_PE | PAMU_PC_OCE | PAMU_PC_SPCC | PAMU_PC_PPCC);
+	return 0;
+}
+
+/* Enable all device LIODNS */
+static void __init setup_liodns(void)
+{
+	int i, len;
+	struct paace *ppaace;
+	struct device_node *node = NULL;
+	const u32 *prop;
+
+	for_each_node_with_property(node, "fsl,liodn") {
+		prop = of_get_property(node, "fsl,liodn", &len);
+		for (i = 0; i < len / sizeof(u32); i++) {
+			int liodn;
+
+			liodn = be32_to_cpup(&prop[i]);
+			if (liodn > PAACE_NUMBER_ENTRIES) {
+				pr_err("Invalid LIODN value %d\n", liodn);
+				continue;
+			}
+			ppaace = pamu_get_ppaace(liodn);
+			pamu_setup_default_xfer_to_host_ppaace(ppaace);
+			/* window size is 2^(WSE+1) bytes */
+			set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
+			ppaace->wbah = 0;
+			set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
+			set_bf(ppaace->impl_attr, PAACE_IA_ATM,
+				PAACE_ATM_NO_XLATE);
+			set_bf(ppaace->addr_bitfields, PAACE_AF_AP, 
+				PAACE_AP_PERMS_ALL);
+			if (of_device_is_compatible(node, "fsl,qman-portal"))
+				setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE);
+			if (of_device_is_compatible(node, "fsl,qman"))
+				setup_qbman_paace(ppaace, QMAN_PAACE);
+			if (of_device_is_compatible(node, "fsl,bman"))
+				setup_qbman_paace(ppaace, BMAN_PAACE);
+			mb();
+			pamu_enable_liodn(liodn);
+		}
+	}
+}
+
+/* TBD: PAMU access violation interrupt handler */
+irqreturn_t pamu_av_isr(int irq, void *arg)
+{
+	struct pamu_isr_data *data = arg;
+	phys_addr_t phys;
+	unsigned int i, j;
+
+	pr_emerg("fsl-pamu: access violation interrupt\n");
+
+	for (i = 0; i < data->count; i++) {
+		void __iomem *p = data->pamu_reg_base + i * PAMU_OFFSET;
+		u32 pics = in_be32(p + PAMU_PICS);
+
+		if (pics & PAMU_ACCESS_VIOLATION_STAT) {
+			pr_emerg("POES1=%08x\n", in_be32(p + PAMU_POES1));
+			pr_emerg("POES2=%08x\n", in_be32(p + PAMU_POES2));
+			pr_emerg("AVS1=%08x\n", in_be32(p + PAMU_AVS1));
+			pr_emerg("AVS2=%08x\n", in_be32(p + PAMU_AVS2));
+			pr_emerg("AVA=%016llx\n", make64(in_be32(p + PAMU_AVAH),
+				in_be32(p + PAMU_AVAL)));
+			pr_emerg("UDAD=%08x\n", in_be32(p + PAMU_UDAD));
+			pr_emerg("POEA=%016llx\n", make64(in_be32(p + PAMU_POEAH),
+				in_be32(p + PAMU_POEAL)));
+
+			phys = make64(in_be32(p + PAMU_POEAH),
+				in_be32(p + PAMU_POEAL));
+
+			/* Assume that POEA points to a PAACE */
+			if (phys) {
+				u32 *paace = phys_to_virt(phys);
+
+				/* Only the first four words are relevant */
+				for (j = 0; j < 4; j++)
+					pr_emerg("PAACE[%u]=%08x\n", j, in_be32(paace + j));
+			}
+		}
+	}
+
+	panic("\n");
+	/* NOTREACHED */
+
+	return IRQ_HANDLED;
+}
+
+#define LAWAR_EN		0x80000000
+#define LAWAR_TARGET_MASK	0x0FF00000
+#define LAWAR_TARGET_SHIFT	20
+#define LAWAR_SIZE_MASK		0x0000003F
+#define LAWAR_CSDID_MASK	0x000FF000
+#define LAWAR_CSDID_SHIFT	12
+
+#define LAW_SIZE_4K		0xb
+
+struct ccsr_law {
+	u32	lawbarh;	/* LAWn base address high */
+	u32	lawbarl;	/* LAWn base address low */
+	u32	lawar;		/* LAWn attributes */
+	u32	reserved;
+};
+
+#define make64(high, low) (((u64)(high) << 32) | (low))
+
+/*
+ * Create a coherence subdomain for a given memory block.
+ */
+static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id)
+{
+	struct device_node *np;
+	const __be32 *iprop;
+	void __iomem *lac = NULL;	/* Local Access Control registers */
+	struct ccsr_law __iomem *law;
+	void __iomem *ccm = NULL;
+	u32 __iomem *csdids;
+	unsigned int i, num_laws, num_csds;
+	u32 law_target = 0;
+	u32 csd_id = 0;
+	int ret = 0;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,corenet-law");
+	if (!np)
+		return -ENODEV;
+
+	iprop = of_get_property(np, "fsl,num-laws", NULL);
+	if (!iprop) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	num_laws = be32_to_cpup(iprop);
+	if (!num_laws) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	lac = of_iomap(np, 0);
+	if (!lac) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	/* LAW registers are at offset 0xC00 */
+	law = lac + 0xC00;
+
+	of_node_put(np);
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,corenet-cf");
+	if (!np) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	iprop = of_get_property(np, "fsl,ccf-num-csdids", NULL);
+	if (!iprop) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	num_csds = be32_to_cpup(iprop);
+	if (!num_csds) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	ccm = of_iomap(np, 0);
+	if (!ccm) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* The undocumented CSDID registers are at offset 0x600 */
+	csdids = ccm + 0x600;
+
+	of_node_put(np);
+	np = NULL;
+
+	/* Find an unused coherence subdomain ID */
+	for (csd_id = 0; csd_id < num_csds; csd_id++) {
+		if (!csdids[csd_id])
+			break;
+	}
+
+	/* Store the Port ID in the (undocumented) proper CIDMRxx register */
+	csdids[csd_id] = csd_port_id;
+
+	/* Find the DDR LAW that maps to our buffer. */
+	for (i = 0; i < num_laws; i++) {
+		if (law[i].lawar & LAWAR_EN) {
+			phys_addr_t law_start, law_end;
+
+			law_start = make64(law[i].lawbarh, law[i].lawbarl);
+			law_end = law_start +
+				(2ULL << (law[i].lawar & LAWAR_SIZE_MASK));
+
+			if (law_start <= phys && phys < law_end) {
+				law_target = law[i].lawar & LAWAR_TARGET_MASK;
+				break;
+			}
+		}
+	}
+
+	if (i == 0 || i == num_laws) {
+		/* This should never happen*/
+		ret = -ENOENT;
+		goto error;
+	}
+
+	/* Find a free LAW entry */
+	while (law[--i].lawar & LAWAR_EN) {
+		if (i == 0) {
+			/* No higher priority LAW slots available */
+			ret = -ENOENT;
+			goto error;
+		}
+	}
+
+	law[i].lawbarh = upper_32_bits(phys);
+	law[i].lawbarl = lower_32_bits(phys);
+	wmb();
+	law[i].lawar = LAWAR_EN | law_target | (csd_id << LAWAR_CSDID_SHIFT) |
+		(LAW_SIZE_4K + get_order(size));
+	wmb();
+
+error:
+	if (ccm)
+		iounmap(ccm);
+
+	if (lac)
+		iounmap(lac);
+
+	if (np)
+		of_node_put(np);
+
+	return ret;
+}
+
+/*
+ * Table of SVRs and the corresponding PORT_ID values.
+ *
+ * All future CoreNet-enabled SOCs will have this erratum fixed, so this table
+ * should never need to be updated.  SVRs are guaranteed to be unique, so
+ * there is no worry that a future SOC will inadvertently have one of these
+ * values.
+ */
+static const struct {
+	u32 svr;
+	u32 port_id;
+} port_id_map[] = {
+	{0x82100010, 0xFF000000},	/* P2040 1.0 */
+	{0x82100011, 0xFF000000},	/* P2040 1.1 */
+	{0x82100110, 0xFF000000},	/* P2041 1.0 */
+	{0x82100111, 0xFF000000},	/* P2041 1.1 */
+	{0x82110310, 0xFF000000},	/* P3041 1.0 */
+	{0x82110311, 0xFF000000},	/* P3041 1.1 */
+	{0x82010020, 0xFFF80000},	/* P4040 2.0 */
+	{0x82000020, 0xFFF80000},	/* P4080 2.0 */
+	{0x82210010, 0xFC000000},       /* P5010 1.0 */
+	{0x82210020, 0xFC000000},       /* P5010 2.0 */
+	{0x82200010, 0xFC000000},	/* P5020 1.0 */
+	{0x82050010, 0xFF800000},	/* P5021 1.0 */
+	{0x82040010, 0xFF800000},	/* P5040 1.0 */
+};
+
+#define SVR_SECURITY	0x80000	/* The Security (E) bit */
+
+static int __init fsl_pamu_probe(struct platform_device *pdev)
+{
+	void __iomem *pamu_regs = NULL;
+	struct ccsr_guts __iomem *guts_regs = NULL;
+	u32 pamubypenr, pamu_counter;
+	unsigned long pamu_reg_off;
+	unsigned long pamu_reg_base;
+	struct pamu_isr_data *data;
+	struct device_node *guts_node;
+	u64 size;
+	struct page *p;
+	int ret = 0;
+	int irq;
+	phys_addr_t ppaact_phys;
+	phys_addr_t spaact_phys;
+	phys_addr_t omt_phys;
+	size_t mem_size = 0;
+	unsigned int order = 0;
+	u32 csd_port_id = 0;
+	unsigned i;
+	/*
+	 * enumerate all PAMUs and allocate and setup PAMU tables
+	 * for each of them,
+	 * NOTE : All PAMUs share the same LIODN tables.
+	 */
+
+	pamu_regs = of_iomap(pdev->dev.of_node, 0);
+	if (!pamu_regs) {
+		dev_err(&pdev->dev, "ioremap of PAMU node failed\n");
+		return -ENOMEM;
+	}
+	of_get_address(pdev->dev.of_node, 0, &size, NULL);
+
+	irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (irq == NO_IRQ) {
+		dev_warn(&pdev->dev, "no interrupts listed in PAMU node\n");
+		goto error;
+	}
+
+	data = kzalloc(sizeof(struct pamu_isr_data), GFP_KERNEL);
+	if (!data) {
+		iounmap(pamu_regs);
+		return -ENOMEM;
+	}
+	data->pamu_reg_base = pamu_regs;
+	data->count = size / PAMU_OFFSET;
+
+	/* The ISR needs access to the regs, so we won't iounmap them */
+	ret = request_irq(irq, pamu_av_isr, 0, "pamu", data);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "error %i installing ISR for irq %i\n",
+			ret, irq);
+		goto error;
+	}
+
+	guts_node = of_find_compatible_node(NULL, NULL,
+			"fsl,qoriq-device-config-1.0");
+	if (!guts_node) {
+		dev_err(&pdev->dev, "could not find GUTS node %s\n",
+			pdev->dev.of_node->full_name);
+		ret = -ENODEV;
+		goto error;
+	}
+
+	guts_regs = of_iomap(guts_node, 0);
+	of_node_put(guts_node);
+	if (!guts_regs) {
+		dev_err(&pdev->dev, "ioremap of GUTS node failed\n");
+		ret = -ENODEV;
+		goto error;
+	}
+
+	/* read in the PAMU capability registers */
+	get_pamu_cap_values((unsigned long)pamu_regs);
+	/*
+	 * To simplify the allocation of a coherency domain, we allocate the
+	 * PAACT and the OMT in the same memory buffer.  Unfortunately, this
+	 * wastes more memory compared to allocating the buffers separately.
+	 */
+	/* Determine how much memory we need */
+	mem_size = (PAGE_SIZE << get_order(PAACT_SIZE)) +
+		(PAGE_SIZE << get_order(get_spaact_size())) +
+		(PAGE_SIZE << get_order(OMT_SIZE));
+	order = get_order(mem_size);
+
+	p = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!p) {
+		dev_err(&pdev->dev, "unable to allocate PAACT/SPAACT/OMT block\n");
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	ppaact = page_address(p);
+	ppaact_phys = page_to_phys(p);
+
+	/* Make sure the memory is naturally aligned */
+	if (ppaact_phys & ((PAGE_SIZE << order) - 1)) {
+		dev_err(&pdev->dev, "PAACT/OMT block is unaligned\n");
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	spaact = (void *)ppaact + (PAGE_SIZE << get_order(PAACT_SIZE));
+	omt = (void *)spaact + (PAGE_SIZE << get_order(get_spaact_size()));
+
+	dev_dbg(&pdev->dev, "ppaact virt=%p phys=0x%llx\n", ppaact,
+		(unsigned long long) ppaact_phys);
+
+	/* Check to see if we need to implement the work-around on this SOC */
+
+	/* Determine the Port ID for our coherence subdomain */
+	for (i = 0; i < ARRAY_SIZE(port_id_map); i++) {
+		if (port_id_map[i].svr == (mfspr(SPRN_SVR) & ~SVR_SECURITY)) {
+			csd_port_id = port_id_map[i].port_id;
+			dev_dbg(&pdev->dev, "found matching SVR %08x\n",
+				port_id_map[i].svr);
+			break;
+		}
+	}
+
+	if (csd_port_id) {
+		dev_dbg(&pdev->dev, "creating coherency subdomain at address "
+			"0x%llx, size %zu, port id 0x%08x", ppaact_phys,
+			mem_size, csd_port_id);
+
+		ret = create_csd(ppaact_phys, mem_size, csd_port_id);
+		if (ret) {
+			dev_err(&pdev->dev, "could not create coherence "
+				"subdomain\n");
+			return ret;
+		}
+	}
+
+	spaact_phys = virt_to_phys(spaact);
+	omt_phys = virt_to_phys(omt);
+
+	spaace_pool = gen_pool_create(ilog2(sizeof(struct paace)), -1);
+	if (!spaace_pool) {
+		ret = -ENOMEM;
+		dev_err(&pdev->dev, "PAMU : failed to allocate spaace gen pool\n");
+		goto error;
+	}
+
+	ret = gen_pool_add(spaace_pool, (unsigned long)spaact, get_spaact_size(), -1);
+	if (ret)
+		goto error_genpool;
+
+	pamubypenr = in_be32(&guts_regs->pamubypenr);
+
+	for (pamu_reg_off = 0, pamu_counter = 0x80000000; pamu_reg_off < size;
+	     pamu_reg_off += PAMU_OFFSET, pamu_counter >>= 1) {
+
+		pamu_reg_base = (unsigned long) pamu_regs + pamu_reg_off;
+		setup_one_pamu(pamu_reg_base, pamu_reg_off, ppaact_phys,
+				 spaact_phys, omt_phys);
+		/* Disable PAMU bypass for this PAMU */
+		pamubypenr &= ~pamu_counter;
+	}
+
+	setup_omt(omt);
+
+	/* Enable all relevant PAMU(s) */
+	out_be32(&guts_regs->pamubypenr, pamubypenr);
+
+	iounmap(guts_regs);
+
+	/* Enable DMA for the LIODNs in the device tree*/
+
+	setup_liodns();
+
+	return 0;
+
+error_genpool:
+	gen_pool_destroy(spaace_pool);
+
+error:
+	if (irq != NO_IRQ)
+		free_irq(irq, 0);
+
+	if (pamu_regs)
+		iounmap(pamu_regs);
+
+	if (guts_regs)
+		iounmap(guts_regs);
+
+	if (ppaact)
+		free_pages((unsigned long)ppaact, order);
+
+	ppaact = NULL;
+
+	return ret;
+}
+
+static const struct of_device_id fsl_of_pamu_ids[] = {
+	{
+		.compatible = "fsl,p4080-pamu",
+	},
+	{
+		.compatible = "fsl,pamu",
+	},
+	{},
+};
+
+static struct platform_driver fsl_of_pamu_driver = {
+	.driver = {
+		.name = "fsl-of-pamu",
+		.owner = THIS_MODULE,
+	},
+	.probe = fsl_pamu_probe,
+};
+
+static __init int fsl_pamu_init(void)
+{
+	struct platform_device *pdev = NULL;
+	struct device_node *np;
+	int ret;
+
+	/*
+	 * The normal OF process calls the probe function at some
+	 * indeterminate later time, after most drivers have loaded.  This is
+	 * too late for us, because PAMU clients (like the Qman driver)
+	 * depend on PAMU being initialized early.
+	 *
+	 * So instead, we "manually" call our probe function by creating the
+	 * platform devices ourselves.
+	 */
+
+	/*
+	 * We assume that there is only one PAMU node in the device tree.  A
+	 * single PAMU node represents all of the PAMU devices in the SOC
+	 * already.   Everything else already makes that assumption, and the
+	 * binding for the PAMU nodes doesn't allow for any parent-child
+	 * relationships anyway.  In other words, support for more than one
+	 * PAMU node would require significant changes to a lot of code.
+	 */
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,pamu");
+	if (!np) {
+		pr_err("fsl-pamu: could not find a PAMU node\n");
+		return -ENODEV;
+	}
+
+	ret = platform_driver_register(&fsl_of_pamu_driver);
+	if (ret) {
+		pr_err("fsl-pamu: could not register driver (err=%i)\n", ret);
+		goto error_driver_register;
+	}
+
+	pdev = platform_device_alloc("fsl-of-pamu", 0);
+	if (!pdev) {
+		pr_err("fsl-pamu: could not allocate device %s\n",
+		       np->full_name);
+		ret = -ENOMEM;
+		goto error_device_alloc;
+	}
+	pdev->dev.of_node = of_node_get(np);
+
+	ret = pamu_domain_init();
+	if (ret)
+		goto error_device_add;
+
+	ret = platform_device_add(pdev);
+	if (ret) {
+		pr_err("fsl-pamu: could not add device %s (err=%i)\n",
+		       np->full_name, ret);
+		goto error_device_add;
+	}
+
+	return 0;
+
+error_device_add:
+	of_node_put(pdev->dev.of_node);
+	pdev->dev.of_node = NULL;
+
+	platform_device_put(pdev);
+
+error_device_alloc:
+	platform_driver_unregister(&fsl_of_pamu_driver);
+
+error_driver_register:
+	of_node_put(np);
+
+	return ret;
+}
+subsys_initcall(fsl_pamu_init);
diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h
new file mode 100644
index 0000000..c54bc62
--- /dev/null
+++ b/drivers/iommu/fsl_pamu.h
@@ -0,0 +1,398 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ */
+
+#ifndef __FSL_PAMU_H
+#define __FSL_PAMU_H
+
+/* Bit Field macros
+ * 	v = bit field variable; m = mask, m##_SHIFT = shift, x = value to load
+ */
+#define set_bf(v, m, x)		(v = ((v) & ~(m)) | (((x) << (m##_SHIFT)) & (m)))
+#define get_bf(v, m)		(((v) & (m)) >> (m##_SHIFT))
+
+/* PAMU CCSR space */
+#define PAMU_PGC 0x00000000     /* Allows all peripheral accesses */
+#define PAMU_PE 0x40000000      /* enable PAMU                    */
+
+/* PAMU_OFFSET to the next pamu space in ccsr */
+#define PAMU_OFFSET 0x1000
+
+#define PAMU_MMAP_REGS_BASE 0
+
+struct pamu_mmap_regs {
+	u32 ppbah;
+	u32 ppbal;
+	u32 pplah;
+	u32 pplal;
+	u32 spbah;
+	u32 spbal;
+	u32 splah;
+	u32 splal;
+	u32 obah;
+	u32 obal;
+	u32 olah;
+	u32 olal;
+};
+
+/* PAMU Error Registers */
+#define PAMU_POES1 0x0040
+#define PAMU_POES2 0x0044
+#define PAMU_POEAH 0x0048
+#define PAMU_POEAL 0x004C
+#define PAMU_AVS1  0x0050
+#define PAMU_AVS1_AV    0x1
+#define PAMU_AVS1_OTV   0x6
+#define PAMU_AVS1_APV   0x78
+#define PAMU_AVS1_WAV   0x380
+#define PAMU_AVS1_LAV   0x1c00
+#define PAMU_AVS1_GCV   0x2000
+#define PAMU_AVS1_PDV   0x4000
+#define PAMU_AV_MASK    (PAMU_AVS1_AV | PAMU_AVS1_OTV | PAMU_AVS1_APV | PAMU_AVS1_WAV \
+			| PAMU_AVS1_LAV | PAMU_AVS1_GCV | PAMU_AVS1_PDV)
+#define PAMU_AVS1_LIODN_SHIFT 16
+#define PAMU_LAV_LIODN_NOT_IN_PPAACT 0x400
+
+#define PAMU_AVS2  0x0054
+#define PAMU_AVAH  0x0058
+#define PAMU_AVAL  0x005C
+#define PAMU_EECTL 0x0060
+#define PAMU_EEDIS 0x0064
+#define PAMU_EEINTEN 0x0068
+#define PAMU_EEDET 0x006C
+#define PAMU_EEATTR 0x0070
+#define PAMU_EEAHI 0x0074
+#define PAMU_EEALO 0x0078
+#define PAMU_EEDHI 0X007C
+#define PAMU_EEDLO 0x0080
+#define PAMU_EECC  0x0084
+#define PAMU_UDAD  0x0090
+
+/* PAMU Revision Registers */
+#define PAMU_PR1 0x0BF8
+#define PAMU_PR2 0x0BFC
+
+/* PAMU Capabilities Registers */
+#define PAMU_PC1 0x0C00
+#define PAMU_PC2 0x0C04
+#define PAMU_PC3 0x0C08
+#define PAMU_PC4 0x0C0C
+
+/* PAMU Control Register */
+#define PAMU_PC 0x0C10
+
+/* PAMU control defs */
+#define PAMU_CONTROL 0x0C10
+#define PAMU_PC_PGC 0x80000000  /* PAMU gate closed bit */
+#define PAMU_PC_PE   0x40000000 /* PAMU enable bit */
+#define PAMU_PC_SPCC 0x00000010 /* sPAACE cache enable */
+#define PAMU_PC_PPCC 0x00000001 /* pPAACE cache enable */
+#define PAMU_PC_OCE  0x00001000 /* OMT cache enable */
+
+#define PAMU_PFA1 0x0C14
+#define PAMU_PFA2 0x0C18
+
+#define PAMU_PC2_MLIODN(X) ((X) >> 16)
+#define PAMU_PC3_MWCE(X) (((X) >> 21) & 0xf)
+
+/* PAMU Interrupt control and Status Register */
+#define PAMU_PICS 0x0C1C
+#define PAMU_ACCESS_VIOLATION_STAT   0x8
+#define PAMU_ACCESS_VIOLATION_ENABLE 0x4
+
+/* PAMU Debug Registers */
+#define PAMU_PD1 0x0F00
+#define PAMU_PD2 0x0F04
+#define PAMU_PD3 0x0F08
+#define PAMU_PD4 0x0F0C
+
+#define PAACE_AP_PERMS_DENIED  0x0
+#define PAACE_AP_PERMS_QUERY   0x1
+#define PAACE_AP_PERMS_UPDATE  0x2
+#define PAACE_AP_PERMS_ALL     0x3
+
+#define PAACE_DD_TO_HOST       0x0
+#define PAACE_DD_TO_IO         0x1
+#define PAACE_PT_PRIMARY       0x0
+#define PAACE_PT_SECONDARY     0x1
+#define PAACE_V_INVALID        0x0
+#define PAACE_V_VALID          0x1
+#define PAACE_MW_SUBWINDOWS    0x1
+
+#define PAACE_WSE_4K           0xB
+#define PAACE_WSE_8K           0xC
+#define PAACE_WSE_16K          0xD
+#define PAACE_WSE_32K          0xE
+#define PAACE_WSE_64K          0xF
+#define PAACE_WSE_128K         0x10
+#define PAACE_WSE_256K         0x11
+#define PAACE_WSE_512K         0x12
+#define PAACE_WSE_1M           0x13
+#define PAACE_WSE_2M           0x14
+#define PAACE_WSE_4M           0x15
+#define PAACE_WSE_8M           0x16
+#define PAACE_WSE_16M          0x17
+#define PAACE_WSE_32M          0x18
+#define PAACE_WSE_64M          0x19
+#define PAACE_WSE_128M         0x1A
+#define PAACE_WSE_256M         0x1B
+#define PAACE_WSE_512M         0x1C
+#define PAACE_WSE_1G           0x1D
+#define PAACE_WSE_2G           0x1E
+#define PAACE_WSE_4G           0x1F
+
+#define PAACE_DID_PCI_EXPRESS_1 0x00
+#define PAACE_DID_PCI_EXPRESS_2 0x01
+#define PAACE_DID_PCI_EXPRESS_3 0x02
+#define PAACE_DID_PCI_EXPRESS_4 0x03
+#define PAACE_DID_LOCAL_BUS     0x04
+#define PAACE_DID_SRIO          0x0C
+#define PAACE_DID_MEM_1         0x10
+#define PAACE_DID_MEM_2         0x11
+#define PAACE_DID_MEM_3         0x12
+#define PAACE_DID_MEM_4         0x13
+#define PAACE_DID_MEM_1_2       0x14
+#define PAACE_DID_MEM_3_4       0x15
+#define PAACE_DID_MEM_1_4       0x16
+#define PAACE_DID_BM_SW_PORTAL  0x18
+#define PAACE_DID_PAMU          0x1C
+#define PAACE_DID_CAAM          0x21
+#define PAACE_DID_QM_SW_PORTAL  0x3C
+#define PAACE_DID_CORE0_INST    0x80
+#define PAACE_DID_CORE0_DATA    0x81
+#define PAACE_DID_CORE1_INST    0x82
+#define PAACE_DID_CORE1_DATA    0x83
+#define PAACE_DID_CORE2_INST    0x84
+#define PAACE_DID_CORE2_DATA    0x85
+#define PAACE_DID_CORE3_INST    0x86
+#define PAACE_DID_CORE3_DATA    0x87
+#define PAACE_DID_CORE4_INST    0x88
+#define PAACE_DID_CORE4_DATA    0x89
+#define PAACE_DID_CORE5_INST    0x8A
+#define PAACE_DID_CORE5_DATA    0x8B
+#define PAACE_DID_CORE6_INST    0x8C
+#define PAACE_DID_CORE6_DATA    0x8D
+#define PAACE_DID_CORE7_INST    0x8E
+#define PAACE_DID_CORE7_DATA    0x8F
+#define PAACE_DID_BROADCAST     0xFF
+
+#define PAACE_ATM_NO_XLATE      0x00
+#define PAACE_ATM_WINDOW_XLATE  0x01
+#define PAACE_ATM_PAGE_XLATE    0x02
+#define PAACE_ATM_WIN_PG_XLATE  \
+                ( PAACE_ATM_WINDOW_XLATE | PAACE_ATM_PAGE_XLATE )
+#define PAACE_OTM_NO_XLATE      0x00
+#define PAACE_OTM_IMMEDIATE     0x01
+#define PAACE_OTM_INDEXED       0x02
+#define PAACE_OTM_RESERVED      0x03
+
+#define PAACE_M_COHERENCE_REQ   0x01
+
+#define PAACE_PID_0             0x0
+#define PAACE_PID_1             0x1
+#define PAACE_PID_2             0x2
+#define PAACE_PID_3             0x3
+#define PAACE_PID_4             0x4
+#define PAACE_PID_5             0x5
+#define PAACE_PID_6             0x6
+#define PAACE_PID_7             0x7
+
+#define PAACE_TCEF_FORMAT0_8B   0x00
+#define PAACE_TCEF_FORMAT1_RSVD 0x01
+
+#define PAACE_NUMBER_ENTRIES    0x1FF
+
+#define	OME_NUMBER_ENTRIES      16
+
+/* PAACE Bit Field Defines */
+#define PPAACE_AF_WBAL			0xfffff000
+#define PPAACE_AF_WBAL_SHIFT		12
+#define PPAACE_AF_WSE			0x00000fc0
+#define PPAACE_AF_WSE_SHIFT		6
+#define PPAACE_AF_MW			0x00000020
+#define PPAACE_AF_MW_SHIFT		5
+
+#define SPAACE_AF_LIODN			0xffff0000
+#define SPAACE_AF_LIODN_SHIFT		16
+
+#define PAACE_AF_AP			0x00000018
+#define PAACE_AF_AP_SHIFT		3
+#define PAACE_AF_DD			0x00000004
+#define PAACE_AF_DD_SHIFT		2
+#define PAACE_AF_PT			0x00000002
+#define PAACE_AF_PT_SHIFT		1
+#define PAACE_AF_V			0x00000001
+#define PAACE_AF_V_SHIFT		0
+
+#define PAACE_DA_HOST_CR		0x80
+#define PAACE_DA_HOST_CR_SHIFT		7
+
+#define PAACE_IA_CID			0x00FF0000
+#define PAACE_IA_CID_SHIFT		16
+#define PAACE_IA_WCE			0x000000F0
+#define PAACE_IA_WCE_SHIFT		4
+#define PAACE_IA_ATM			0x0000000C
+#define PAACE_IA_ATM_SHIFT		2
+#define PAACE_IA_OTM			0x00000003
+#define PAACE_IA_OTM_SHIFT		0
+
+#define PAACE_WIN_TWBAL			0xfffff000
+#define PAACE_WIN_TWBAL_SHIFT		12
+#define PAACE_WIN_SWSE			0x00000fc0
+#define PAACE_WIN_SWSE_SHIFT		6
+
+/* PAMU Data Structures */
+/* primary / secondary paact structure */
+struct paace {
+	/* PAACE Offset 0x00 */
+	u32 wbah;				/* only valid for Primary PAACE */
+	u32 addr_bitfields;		/* See P/S PAACE_AF_* */
+
+	/* PAACE Offset 0x08 */
+	/* Interpretation of first 32 bits dependent on DD above */
+	union {
+		struct {
+			/* Destination ID, see PAACE_DID_* defines */
+			u8 did;
+			/* Partition ID */
+			u8 pid;
+			/* Snoop ID */
+			u8 snpid;
+			/* coherency_required : 1 reserved : 7 */
+			u8 coherency_required; /* See PAACE_DA_* */
+		} to_host;
+		struct {
+			/* Destination ID, see PAACE_DID_* defines */
+			u8  did;
+			u8  reserved1;
+			u16 reserved2;
+		} to_io;
+	} domain_attr;
+
+	/* Implementation attributes + window count + address & operation translation modes */
+	u32 impl_attr;			/* See PAACE_IA_* */
+
+	/* PAACE Offset 0x10 */
+	/* Translated window base address */
+	u32 twbah;
+	u32 win_bitfields;			/* See PAACE_WIN_* */
+
+	/* PAACE Offset 0x18 */
+	/* first secondary paace entry */
+	u32 fspi;				/* only valid for Primary PAACE */
+	union {
+		struct {
+			u8 ioea;
+			u8 moea;
+			u8 ioeb;
+			u8 moeb;
+		} immed_ot;
+		struct {
+			u16 reserved;
+			u16 omi;
+		} index_ot;
+	} op_encode;
+
+	/* PAACE Offsets 0x20-0x38 */
+	u32 reserved[8];			/* not currently implemented */
+};
+
+/* OME : Operation mapping entry
+ * MOE : Mapped Operation Encodings
+ * The operation mapping table is table containing operation mapping entries (OME).
+ * The index of a particular OME is programmed in the PAACE entry for translation
+ * in bound I/O operations corresponding to an LIODN. The OMT is used for translation
+ * specifically in case of the indexed translation mode. Each OME contains a 128
+ * byte mapped operation encoding (MOE), where each byte represents an MOE. 
+ */
+#define NUM_MOE 128
+struct ome {
+	u8 moe[NUM_MOE];
+} __attribute__((packed));
+
+#define PAACT_SIZE              (sizeof(struct paace) * PAACE_NUMBER_ENTRIES)
+#define OMT_SIZE                (sizeof(struct ome) * OME_NUMBER_ENTRIES)
+
+#define PAMU_PAGE_SHIFT 12
+#define PAMU_PAGE_SIZE  4096ULL
+
+#define IOE_READ        0x00
+#define IOE_READ_IDX    0x00
+#define IOE_WRITE       0x81
+#define IOE_WRITE_IDX   0x01
+#define IOE_EREAD0      0x82    /* Enhanced read type 0 */
+#define IOE_EREAD0_IDX  0x02    /* Enhanced read type 0 */
+#define IOE_EWRITE0     0x83    /* Enhanced write type 0 */
+#define IOE_EWRITE0_IDX 0x03    /* Enhanced write type 0 */
+#define IOE_DIRECT0     0x84    /* Directive type 0 */
+#define IOE_DIRECT0_IDX 0x04    /* Directive type 0 */
+#define IOE_EREAD1      0x85    /* Enhanced read type 1 */
+#define IOE_EREAD1_IDX  0x05    /* Enhanced read type 1 */
+#define IOE_EWRITE1     0x86    /* Enhanced write type 1 */
+#define IOE_EWRITE1_IDX 0x06    /* Enhanced write type 1 */
+#define IOE_DIRECT1     0x87    /* Directive type 1 */
+#define IOE_DIRECT1_IDX 0x07    /* Directive type 1 */
+#define IOE_RAC         0x8c    /* Read with Atomic clear */
+#define IOE_RAC_IDX     0x0c    /* Read with Atomic clear */
+#define IOE_RAS         0x8d    /* Read with Atomic set */
+#define IOE_RAS_IDX     0x0d    /* Read with Atomic set */
+#define IOE_RAD         0x8e    /* Read with Atomic decrement */
+#define IOE_RAD_IDX     0x0e    /* Read with Atomic decrement */
+#define IOE_RAI         0x8f    /* Read with Atomic increment */
+#define IOE_RAI_IDX     0x0f    /* Read with Atomic increment */
+
+#define EOE_READ        0x00
+#define EOE_WRITE       0x01
+#define EOE_RAC         0x0c    /* Read with Atomic clear */
+#define EOE_RAS         0x0d    /* Read with Atomic set */
+#define EOE_RAD         0x0e    /* Read with Atomic decrement */
+#define EOE_RAI         0x0f    /* Read with Atomic increment */
+#define EOE_LDEC        0x10    /* Load external cache */
+#define EOE_LDECL       0x11    /* Load external cache with stash lock */
+#define EOE_LDECPE      0x12    /* Load external cache with preferred exclusive */
+#define EOE_LDECPEL     0x13    /* Load external cache with preferred exclusive and lock */
+#define EOE_LDECFE      0x14    /* Load external cache with forced exclusive */
+#define EOE_LDECFEL     0x15    /* Load external cache with forced exclusive and lock */
+#define EOE_RSA         0x16    /* Read with stash allocate */
+#define EOE_RSAU        0x17    /* Read with stash allocate and unlock */
+#define EOE_READI       0x18    /* Read with invalidate */
+#define EOE_RWNITC      0x19    /* Read with no intention to cache */
+#define EOE_WCI         0x1a    /* Write cache inhibited */
+#define EOE_WWSA        0x1b    /* Write with stash allocate */
+#define EOE_WWSAL       0x1c    /* Write with stash allocate and lock */
+#define EOE_WWSAO       0x1d    /* Write with stash allocate only */
+#define EOE_WWSAOL      0x1e    /* Write with stash allocate only and lock */
+#define EOE_VALID       0x80
+
+/* Function prototypes */
+int pamu_domain_init(void);
+int pamu_enable_liodn(int liodn);
+int pamu_disable_liodn(int liodn);
+void pamu_free_subwins(int liodn);
+int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size,
+		       u32 omi, unsigned long rpn, u32 snoopid, uint32_t stashid,
+		       u32 subwin_cnt, int prot);
+int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin_addr,
+		       phys_addr_t subwin_size, u32 omi, unsigned long rpn,
+		       uint32_t snoopid, u32 stashid, int enable, int prot);
+
+u32 get_stash_id(u32 stash_dest_hint, u32 vcpu);
+void get_ome_index(u32 *omi_index, struct device *dev);
+int  pamu_update_paace_stash(int liodn, u32 subwin, u32 value);
+int pamu_disable_spaace(int liodn, u32 subwin);
+
+#endif  /* __FSL_PAMU_H */
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
new file mode 100644
index 0000000..15b9909
--- /dev/null
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -0,0 +1,1135 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ * Author: Varun Sethi <varun.sethi@freescale.com>
+ *
+ */
+
+#define pr_fmt(fmt)    "fsl-pamu-domain: %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/iommu.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/of_platform.h>
+#include <linux/bootmem.h>
+#include <linux/err.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+
+#include <asm/pci-bridge.h>
+
+#include "fsl_pamu_domain.h"
+
+/* 
+ * Global spinlock that needs to be held while
+ * configuring PAMU.
+ */
+static DEFINE_SPINLOCK(iommu_lock);
+
+static struct kmem_cache *fsl_pamu_domain_cache;
+static struct kmem_cache *iommu_devinfo_cache;
+static DEFINE_SPINLOCK(device_domain_lock);
+
+int __init iommu_init_mempool(void)
+{
+
+	fsl_pamu_domain_cache = kmem_cache_create("fsl_pamu_domain",
+					 sizeof(struct fsl_dma_domain),
+					 0,
+					 SLAB_HWCACHE_ALIGN,
+
+					 NULL);
+	if (!fsl_pamu_domain_cache) {
+		pr_err("Couldn't create fsl iommu_domain cache\n");
+		return -ENOMEM;
+	}
+
+	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
+					 sizeof(struct device_domain_info),
+					 0,
+					 SLAB_HWCACHE_ALIGN,
+					 NULL);
+	if (!iommu_devinfo_cache) {
+		pr_err("Couldn't create devinfo cache\n");
+		kmem_cache_destroy(fsl_pamu_domain_cache);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static phys_addr_t get_phys_addr(struct fsl_dma_domain *dma_domain, unsigned long iova)
+{
+	u32 win_cnt = dma_domain->win_cnt;
+	struct dma_window *win_ptr =
+				&dma_domain->win_arr[0];
+	struct iommu_domain_geometry *geom;
+
+	geom = &dma_domain->iommu_domain->geometry;
+
+	if (!win_cnt || !dma_domain->geom_size) {
+		pr_err("Number of windows/geometry not configured for the domain\n");
+		return 0;
+	}
+
+	if (win_cnt > 1) {
+		u64 subwin_size;
+		unsigned long subwin_iova;
+		u32 wnd;
+
+		subwin_size = dma_domain->geom_size >> ilog2(win_cnt);
+		subwin_iova = iova & ~(subwin_size - 1);
+		wnd = (subwin_iova - geom->aperture_start) >> ilog2(subwin_size);
+		win_ptr = &dma_domain->win_arr[wnd];
+	}
+
+	if (win_ptr->valid)
+		return (win_ptr->paddr + (iova & (win_ptr->size - 1)));
+
+	return 0;
+}
+
+static int map_liodn_subwins(int liodn, struct fsl_dma_domain *dma_domain)
+{
+	struct dma_window *sub_win_ptr = 
+				&dma_domain->win_arr[0];
+	int i, ret;
+	unsigned long rpn;
+
+	for (i = 0; i < dma_domain->win_cnt; i++) {
+		if (sub_win_ptr[i].valid) {
+			rpn = sub_win_ptr[i].paddr >>
+				 PAMU_PAGE_SHIFT;
+			spin_lock(&iommu_lock);
+			ret = pamu_config_spaace(liodn, dma_domain->win_cnt, i,
+						 sub_win_ptr[i].size,
+						 ~(u32)0,
+						 rpn,
+						 dma_domain->snoop_id,
+						 dma_domain->stash_id,
+						 (i > 0) ? 1 : 0,
+						 sub_win_ptr[i].prot);
+			spin_unlock(&iommu_lock);
+			if (ret) {
+				pr_err("PAMU SPAACE configuration failed for liodn %d\n",
+					 liodn);
+				return ret;
+			}
+		}
+	}
+
+	return ret;
+}
+
+static int map_liodn_win(int liodn, struct fsl_dma_domain *dma_domain)
+{
+	int ret;
+	struct dma_window *wnd = &dma_domain->win_arr[0];
+	phys_addr_t wnd_addr = dma_domain->iommu_domain->geometry.aperture_start;
+
+	spin_lock(&iommu_lock);
+	ret = pamu_config_ppaace(liodn, wnd_addr,
+				 wnd->size,
+				 ~(u32)0,
+				 wnd->paddr >> PAMU_PAGE_SHIFT,
+				 dma_domain->snoop_id, dma_domain->stash_id,
+				 0, wnd->prot);
+	spin_unlock(&iommu_lock);
+	if (ret)
+		pr_err("PAMU PAACE configuration failed for liodn %d\n",
+			liodn);
+
+	return ret;
+}
+
+/* Map the DMA window corresponding to the LIODN */
+static int map_liodn(int liodn, struct fsl_dma_domain *dma_domain)
+{
+	if (dma_domain->win_cnt > 1)
+		return map_liodn_subwins(liodn, dma_domain);
+	else
+		return map_liodn_win(liodn, dma_domain);
+
+}
+
+/* Update window/subwindow mapping for the LIODN */
+static int update_liodn(int liodn, struct fsl_dma_domain *dma_domain, u32 wnd_nr)
+{
+	int ret;
+	struct dma_window *wnd = &dma_domain->win_arr[wnd_nr];
+
+	spin_lock(&iommu_lock);
+	if (dma_domain->win_cnt > 1) {
+		ret = pamu_config_spaace(liodn, dma_domain->win_cnt, wnd_nr,
+					 wnd->size,
+					 ~(u32)0,
+					 wnd->paddr >> PAMU_PAGE_SHIFT,
+					 dma_domain->snoop_id,
+					 dma_domain->stash_id,
+					 (wnd_nr > 0) ? 1 : 0,
+					 wnd->prot);
+		if (ret)
+			pr_err("Subwindow reconfiguration failed for liodn %d\n", liodn);
+	} else {
+		phys_addr_t wnd_addr;
+
+		wnd_addr = dma_domain->iommu_domain->geometry.aperture_start;
+
+		ret = pamu_config_ppaace(liodn, wnd_addr,
+					 wnd->size,
+					 ~(u32)0,
+					 wnd->paddr >> PAMU_PAGE_SHIFT,
+				 	dma_domain->snoop_id, dma_domain->stash_id,
+				 	0, wnd->prot);
+		if (ret)
+			pr_err("Window reconfiguration failed for liodn %d\n", liodn);
+	}
+
+	spin_unlock(&iommu_lock);
+
+	return ret;
+}
+
+static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain,
+				 u32 val)
+{
+	int ret = 0, i;
+
+	spin_lock(&iommu_lock);
+	if (!dma_domain->win_cnt) {
+		ret = pamu_update_paace_stash(liodn, 0, val);
+		if (ret) {
+			pr_err("Failed to update PAACE field for liodn %d\n ", liodn);
+			spin_unlock(&iommu_lock);
+			return ret;
+		}
+	} else {
+		for (i = 0; i < dma_domain->win_cnt; i++) {
+			ret = pamu_update_paace_stash(liodn, i, val);
+			if (ret) {
+				pr_err("Failed to update SPAACE %d field for liodn %d\n ", i, liodn);
+				spin_unlock(&iommu_lock);
+				return ret;
+			}
+		}
+	}
+	spin_unlock(&iommu_lock);
+
+	return ret;
+}
+
+/* Set the geometry parameters for a LIODN */
+static int configure_liodn(int liodn, struct device *dev,
+			   struct fsl_dma_domain *dma_domain,
+			   struct iommu_domain_geometry *geom_attr,
+			   u32 win_cnt)
+{
+	phys_addr_t window_addr, window_size;
+	phys_addr_t subwin_size;
+	int ret = 0, i;
+	u32 omi_index = ~(u32)0;
+
+	/* 
+	 * Configure the omi_index at the geometry setup time.
+	 * This is a static value which depends on the type of
+	 * device and would not change thereafter.
+	 */
+	get_ome_index(&omi_index, dev);
+
+	window_addr = geom_attr->aperture_start;
+	window_size = dma_domain->geom_size;
+
+	spin_lock(&iommu_lock);
+	ret = pamu_disable_liodn(liodn);
+	if (!ret)
+		ret = pamu_config_ppaace(liodn, window_addr, window_size, omi_index,
+					 0, dma_domain->snoop_id,
+					 dma_domain->stash_id, win_cnt, 0);
+	spin_unlock(&iommu_lock);
+	if (ret) {
+		pr_err("PAMU PAACE configuration failed for liodn %d, win_cnt =%d\n", liodn, win_cnt);
+		return ret;
+	}
+
+	if (win_cnt > 1) {
+		subwin_size = window_size >> ilog2(win_cnt);
+		for (i = 0; i < win_cnt; i++) {
+			spin_lock(&iommu_lock);
+			ret = pamu_config_spaace(liodn, win_cnt, i, subwin_size,
+						 omi_index, 0,
+						 dma_domain->snoop_id,
+						 dma_domain->stash_id, 0, 0);
+			spin_unlock(&iommu_lock);
+			if (ret) {
+				pr_err("PAMU SPAACE configuration failed for liodn %d\n", liodn);
+				return ret;
+			}
+		}
+	}
+
+	return ret;
+}
+
+static int check_size(u64 size, unsigned long iova)
+{
+	/*
+	 * Size must be a power of two and at least be equal
+	 * to PAMU page size.
+	 */
+	if ((size & (size - 1)) || size < PAMU_PAGE_SIZE) {
+		pr_err("%s: size too small or not a power of two\n", __func__);
+		return -EINVAL;
+	}
+
+	/* iova must be page size aligned*/
+	if (iova & (size - 1)) {
+		pr_err("%s: address is not aligned with window size\n", __func__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct fsl_dma_domain *iommu_alloc_dma_domain(void)
+{
+	struct fsl_dma_domain *domain;
+
+	domain = kmem_cache_zalloc(fsl_pamu_domain_cache, GFP_KERNEL);
+	if (!domain)
+		return NULL;
+
+	domain->stash_id = ~(u32)0;
+	domain->snoop_id = ~(u32)0;
+	domain->win_cnt = max_subwindow_count;
+	domain->geom_size = 0;
+
+	INIT_LIST_HEAD(&domain->devices);
+
+	spin_lock_init(&domain->domain_lock);
+
+	return domain;
+}
+
+static inline struct device_domain_info *find_domain(struct device *dev)
+{
+	return dev->archdata.iommu_domain;
+}
+
+static void remove_domain_ref(struct device_domain_info *info, u32 win_cnt)
+{
+		list_del(&info->link);
+		spin_lock(&iommu_lock);
+		if (win_cnt)
+			pamu_free_subwins(info->liodn);
+		pamu_disable_liodn(info->liodn);
+		spin_unlock(&iommu_lock);
+		spin_lock(&device_domain_lock);
+		info->dev->archdata.iommu_domain = NULL;
+		kmem_cache_free(iommu_devinfo_cache, info);
+		spin_unlock(&device_domain_lock);
+}
+
+static void destroy_domain(struct fsl_dma_domain *dma_domain)
+{
+	struct device_domain_info *info;
+
+	/* Dissociate all the devices from this domain */
+	while (!list_empty(&dma_domain->devices)) {
+		info = list_entry(dma_domain->devices.next,
+			struct device_domain_info, link);
+		remove_domain_ref(info, dma_domain->win_cnt);
+	}
+}
+
+static void detach_domain(struct device *dev, struct fsl_dma_domain *dma_domain)
+{
+	struct device_domain_info *info;
+	struct list_head *entry, *tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dma_domain->domain_lock, flags);
+	/* Remove the device from the domain device list */
+	if (!list_empty(&dma_domain->devices)) {
+		list_for_each_safe(entry, tmp, &dma_domain->devices) {
+			info = list_entry(entry, struct device_domain_info, link);
+			if (info->dev == dev)
+				remove_domain_ref(info, dma_domain->win_cnt);
+		}
+	}
+	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+}
+
+static void attach_domain(struct fsl_dma_domain *dma_domain, int liodn, struct device *dev)
+{
+	struct device_domain_info *info, *old_domain_info;
+
+	spin_lock(&device_domain_lock);
+	/* 
+	 * Check here if the device is already attached to domain or not.
+	 * If the device is already attached to a domain detach it.
+	 */
+	old_domain_info = find_domain(dev);
+	if (old_domain_info && old_domain_info->domain != dma_domain) {
+		spin_unlock(&device_domain_lock);
+		detach_domain(dev, old_domain_info->domain);
+		spin_lock(&device_domain_lock);
+	}
+
+	info = kmem_cache_zalloc(iommu_devinfo_cache, GFP_KERNEL);
+
+	info->dev = dev;
+	info->liodn = liodn;
+	info->domain = dma_domain;
+
+	list_add(&info->link, &dma_domain->devices);
+	/* 
+	 * In case of devices with multiple LIODNs just store
+	 * the info for the first LIODN as all
+	 * LIODNs share the same domain
+	 */
+	if (!old_domain_info)
+		dev->archdata.iommu_domain = info;
+	spin_unlock(&device_domain_lock);
+
+}
+
+static phys_addr_t fsl_pamu_iova_to_phys(struct iommu_domain *domain,
+					    unsigned long iova)
+{
+	struct fsl_dma_domain *dma_domain = domain->priv;
+
+	if ((iova < domain->geometry.aperture_start) ||
+		iova > (domain->geometry.aperture_end))
+		return 0;
+
+	return get_phys_addr(dma_domain, iova);
+}
+
+static int fsl_pamu_domain_has_cap(struct iommu_domain *domain,
+				      unsigned long cap)
+{
+	return cap == IOMMU_CAP_CACHE_COHERENCY;
+}
+
+static void fsl_pamu_domain_destroy(struct iommu_domain *domain)
+{
+	struct fsl_dma_domain *dma_domain = domain->priv;
+
+	domain->priv = NULL;
+
+	destroy_domain(dma_domain);
+
+	dma_domain->enabled = 0;
+	dma_domain->mapped = 0;
+
+	kmem_cache_free(fsl_pamu_domain_cache, dma_domain);
+}
+
+static int fsl_pamu_domain_init(struct iommu_domain *domain)
+{
+	struct fsl_dma_domain *dma_domain;
+
+	dma_domain = iommu_alloc_dma_domain();
+	if (!dma_domain) {
+		pr_err("dma_domain allocation failed\n");
+		return -ENOMEM;
+	}
+	domain->priv = dma_domain;
+	dma_domain->iommu_domain = domain;
+	/* defaul geometry 64 GB i.e. maximum system address */
+	domain->geometry.aperture_start = 0;
+	domain->geometry.aperture_end = 1ULL << 36; 
+	domain->geometry.force_aperture = true;
+
+	return 0;
+}
+
+/* Configure geometry settings for all LIODNs associated with domain */
+static int configure_domain(struct fsl_dma_domain *dma_domain,
+			    struct iommu_domain_geometry *geom_attr,
+			    u32 win_cnt)
+{
+	struct device_domain_info *info;
+	int ret = 0;
+
+	if (!list_empty(&dma_domain->devices)) {
+		list_for_each_entry(info, &dma_domain->devices, link) {
+			ret = configure_liodn(info->liodn, info->dev, dma_domain,
+					      geom_attr, win_cnt);
+			if (ret)
+				break;
+		}
+	}
+
+	return ret;
+}
+
+/* Update stash destination for all LIODNs associated with the domain */
+static int update_domain_stash(struct fsl_dma_domain *dma_domain, u32 val)
+{
+	struct device_domain_info *info;
+	int ret = 0;
+
+	if (!list_empty(&dma_domain->devices)) {
+		list_for_each_entry(info, &dma_domain->devices, link) {
+			ret = update_liodn_stash(info->liodn, dma_domain, val);
+			if (ret)
+				break;
+		}
+	}
+
+	return ret;
+}
+
+/* Update domain mappings for all LIODNs associated with the domain */
+static int update_domain_mapping(struct fsl_dma_domain *dma_domain, u32 wnd_nr)
+{
+	struct device_domain_info *info;
+	int ret = 0;
+
+	if (!list_empty(&dma_domain->devices)) {
+		list_for_each_entry(info, &dma_domain->devices, link) {
+			ret = update_liodn(info->liodn, dma_domain, wnd_nr);
+			if (ret)
+				break;
+		}
+	}
+	return ret;
+}
+
+static int disable_domain_win(struct fsl_dma_domain *dma_domain, u32 wnd_nr)
+{
+	struct device_domain_info *info;
+	int ret = 0;
+
+	if (!list_empty(&dma_domain->devices)) {
+		list_for_each_entry(info, &dma_domain->devices, link) {
+			if (dma_domain->win_cnt == 1 && dma_domain->enabled) {
+				ret = pamu_disable_liodn(info->liodn);
+				if (!ret)
+					dma_domain->enabled = 0;
+			} else {
+				ret = pamu_disable_spaace(info->liodn, wnd_nr);
+			}
+		}
+	}
+
+	return ret;
+}
+
+static void fsl_pamu_window_disable(struct iommu_domain *domain, u32 wnd_nr)
+{
+	struct fsl_dma_domain *dma_domain = domain->priv;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dma_domain->domain_lock, flags);
+	if (!dma_domain->win_arr) {
+		pr_err("Number of windows not configured\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return;
+	}
+
+	if (wnd_nr >= dma_domain->win_cnt) {
+		pr_err("Invalid window index\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return;
+	}
+
+	if (dma_domain->win_arr[wnd_nr].valid) {
+		ret = disable_domain_win(dma_domain, wnd_nr);
+		if (!ret) {
+			dma_domain->win_arr[wnd_nr].valid = 0;
+			dma_domain->mapped--;
+		}
+	}
+
+	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+}
+
+static int fsl_pamu_window_enable(struct iommu_domain *domain, u32 wnd_nr,
+				  phys_addr_t paddr, u64 size, int iommu_prot)
+{
+	struct fsl_dma_domain *dma_domain = domain->priv;
+	struct dma_window *wnd;
+	int prot = 0;
+	int ret;
+	unsigned long flags;
+	u64 win_size;
+
+	if (iommu_prot & IOMMU_READ)
+		prot |= PAACE_AP_PERMS_QUERY;
+	if (iommu_prot & IOMMU_WRITE)
+		prot |= PAACE_AP_PERMS_UPDATE;
+
+	spin_lock_irqsave(&dma_domain->domain_lock, flags);
+	if (!dma_domain->win_arr) {
+		pr_err("Number of windows not configured\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return -ENODEV;
+	}
+
+	if (wnd_nr >= dma_domain->win_cnt) {
+		pr_err("Invalid window index\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return -EINVAL;
+	}
+
+	win_size = dma_domain->geom_size >> ilog2(dma_domain->win_cnt);
+	if (size > win_size) {
+		pr_err("Invalid window size \n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return -EINVAL;
+	}
+
+	if (dma_domain->win_cnt == 1) {
+		if (dma_domain->enabled) {
+			pr_err("Disable the window before updating the mapping\n");
+			spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+			return -EBUSY;
+		}
+
+		ret = check_size(size, domain->geometry.aperture_start);
+		if (ret) {
+			pr_err("Aperture start not aligned to the size\n");
+			spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+			return -EINVAL;
+		}
+	}
+
+	wnd = &dma_domain->win_arr[wnd_nr];
+	if (!wnd->valid) {
+		wnd->paddr = paddr;
+		wnd->size = size;
+		wnd->prot = prot;
+
+		ret = update_domain_mapping(dma_domain, wnd_nr);
+		if (!ret) {
+			wnd->valid = 1;
+			dma_domain->mapped++;
+		}
+	} else {
+		pr_err("Disable the window before updating the mapping\n");
+		ret = -EBUSY;
+	}
+
+	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+	return ret;
+}
+
+/*
+ * Attach the LIODN to the DMA domain and configure the geometry
+ * and window mappings.
+ */
+static int handle_attach_device(struct fsl_dma_domain *dma_domain,
+				 struct device *dev, const u32 *liodn,
+				 int num)
+{
+	unsigned long flags;
+	struct iommu_domain *domain = dma_domain->iommu_domain;
+	int ret = 0;
+	int i;
+
+	spin_lock_irqsave(&dma_domain->domain_lock, flags);
+	for (i = 0; i < num; i++) {
+
+		/* Ensure that LIODN value is valid */
+		if (liodn[i] > PAACE_NUMBER_ENTRIES) {
+			pr_err("Invalid liodn %d, attach device failed for %s\n",
+		          	liodn[i], dev->of_node->full_name);
+			ret = -EINVAL;
+			break;
+		} 
+
+		attach_domain(dma_domain, liodn[i], dev);
+		/*
+		 * Check if geometry has already been configured
+		 * for the domain. If yes, set the geometry for
+		 * the LIODN.
+		 */
+		if (dma_domain->win_cnt) {
+			u32 win_cnt = dma_domain->win_cnt > 1 ? dma_domain->win_cnt : 0;
+			ret = configure_liodn(liodn[i], dev, dma_domain,
+					      &domain->geometry,
+					      win_cnt);
+			if (ret)
+				break;
+			if (dma_domain->mapped) {
+				/*
+				 * Create window/subwindow mapping for
+				 * the LIODN.
+				 */
+				ret = map_liodn(liodn[i], dma_domain);
+				if (ret)
+					break;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+	return ret;
+}
+
+static int fsl_pamu_attach_device(struct iommu_domain *domain,
+				  struct device *dev)
+{
+	struct fsl_dma_domain *dma_domain = domain->priv;
+	const u32 *prop;
+	u32 prop_cnt;
+	int len, ret = 0;
+	struct pci_dev *pdev = NULL;
+	struct pci_controller *pci_ctl;
+
+	/*
+	 * Hack to make attach device work for the PCI devices. Simply assign the
+	 * the LIODN for the PCI controller to the PCI device.
+	 */
+	if (dev->bus == &pci_bus_type) {
+		pdev = to_pci_dev(dev);
+		pci_ctl = pci_bus_to_host(pdev->bus);
+		/*
+		 * make dev point to pci controller device
+		 * so we can get the LIODN programmed by
+		 * u-boot;
+		 */
+		dev = pci_ctl->parent;
+	}
+
+	prop = of_get_property(dev->of_node, "fsl,liodn", &len);
+	if (prop) {
+		prop_cnt = len / sizeof(u32);
+		ret = handle_attach_device(dma_domain, dev,
+					 prop, prop_cnt);
+	} else {
+		pr_err("missing fsl,liodn property at %s\n",
+		          dev->of_node->full_name);
+			ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static void fsl_pamu_detach_device(struct iommu_domain *domain,
+				      struct device *dev)
+{
+	struct fsl_dma_domain *dma_domain = domain->priv;
+	const u32 *prop;
+	int len;
+	struct pci_dev *pdev = NULL;
+	struct pci_controller *pci_ctl;
+
+	/*
+	 * Hack to make detach device work for the PCI devices. Simply assign the
+	 * the LIODN for the PCI controller to the PCI device.
+	 */
+	if (dev->bus == &pci_bus_type) {
+		pdev = to_pci_dev(dev);
+		pci_ctl = pci_bus_to_host(pdev->bus);
+		/*
+		 * make dev point to pci controller device
+		 * so we can get the LIODN programmed by
+		 * u-boot;
+		 */
+		dev = pci_ctl->parent;
+	}
+
+	prop = of_get_property(dev->of_node, "fsl,liodn", &len);
+	if (prop)
+		detach_domain(dev, dma_domain);
+	else
+		pr_err("missing fsl,liodn property at %s\n",
+		          dev->of_node->full_name);
+}
+
+static  int configure_domain_geometry(struct iommu_domain *domain, void *data)
+{
+	struct iommu_domain_geometry *geom_attr = data;
+	struct fsl_dma_domain *dma_domain = domain->priv;
+	dma_addr_t geom_size;
+	unsigned long flags;
+
+	geom_size = geom_attr->aperture_end - geom_attr->aperture_start;
+	/*
+	 * Sanity check the geometry size. Also, we do not support
+	 * DMA outside of the geometry.
+	 */
+	if (check_size(geom_size, geom_attr->aperture_start) ||
+		!geom_attr->force_aperture) {
+			pr_err("Invalid PAMU geometry attributes\n");
+			return -EINVAL;
+		}
+
+	spin_lock_irqsave(&dma_domain->domain_lock, flags);
+	if (dma_domain->enabled) {
+		pr_err("Can't set geometry attributes as domain is active\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return  -EBUSY;
+	}
+
+	/* Copy the domain geometry information */
+	memcpy(&domain->geometry, geom_attr,
+	       sizeof(struct iommu_domain_geometry));
+	dma_domain->geom_size = geom_size;
+
+	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+	return 0;
+}
+
+/* Set the domain stash attribute */
+static int configure_domain_stash(struct fsl_dma_domain *dma_domain, void *data)
+{
+	struct iommu_stash_attribute *stash_attr = data;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dma_domain->domain_lock, flags);
+
+	memcpy(&dma_domain->dma_stash, stash_attr,
+		 sizeof(struct iommu_stash_attribute));
+
+	dma_domain->stash_id = get_stash_id(stash_attr->cache,
+					    stash_attr->cpu);
+	if (dma_domain->stash_id == ~(u32)0) {
+		pr_err("Invalid stash attributes\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return -EINVAL;
+	}
+
+	ret = update_domain_stash(dma_domain, dma_domain->stash_id);
+
+	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+	return ret;
+}
+
+/* Configure domain dma state i.e. enable/disable DMA*/
+static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool enable)
+{
+	struct device_domain_info *info;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dma_domain->domain_lock, flags);
+
+	if (enable && !dma_domain->mapped) {
+		pr_err("Can't enable DMA domain without valid mapping\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return -ENODEV;
+	}
+
+	dma_domain->enabled = enable;
+	if (!list_empty(&dma_domain->devices)) {
+		list_for_each_entry(info, &dma_domain->devices,
+					 link) {
+			ret = (enable) ? pamu_enable_liodn(info->liodn):
+				pamu_disable_liodn(info->liodn);
+			if (ret)
+				pr_err("Unable to set dma state for liodn %d",
+					 info->liodn);
+		}
+	}
+	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+	return 0;
+}
+
+int fsl_pamu_set_domain_attr(struct iommu_domain *domain,
+				 enum iommu_attr attr_type, void *data)
+{
+	struct fsl_dma_domain *dma_domain = domain->priv;
+	int ret = 0;
+
+
+	switch(attr_type) {
+	case DOMAIN_ATTR_GEOMETRY:
+		ret = configure_domain_geometry(domain, data);
+		break;
+	case DOMAIN_ATTR_PAMU_STASH:
+		ret = configure_domain_stash(dma_domain, data);
+		break;
+	case DOMAIN_ATTR_PAMU_ENABLE:
+		ret = configure_domain_dma_state(dma_domain, *(int *)data);
+		break;
+	default:
+		pr_err("Unsupported attribute type\n");
+		ret = -EINVAL;
+		break;
+	};
+
+	return ret;
+}
+
+int fsl_pamu_get_domain_attr(struct iommu_domain *domain,
+				 enum iommu_attr attr_type, void *data)
+{
+	struct fsl_dma_domain *dma_domain = domain->priv;
+	int ret = 0;
+
+
+	switch(attr_type) {
+	case DOMAIN_ATTR_PAMU_STASH:
+		memcpy((struct iommu_stash_attribute *) data, &dma_domain->dma_stash,
+				 sizeof(struct iommu_stash_attribute));
+		break;
+	case DOMAIN_ATTR_PAMU_ENABLE:
+		*(int *)data = dma_domain->enabled;
+		break;
+	case DOMAIN_ATTR_FSL_PAMUV1:
+		*(int *)data = DOMAIN_ATTR_FSL_PAMUV1;
+		break;
+	default:
+		pr_err("Unsupported attribute type\n");
+		ret = -EINVAL;
+		break;
+	};
+
+	return ret;
+}
+
+static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
+{
+	pci_dev_put(*from);
+	*from = to;
+}
+
+static struct iommu_group *get_device_iommu_group(struct device *dev)
+{
+	struct iommu_group *group;
+
+	group = iommu_group_get(dev);
+	if (!group)
+		group = iommu_group_alloc();
+
+	return group;
+}
+
+static  bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl)
+{
+	u32 version;
+
+	/* Check the PCI controller version number by readding BRR1 register */
+	version = in_be32(pci_ctl->cfg_addr + (PCI_FSL_BRR1 >> 2));
+	version &= PCI_FSL_BRR1_VER;
+	/* If PCI controller version is >= 0x204 we can partition endpoints*/
+	if (version >= 0x204)
+		return 1;
+
+	return 0;
+}
+
+static int fsl_pamu_add_device(struct device *dev)
+{
+	struct iommu_group *group = NULL;
+	struct pci_dev *pdev;
+	struct pci_dev *bridge, *dma_pdev = NULL;
+	struct pci_controller *pci_ctl;
+	int ret;
+
+	/*
+	 * For platform devices we allocate a separate group for
+	 * each of the devices.
+	 */
+	if (dev->bus == &pci_bus_type) {
+		bool pci_endpt_part;
+
+		pdev = to_pci_dev(dev);
+		/* Don't create device groups for virtual PCI bridges */
+		if (pdev->subordinate)
+			return 0;
+
+		pci_ctl = pci_bus_to_host(pdev->bus);
+		pci_endpt_part = check_pci_ctl_endpt_part(pci_ctl);
+		/* We can partition PCIe devices so assign device group to the device */
+		if (pci_endpt_part) {
+			bridge = pci_find_upstream_pcie_bridge(pdev);
+			if (bridge) {
+				if (pci_is_pcie(bridge))
+					dma_pdev = pci_get_domain_bus_and_slot(
+								pci_domain_nr(pdev->bus),
+								bridge->subordinate->number, 0);
+				if (!dma_pdev)
+					dma_pdev = pci_dev_get(bridge);
+			} else
+				dma_pdev = pci_dev_get(pdev);
+			/* Account for quirked devices */
+			swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
+			group = get_device_iommu_group(&pdev->dev);
+			pci_dev_put(pdev);
+			/*
+			 * PCIe controller is not a paritionable entity 
+			 * free the controller device iommu_group.
+			 */
+			if (pci_ctl->parent->iommu_group)
+				iommu_group_remove_device(pci_ctl->parent);
+		} else {
+			/*
+			 * All devices connected to the controller will share the
+			 * PCI controllers device group. If this is the first
+			 * device to be probed for the pci controller, copy the 
+			 * device group information from the PCI controller device
+			 * node and remove the PCI controller iommu group.
+			 * For subsequent devices, the iommu group information can
+			 * be obtained from sibling devices (i.e. from the bus_devices
+			 * link list).
+			 */
+			if (pci_ctl->parent->iommu_group) {
+				group = get_device_iommu_group(pci_ctl->parent);
+				iommu_group_remove_device(pci_ctl->parent);
+			} else {
+				/* check if this is the first device on the bus*/
+				if (pdev->bus_list.next == pdev->bus_list.prev) {
+					struct pci_bus *bus = pdev->bus->parent;
+					int found = 0;
+					/* Traverese the parent bus list to get
+					 * pdev & dev for the sibling device.
+					 */
+					while (bus) {
+						if (!list_empty(&bus->devices)) {
+							pdev = container_of(bus->devices.next,
+								 struct pci_dev, bus_list);
+							group = iommu_group_get(&pdev->dev);
+							/* 
+							 * All devices should be associated with
+							 * a group.
+							 */
+							if (group)
+								found = 1;
+							break;
+						} else
+							bus = bus->parent;
+					}
+					if (!found)
+						dev_err(&pdev->dev, "Failed to allocate group for the device\n");
+				} else {
+					/*
+					 * Get the pdev & dev for the sibling device
+					 */
+					pdev = container_of(pdev->bus_list.prev, struct pci_dev, bus_list);
+					group = iommu_group_get(&pdev->dev);
+				}
+			}
+		}
+	} else
+		group = get_device_iommu_group(dev);
+
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	ret = iommu_group_add_device(group, dev);
+
+	iommu_group_put(group);
+	return ret;
+}
+
+static void fsl_pamu_remove_device(struct device *dev)
+{
+	iommu_group_remove_device(dev);
+}
+
+static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
+{
+	struct fsl_dma_domain *dma_domain = domain->priv;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dma_domain->domain_lock, flags);
+	/* Ensure domain is inactive i.e. DMA should be disabled for the domain */
+	if (dma_domain->enabled) {
+		pr_err("Can't set geometry attributes as domain is active\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return  -EBUSY;
+	}
+
+	/* Ensure that the geometry has been set for the domain */
+	if (!dma_domain->geom_size) {
+		pr_err("Please configure geometry before setting the number of windows\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return -EINVAL;
+	}
+
+	/* 
+	 * Ensure we have valid window count i.e. it should be less than
+	 * maximum permissible limit and should be a power of two.
+	 */
+	if (w_count > max_subwindow_count || (w_count & (w_count - 1))) {
+		pr_err("Invalid window count\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return -EINVAL;
+	}
+
+	ret = configure_domain(dma_domain, &domain->geometry, 
+				((w_count > 1) ? w_count : 0));
+	if (!ret) {
+		if (dma_domain->win_arr)
+			kfree(dma_domain->win_arr);
+		dma_domain->win_arr = kzalloc(sizeof(struct dma_window) *
+							  w_count, GFP_KERNEL);
+		if (!dma_domain->win_arr) {
+			spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+			return -ENOMEM;
+		}
+		dma_domain->win_cnt = w_count;
+	}
+	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+	
+	return ret;
+}
+
+static u32 fsl_pamu_get_windows(struct iommu_domain *domain)
+{
+	struct fsl_dma_domain *dma_domain = domain->priv;
+
+	return dma_domain->win_cnt;
+}
+
+static struct iommu_ops fsl_pamu_ops = {
+	.domain_init	= fsl_pamu_domain_init,
+	.domain_destroy = fsl_pamu_domain_destroy,
+	.attach_dev	= fsl_pamu_attach_device,
+	.detach_dev	= fsl_pamu_detach_device,
+	.domain_window_enable = fsl_pamu_window_enable,
+	.domain_window_disable = fsl_pamu_window_disable,
+	.domain_get_windows = fsl_pamu_get_windows,
+	.domain_set_windows = fsl_pamu_set_windows,
+	.iova_to_phys	= fsl_pamu_iova_to_phys,
+	.domain_has_cap = fsl_pamu_domain_has_cap,
+	.domain_set_attr = fsl_pamu_set_domain_attr,
+	.domain_get_attr = fsl_pamu_get_domain_attr,
+	.add_device	= fsl_pamu_add_device,
+	.remove_device	= fsl_pamu_remove_device,
+};
+
+int pamu_domain_init()
+{
+	int ret = 0;
+
+	ret = iommu_init_mempool();
+	if (ret)
+		return ret;
+
+	bus_set_iommu(&platform_bus_type, &fsl_pamu_ops);
+	bus_set_iommu(&pci_bus_type, &fsl_pamu_ops);
+
+	return ret;
+}
diff --git a/drivers/iommu/fsl_pamu_domain.h b/drivers/iommu/fsl_pamu_domain.h
new file mode 100644
index 0000000..658ffc9
--- /dev/null
+++ b/drivers/iommu/fsl_pamu_domain.h
@@ -0,0 +1,89 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ */
+
+#ifndef __FSL_PAMU_DOMAIN_H
+#define __FSL_PAMU_DOMAIN_H
+
+#include "fsl_pamu.h"
+
+struct dma_window {
+	phys_addr_t paddr;
+	u64 size;
+	int valid;
+	int prot;
+};
+
+struct fsl_dma_domain {
+	/* 
+	 * Indicates the geometry size for the domain.
+	 * This would be set when the geometry is
+	 * configured for the domain.
+	 */
+	dma_addr_t			geom_size;
+	/* 
+	 * Number of windows assocaited with this domain.
+	 * During domain initialization, it is set to the
+	 * the maximum number of subwindows allowed for a LIODN.
+	 * Minimum value for this is 1 indicating a single PAMU
+	 * window, without any sub windows. Value can be set/
+	 * queried by set_attr/get_attr API for DOMAIN_ATTR_WINDOWS.
+	 * Value can only be set once the geometry has been configured.
+	 */
+	u32				win_cnt;
+	/*
+	 * win_arr contains information of the configured
+	 * windows for a domain. This is allocated only
+	 * when the number of windows for the domain are
+	 * set. 
+	 */
+	struct dma_window		*win_arr;
+	/* list of devices associated with the domain */
+	struct list_head 		devices;
+	/* dma_domain states:
+	 * mapped - A particular mapping has been created
+	 * within the configured geometry.
+	 * enabled - DMA has been enabled for the given
+	 * domain. This translates to setting of the
+	 * valid bit for the primary PAACE in the PAMU
+	 * PAACT table. Domain geometry should be set and
+	 * it must have a valid mapping before DMA can be
+	 * enabled for it.
+	 *
+	 */
+	int				mapped;
+	int				enabled;
+	/* stash_id obtained from the stash attribute details */
+	u32				stash_id;
+	struct iommu_stash_attribute	dma_stash;
+	u32				snoop_id;
+	struct iommu_domain		*iommu_domain;
+	spinlock_t			domain_lock;
+};
+
+/* domain-device relationship */
+struct device_domain_info {
+	struct list_head link;	/* link to domain siblings */
+	struct device *dev;
+	u32 liodn;
+	struct fsl_dma_domain *domain; /* pointer to domain */
+};
+
+extern unsigned int max_liodn;
+extern unsigned int max_subwindow_count;
+
+#endif  /* __FSL_PAMU_DOMAIN_H */
-- 
1.7.4.1

^ permalink raw reply related

* [PATCH 5/6 v8] iommu/fsl: Add addtional attributes specific to the PAMU driver.
From: Varun Sethi @ 2013-02-18 12:52 UTC (permalink / raw)
  To: iommu, linuxppc-dev, linux-kernel, scottwood, joro, stuart.yoder
  Cc: Varun Sethi
In-Reply-To: <1361191939-21260-1-git-send-email-Varun.Sethi@freescale.com>

Added the following domain attributes for the FSL PAMU driver:
1. Added new iommu stash attribute, which allows setting of the
   LIODN specific stash id parameter through IOMMU API.
2. Added an attribute for enabling/disabling DMA to a particular
   memory window.
3. Added domain attribute to check for PAMUV1 specific constraints.


Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
---
 include/linux/iommu.h |   33 +++++++++++++++++++++++++++++++++
 1 files changed, 33 insertions(+), 0 deletions(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 529987c..c44e38b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -40,6 +40,23 @@ struct notifier_block;
 typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
 			struct device *, unsigned long, int, void *);
 
+/* cache stash targets */
+#define IOMMU_ATTR_CACHE_L1 1
+#define IOMMU_ATTR_CACHE_L2 2
+#define IOMMU_ATTR_CACHE_L3 3
+
+/* This attribute corresponds to IOMMUs capable of generating
+ * a stash transaction. A stash transaction is typically a
+ * hardware initiated prefetch of data from memory to cache.
+ * This attribute allows configuring stashig specific parameters
+ * in the IOMMU hardware.
+ */
+
+struct iommu_stash_attribute {
+	u32 	cpu;	/* cpu number */
+	u32 	cache;	/* cache to stash to: L1,L2,L3 */
+};
+
 struct iommu_domain_geometry {
 	dma_addr_t aperture_start; /* First address that can be mapped    */
 	dma_addr_t aperture_end;   /* Last address that can be mapped     */
@@ -57,10 +74,26 @@ struct iommu_domain {
 #define IOMMU_CAP_CACHE_COHERENCY	0x1
 #define IOMMU_CAP_INTR_REMAP		0x2	/* isolates device intrs */
 
+/*
+ * Following constraints are specifc to PAMUV1:
+ *  -aperture must be power of 2, and naturally aligned
+ *  -number of windows must be power of 2, and address space size
+ *   of each window is determined by aperture size / # of windows
+ *  -the actual size of the mapped region of a window must be power
+ *   of 2 starting with 4KB and physical address must be naturally
+ *   aligned.
+ * DOMAIN_ATTR_FSL_PAMUV1 corresponds to the above mentioned contraints.
+ * The caller can invoke iommu_domain_get_attr to check if the underlying
+ * iommu implementation supports these constraints.
+ */
+
 enum iommu_attr {
 	DOMAIN_ATTR_GEOMETRY,
 	DOMAIN_ATTR_PAGING,
 	DOMAIN_ATTR_WINDOWS,
+	DOMAIN_ATTR_PAMU_STASH,
+	DOMAIN_ATTR_PAMU_ENABLE,
+	DOMAIN_ATTR_FSL_PAMUV1,
 	DOMAIN_ATTR_MAX,
 };
 
-- 
1.7.4.1

^ permalink raw reply related

* [PATCH 3/6] powerpc/fsl_pci: Added defines for the FSL PCI controller BRR1 register.
From: Varun Sethi @ 2013-02-18 12:52 UTC (permalink / raw)
  To: iommu, linuxppc-dev, linux-kernel, scottwood, joro, stuart.yoder
  Cc: Varun Sethi
In-Reply-To: <1361191939-21260-1-git-send-email-Varun.Sethi@freescale.com>

Macros for checking FSL PCI controller version.

Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
---
 arch/powerpc/include/asm/pci-bridge.h |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 025a130..c12ed78 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -14,6 +14,10 @@

 struct device_node;

+/* FSL PCI controller BRR1 register */
+#define PCI_FSL_BRR1      0xbf8
+#define PCI_FSL_BRR1_VER 0xffff
+
 /*
  * Structure of a PCI controller (host bridge)
  */
-- 
1.7.4.1

^ permalink raw reply related

* [PATCH 4/6] iommu/fsl: Add window permission flags for iommu_domain_window_enable API.
From: Varun Sethi @ 2013-02-18 12:52 UTC (permalink / raw)
  To: iommu, linuxppc-dev, linux-kernel, scottwood, joro, stuart.yoder
  Cc: Varun Sethi
In-Reply-To: <1361191939-21260-1-git-send-email-Varun.Sethi@freescale.com>

Each iommu window can have access permissions associated with it. Extended the
window_enable API to incorporate window access permissions.

Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
---
 drivers/iommu/iommu.c |    5 +++--
 include/linux/iommu.h |    7 ++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index b972d43..1a2564d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -854,12 +854,13 @@ EXPORT_SYMBOL_GPL(iommu_unmap);
 
 
 int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
-			       phys_addr_t paddr, u64 size)
+			       phys_addr_t paddr, u64 size, int iommu_prot)
 {
 	if (unlikely(domain->ops->domain_window_enable == NULL))
 		return -ENODEV;
 
-	return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size);
+	return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size,
+						 iommu_prot);
 }
 EXPORT_SYMBOL_GPL(iommu_domain_window_enable);
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ba3b8a9..529987c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -105,7 +105,7 @@ struct iommu_ops {
 
 	/* Window handling functions */
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
-				    phys_addr_t paddr, u64 size);
+				    phys_addr_t paddr, u64 size, int iommu_prot);
 	void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
 	/* Set the numer of window per domain */
 	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
@@ -171,7 +171,8 @@ extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr,
 
 /* Window handling function prototypes */
 extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
-				      phys_addr_t offset, u64 size);
+				      phys_addr_t offset, u64 size,
+				      int iommu_prot);
 extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr);
 /**
  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
@@ -257,7 +258,7 @@ static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 
 static inline int iommu_domain_window_enable(struct iommu_domain *domain,
 					     u32 wnd_nr, phys_addr_t paddr,
-					     u64 size)
+					     u64 size, int iommu_prot)
 {
 	return -ENODEV;
 }
-- 
1.7.4.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox