LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 02/21] net: freescale: slight optimization of addr compare
From: Ding Tianhong @ 2013-12-23  5:09 UTC (permalink / raw)
  To: Li Yang, Netdev, linux-kernel@vger.kernel.org, linuxppc-dev

Use the recently added and possibly more efficient
ether_addr_equal_unaligned instead of memcmp.

Cc: Li Yang <leoli@freescale.com>
Cc: netdev@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
 drivers/net/ethernet/freescale/ucc_geth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 5548b6d..88a1525 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -437,7 +437,7 @@ static void hw_add_addr_in_hash(struct ucc_geth_private *ugeth,
 
 static inline int compare_addr(u8 **addr1, u8 **addr2)
 {
-	return memcmp(addr1, addr2, ETH_ALEN);
+	return !ether_addr_equal_unaligned(addr1, addr2);
 }
 
 #ifdef DEBUG
-- 
1.8.0

^ permalink raw reply related

* Re: [PATCH] ibmveth: Fix more little endian issues
From: Anton Blanchard @ 2013-12-23  6:38 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Dinar Valeev, Santiago Leon, linuxppc-dev, netdev
In-Reply-To: <1387762163-39662-1-git-send-email-agraf@suse.de>


Hi Alex,

> The ibmveth driver is memcpy()'ing the mac address between a variable
> (register) and memory. This assumes a certain endianness of the
> system, so let's make that implicit assumption work again.

Nice catch! I don't like how the driver has two different methods
for creating these MAC addresses, both without comments. How does
this look?

Anton
--

The hypervisor expects MAC addresses passed in registers to be big
endian u64. Create a helper function called ibmveth_encode_mac_addr
which does the right thing in both big and little endian.

We were storing the MAC address in a long in struct ibmveth_adapter.
It's never used so remove it - we don't need another place in the
driver where we create endian issues with MAC addresses.

Reported-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Anton Blanchard <anton@samba.org>
---

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 952d795..044178b 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -523,6 +523,17 @@ retry:
 	return rc;
 }
 
+/*
+ * The hypervisor expects MAC addresses passed in registers to be
+ * big endian u64.
+ */
+static unsigned long ibmveth_encode_mac_addr(char *mac)
+{
+	unsigned long encoded = 0;
+	memcpy(((char *)&encoded) + 2, mac, ETH_ALEN);
+	return cpu_to_be64(encoded);
+}
+
 static int ibmveth_open(struct net_device *netdev)
 {
 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
@@ -580,8 +591,7 @@ static int ibmveth_open(struct net_device *netdev)
 	adapter->rx_queue.num_slots = rxq_entries;
 	adapter->rx_queue.toggle = 1;
 
-	memcpy(&mac_address, netdev->dev_addr, netdev->addr_len);
-	mac_address = mac_address >> 16;
+	mac_address = ibmveth_encode_mac_addr(netdev->dev_addr);
 
 	rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
 					adapter->rx_queue.queue_len;
@@ -1184,8 +1194,8 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
 		/* add the addresses to the filter table */
 		netdev_for_each_mc_addr(ha, netdev) {
 			/* add the multicast address to the filter table */
-			unsigned long mcast_addr = 0;
-			memcpy(((char *)&mcast_addr)+2, ha->addr, ETH_ALEN);
+			unsigned long mcast_addr;
+			mcast_addr = ibmveth_encode_mac_addr(ha->addr);
 			lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
 						   IbmVethMcastAddFilter,
 						   mcast_addr);
@@ -1369,9 +1379,6 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 
 	netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
 
-	adapter->mac_addr = 0;
-	memcpy(&adapter->mac_addr, mac_addr_p, ETH_ALEN);
-
 	netdev->irq = dev->irq;
 	netdev->netdev_ops = &ibmveth_netdev_ops;
 	netdev->ethtool_ops = &netdev_ethtool_ops;
@@ -1380,7 +1387,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 	netdev->features |= netdev->hw_features;
 
-	memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len);
+	memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);
 
 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
 		struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index 84066ba..2c636cb 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -139,7 +139,6 @@ struct ibmveth_adapter {
     struct napi_struct napi;
     struct net_device_stats stats;
     unsigned int mcastFilterSize;
-    unsigned long mac_addr;
     void * buffer_list_addr;
     void * filter_list_addr;
     dma_addr_t buffer_list_dma;

^ permalink raw reply related

* Re: [PATCH] ibmveth: Fix more little endian issues
From: Alexander Graf @ 2013-12-23 10:17 UTC (permalink / raw)
  To: Anton Blanchard; +Cc: Dinar Valeev, Santiago Leon, linuxppc-dev, netdev
In-Reply-To: <20131223173833.0a9a6705@kryten>


On 23.12.2013, at 07:38, Anton Blanchard <anton@samba.org> wrote:

>
> Hi Alex,
>
>> The ibmveth driver is memcpy()'ing the mac address between a variable
>> (register) and memory. This assumes a certain endianness of the
>> system, so let's make that implicit assumption work again.
>
> Nice catch! I don't like how the driver has two different methods
> for creating these MAC addresses, both without comments. How does
> this look?

Heh - I didn't even realize those two places were doing the same thing.

Obviously your patch is by far nicer.

Reviewed-by: Alexander Graf <agraf@suse.de>


Alex

>
> Anton
> --
>
> The hypervisor expects MAC addresses passed in registers to be big
> endian u64. Create a helper function called ibmveth_encode_mac_addr
> which does the right thing in both big and little endian.
>
> We were storing the MAC address in a long in struct ibmveth_adapter.
> It's never used so remove it - we don't need another place in the
> driver where we create endian issues with MAC addresses.
>
> Reported-by: Alexander Graf <agraf@suse.de>
> Signed-off-by: Anton Blanchard <anton@samba.org>
> ---
>
> diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
> index 952d795..044178b 100644
> --- a/drivers/net/ethernet/ibm/ibmveth.c
> +++ b/drivers/net/ethernet/ibm/ibmveth.c
> @@ -523,6 +523,17 @@ retry:
> 	return rc;
> }
>
> +/*
> + * The hypervisor expects MAC addresses passed in registers to be
> + * big endian u64.
> + */
> +static unsigned long ibmveth_encode_mac_addr(char *mac)
> +{
> +	unsigned long encoded = 0;
> +	memcpy(((char *)&encoded) + 2, mac, ETH_ALEN);
> +	return cpu_to_be64(encoded);
> +}
> +
> static int ibmveth_open(struct net_device *netdev)
> {
> 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
> @@ -580,8 +591,7 @@ static int ibmveth_open(struct net_device *netdev)
> 	adapter->rx_queue.num_slots = rxq_entries;
> 	adapter->rx_queue.toggle = 1;
>
> -	memcpy(&mac_address, netdev->dev_addr, netdev->addr_len);
> -	mac_address = mac_address >> 16;
> +	mac_address = ibmveth_encode_mac_addr(netdev->dev_addr);
>
> 	rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
> 					adapter->rx_queue.queue_len;
> @@ -1184,8 +1194,8 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
> 		/* add the addresses to the filter table */
> 		netdev_for_each_mc_addr(ha, netdev) {
> 			/* add the multicast address to the filter table */
> -			unsigned long mcast_addr = 0;
> -			memcpy(((char *)&mcast_addr)+2, ha->addr, ETH_ALEN);
> +			unsigned long mcast_addr;
> +			mcast_addr = ibmveth_encode_mac_addr(ha->addr);
> 			lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
> 						   IbmVethMcastAddFilter,
> 						   mcast_addr);
> @@ -1369,9 +1379,6 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
>
> 	netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
>
> -	adapter->mac_addr = 0;
> -	memcpy(&adapter->mac_addr, mac_addr_p, ETH_ALEN);
> -
> 	netdev->irq = dev->irq;
> 	netdev->netdev_ops = &ibmveth_netdev_ops;
> 	netdev->ethtool_ops = &netdev_ethtool_ops;
> @@ -1380,7 +1387,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
> 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
> 	netdev->features |= netdev->hw_features;
>
> -	memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len);
> +	memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);
>
> 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
> 		struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
> diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
> index 84066ba..2c636cb 100644
> --- a/drivers/net/ethernet/ibm/ibmveth.h
> +++ b/drivers/net/ethernet/ibm/ibmveth.h
> @@ -139,7 +139,6 @@ struct ibmveth_adapter {
>     struct napi_struct napi;
>     struct net_device_stats stats;
>     unsigned int mcastFilterSize;
> -    unsigned long mac_addr;
>     void * buffer_list_addr;
>     void * filter_list_addr;
>     dma_addr_t buffer_list_dma;

^ permalink raw reply

* [PATCH 1/3] powerpc/xmon: Don't loop forever in get_output_lock()
From: Michael Ellerman @ 2013-12-23 12:46 UTC (permalink / raw)
  To: linuxppc-dev

From: Michael Ellerman <michael@ellerman.id.au>

If we enter with xmon_speaker != 0 we skip the first cmpxchg(), we also
skip the while loop because xmon_speaker != last_speaker (0) - meaning we
skip the second cmpxchg() also.

Following that code path the compiler sees no memory barriers and so is
within its rights to never reload xmon_speaker. The end result is we loop
forever.

This manifests as all cpus being in xmon ('c' command), but they refuse
to take control when you switch to them ('c x' for cpu # x).

I have seen this deadlock in practice and also checked the generated code to
confirm this is what's happening.

The simplest fix is just to always try the cmpxchg().

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
---
 arch/powerpc/xmon/xmon.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index af9d346..500105c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -309,12 +309,12 @@ static void get_output_lock(void)
 
 	if (xmon_speaker == me)
 		return;
+
 	for (;;) {
-		if (xmon_speaker == 0) {
-			last_speaker = cmpxchg(&xmon_speaker, 0, me);
-			if (last_speaker == 0)
-				return;
-		}
+		last_speaker = cmpxchg(&xmon_speaker, 0, me);
+		if (last_speaker == 0)
+			return;
+
 		timeout = 10000000;
 		while (xmon_speaker == last_speaker) {
 			if (--timeout > 0)
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 2/3] powerpc/xmon: Fix timeout loop in get_output_lock()
From: Michael Ellerman @ 2013-12-23 12:46 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1387802766-7199-1-git-send-email-mpe@ellerman.id.au>

As far as I can tell, our 70s era timeout loop in get_output_lock() is
generating no code.

This leads to the hostile takeover happening more or less simultaneously
on all cpus. The result is "interesting", some example output that is
more readable than most:

    cpu 0x1: Vector: 100 (Scypsut e0mx bR:e setV)e catto xc0p:u[ c 00
    c0:0  000t0o0V0erc0td:o5 rfc28050000]0c00 0 0  0 6t(pSrycsV1ppuot
    uxe 1m 2 0Rx21e3:0s0ce000c00000t00)00 60602oV2SerucSayt0y 0p 1sxs

Fix it by using udelay() in the timeout loop. The wait time and check
frequency are arbitrary, but seem to work OK. We already rely on
udelay() working so this is not a new dependency.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/xmon/xmon.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 500105c..051037e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -315,10 +315,17 @@ static void get_output_lock(void)
 		if (last_speaker == 0)
 			return;
 
-		timeout = 10000000;
+		/*
+		 * Wait a full second for the lock, we might be on a slow
+		 * console, but check every 100us.
+		 */
+		timeout = 10000;
 		while (xmon_speaker == last_speaker) {
-			if (--timeout > 0)
+			if (--timeout > 0) {
+				udelay(100);
 				continue;
+			}
+
 			/* hostile takeover */
 			prev = cmpxchg(&xmon_speaker, last_speaker, me);
 			if (prev == last_speaker)
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 3/3] powerpc/xmon: Don't signal we've entered until we're finished printing
From: Michael Ellerman @ 2013-12-23 12:46 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <1387802766-7199-1-git-send-email-mpe@ellerman.id.au>

Currently we set our cpu's bit in cpus_in_xmon, and then we take the
output lock and print the exception information.

This can race with the master cpu entering the command loop and printing
the backtrace. The result is that the backtrace gets garbled with
another cpu's exception print out.

Fix it by delaying the set of cpus_in_xmon until we are finished
printing.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/xmon/xmon.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 051037e..b59f44f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -404,7 +404,6 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 	}
 
 	xmon_fault_jmp[cpu] = recurse_jmp;
-	cpumask_set_cpu(cpu, &cpus_in_xmon);
 
 	bp = NULL;
 	if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT))
@@ -426,6 +425,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 		release_output_lock();
 	}
 
+	cpumask_set_cpu(cpu, &cpus_in_xmon);
+
  waiting:
 	secondary = 1;
 	while (secondary && !xmon_gate) {
-- 
1.8.3.2

^ permalink raw reply related

* Re: [PATCH] ibmveth: Fix more little endian issues
From: Joe Perches @ 2013-12-23 14:52 UTC (permalink / raw)
  To: Anton Blanchard
  Cc: Dinar Valeev, linuxppc-dev, Alexander Graf, netdev, Santiago Leon
In-Reply-To: <20131223173833.0a9a6705@kryten>

On Mon, 2013-12-23 at 17:38 +1100, Anton Blanchard wrote:
> The hypervisor expects MAC addresses passed in registers to be big
> endian u64.

So maybe use __be64 declarations?

> +static unsigned long ibmveth_encode_mac_addr(char *mac)

static __be64 ibmveth_encode_mac_addr(const char *mac)

?

etc...

^ permalink raw reply

* Re: [PATCH] powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian
From: Anton Blanchard @ 2013-12-24  1:02 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: paulmck, paulus, linuxppc-dev
In-Reply-To: <1387459057.1305.1.camel@concordia>

Hi Michael,

> > To try and catch any screw ups in our ppc64 memcpy and
> > copy_tofrom_user loops, I wrote a quick test:
> > 
> > http://ozlabs.org/~anton/junkcode/validate_kernel_copyloops.tar.gz
> 
> Nice! How's this look?

Love it!

At the moment my other copy_to/from_user tests run against the kernel
(testing we copy all data right up to a page fault and that we return
the correct number of bytes not copied etc). A small signal handler
that walks the exception entries and branches to the handler should be
all it takes to do it completely in userspace.

Anton

> 
> cheers
> 
> 
> selftests: Import Anton's memcpy / copy_tofrom_user tests
> 
> Turn Anton's memcpy / copy_tofrom_user test into something that can
> live in tools/testing/selftests.
> 
> It requires one turd in arch/powerpc/lib/memcpy_64.S, but it's pretty
> harmless IMHO.
> 
> We are sailing very close to the wind with the feature macros. We
> define them to nothing, which currently means we get a few extra nops
> and include the unaligned calls.
> 
> ---
>  arch/powerpc/lib/memcpy_64.S                       |  2 +
>  tools/testing/selftests/powerpc/Makefile           |  2 +-
>  tools/testing/selftests/powerpc/copyloops/Makefile | 29 +++++++
>  .../selftests/powerpc/copyloops/asm/ppc_asm.h      | 86 +++++++++++++++++++
>  .../selftests/powerpc/copyloops/asm/processor.h    |  0
>  .../selftests/powerpc/copyloops/copyuser_64.S      |  1 +
>  .../selftests/powerpc/copyloops/copyuser_power7.S  |  1 +
>  .../selftests/powerpc/copyloops/memcpy_64.S        |  1 +
>  .../selftests/powerpc/copyloops/memcpy_power7.S    |  1 +
>  .../testing/selftests/powerpc/copyloops/validate.c | 99 ++++++++++++++++++++++
>  tools/testing/selftests/powerpc/utils.h            |  3 +
>  11 files changed, 224 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/powerpc/copyloops/Makefile
>  create mode 100644 tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
>  create mode 100644 tools/testing/selftests/powerpc/copyloops/asm/processor.h
>  create mode 120000 tools/testing/selftests/powerpc/copyloops/copyuser_64.S
>  create mode 120000 tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
>  create mode 120000 tools/testing/selftests/powerpc/copyloops/memcpy_64.S
>  create mode 120000 tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
>  create mode 100644 tools/testing/selftests/powerpc/copyloops/validate.c
> 
> diff --git a/arch/powerpc/lib/memcpy_64.S
> b/arch/powerpc/lib/memcpy_64.S index d2bbbc8..72ad055 100644
> --- a/arch/powerpc/lib/memcpy_64.S
> +++ b/arch/powerpc/lib/memcpy_64.S
> @@ -14,7 +14,9 @@ _GLOBAL(memcpy)
>  BEGIN_FTR_SECTION
>  	std	r3,48(r1)	/* save destination pointer for
> return value */ FTR_SECTION_ELSE
> +#ifndef SELFTEST
>  	b	memcpy_power7
> +#endif
>  ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
>  	PPC_MTOCRF(0x01,r5)
>  	cmpldi	cr1,r5,16
> diff --git a/tools/testing/selftests/powerpc/Makefile
> b/tools/testing/selftests/powerpc/Makefile index bd24ae5..316194f
> 100644 --- a/tools/testing/selftests/powerpc/Makefile
> +++ b/tools/testing/selftests/powerpc/Makefile
> @@ -13,7 +13,7 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror
> -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR 
>  export CC CFLAGS
>  
> -TARGETS = pmu
> +TARGETS = pmu copyloops
>  
>  endif
>  
> diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile
> b/tools/testing/selftests/powerpc/copyloops/Makefile new file mode
> 100644 index 0000000..6f2d3be
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/Makefile
> @@ -0,0 +1,29 @@
> +# The loops are all 64-bit code
> +CFLAGS += -m64
> +CFLAGS += -I$(CURDIR)
> +CFLAGS += -D SELFTEST
> +
> +# Use our CFLAGS for the implicit .S rule
> +ASFLAGS = $(CFLAGS)
> +
> +PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
> +EXTRA_SOURCES := validate.c ../harness.c
> +
> +all: $(PROGS)
> +
> +copyuser_64:     CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
> +copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
> +memcpy_64:       CPPFLAGS += -D COPY_LOOP=test_memcpy
> +memcpy_power7:   CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
> +
> +$(PROGS): $(EXTRA_SOURCES)
> +
> +run_tests: all
> +	@-for PROG in $(PROGS); do \
> +		./$$PROG; \
> +	done;
> +
> +clean:
> +	rm -f $(PROGS) *.o
> +
> +.PHONY: all run_tests clean
> diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
> b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h new file
> mode 100644 index 0000000..ccd9c84
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
> @@ -0,0 +1,86 @@
> +#include <ppc-asm.h>
> +
> +#define CONFIG_ALTIVEC
> +
> +#define r1	1
> +
> +#define vr0     0
> +#define vr1     1
> +#define vr2     2
> +#define vr3     3
> +#define vr4     4
> +#define vr5     5
> +#define vr6     6
> +#define vr7     7
> +#define vr8     8
> +#define vr9     9
> +#define vr10    10
> +#define vr11    11
> +#define vr12    12
> +#define vr13    13
> +#define vr14    14
> +#define vr15    15
> +#define vr16    16
> +#define vr17    17
> +#define vr18    18
> +#define vr19    19
> +#define vr20    20
> +#define vr21    21
> +#define vr22    22
> +#define vr23    23
> +#define vr24    24
> +#define vr25    25
> +#define vr26    26
> +#define vr27    27
> +#define vr28    28
> +#define vr29    29
> +#define vr30    30
> +#define vr31    31
> +
> +#define R14 r14
> +#define R15 r15
> +#define R16 r16
> +#define R17 r17
> +#define R18 r18
> +#define R19 r19
> +#define R20 r20
> +#define R21 r21
> +#define R22 r22
> +
> +#define STACKFRAMESIZE	256
> +#define STK_PARAM(i)	(48 + ((i)-3)*8)
> +#define STK_REG(i)	(112 + ((i)-14)*8)
> +
> +#define _GLOBAL(A) FUNC_START(test_ ## A)
> +
> +#define PPC_MTOCRF(A, B)	mtocrf A, B
> +
> +FUNC_START(enter_vmx_usercopy)
> +	li	r3,1
> +	blr
> +
> +FUNC_START(exit_vmx_usercopy)
> +	li	r3,0
> +	blr
> +
> +FUNC_START(enter_vmx_copy)
> +	li	r3,1
> +	blr
> +
> +FUNC_START(exit_vmx_copy)
> +	blr
> +
> +FUNC_START(memcpy_power7)
> +	blr
> +
> +FUNC_START(__copy_tofrom_user_power7)
> +	blr
> +
> +FUNC_START(__copy_tofrom_user_base)
> +	blr
> +
> +#define BEGIN_FTR_SECTION
> +#define FTR_SECTION_ELSE
> +#define ALT_FTR_SECTION_END_IFCLR(x)
> +#define ALT_FTR_SECTION_END(x, y)
> +#define END_FTR_SECTION_IFCLR(x)
> diff --git
> a/tools/testing/selftests/powerpc/copyloops/asm/processor.h
> b/tools/testing/selftests/powerpc/copyloops/asm/processor.h new file
> mode 100644 index 0000000..e69de29 diff --git
> a/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
> b/tools/testing/selftests/powerpc/copyloops/copyuser_64.S new file
> mode 120000 index 0000000..f1c418a --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
> @@ -0,0 +1 @@
> +../../../../../arch/powerpc/lib/copyuser_64.S
> \ No newline at end of file
> diff --git
> a/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
> b/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S new
> file mode 120000 index 0000000..4786895 --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
> @@ -0,0 +1 @@
> +../../../../../arch/powerpc/lib/copyuser_power7.S
> \ No newline at end of file
> diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
> b/tools/testing/selftests/powerpc/copyloops/memcpy_64.S new file mode
> 120000 index 0000000..cce33fb
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
> @@ -0,0 +1 @@
> +../../../../../arch/powerpc/lib/memcpy_64.S
> \ No newline at end of file
> diff --git
> a/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
> b/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S new file
> mode 120000 index 0000000..0d6fbfa --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
> @@ -0,0 +1 @@
> +../../../../../arch/powerpc/lib/memcpy_power7.S
> \ No newline at end of file
> diff --git a/tools/testing/selftests/powerpc/copyloops/validate.c
> b/tools/testing/selftests/powerpc/copyloops/validate.c new file mode
> 100644 index 0000000..1750ff5
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/validate.c
> @@ -0,0 +1,99 @@
> +#include <malloc.h>
> +#include <string.h>
> +#include <stdlib.h>
> +#include <stdbool.h>
> +
> +#include "../utils.h"
> +
> +#define MAX_LEN 8192
> +#define MAX_OFFSET 16
> +#define MIN_REDZONE 128
> +#define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE)
> +#define POISON 0xa5
> +
> +unsigned long COPY_LOOP(void *to, const void *from, unsigned long
> size); +
> +static void do_one(char *src, char *dst, unsigned long src_off,
> +		   unsigned long dst_off, unsigned long len, void
> *redzone,
> +		   void *fill)
> +{
> +	char *srcp, *dstp;
> +	unsigned long ret;
> +	unsigned long i;
> +
> +	srcp = src + MIN_REDZONE + src_off;
> +	dstp = dst + MIN_REDZONE + dst_off;
> +
> +	memset(src, POISON, BUFLEN);
> +	memset(dst, POISON, BUFLEN);
> +	memcpy(srcp, fill, len);
> +
> +	ret = COPY_LOOP(dstp, srcp, len);
> +	if (ret && ret != (unsigned long)dstp) {
> +		printf("(%p,%p,%ld) returned %ld\n", dstp, srcp,
> len, ret);
> +		abort();
> +	}
> +
> +	if (memcmp(dstp, srcp, len)) {
> +		printf("(%p,%p,%ld) miscompare\n", dstp, srcp, len);
> +		printf("src: ");
> +		for (i = 0; i < len; i++)
> +			printf("%02x ", srcp[i]);
> +		printf("\ndst: ");
> +		for (i = 0; i < len; i++)
> +			printf("%02x ", dstp[i]);
> +		printf("\n");
> +		abort();
> +	}
> +
> +	if (memcmp(dst, redzone, dstp - dst)) {
> +		printf("(%p,%p,%ld) redzone before corrupted\n",
> +		       dstp, srcp, len);
> +		abort();
> +	}
> +
> +	if (memcmp(dstp+len, redzone, dst+BUFLEN-(dstp+len))) {
> +		printf("(%p,%p,%ld) redzone after corrupted\n",
> +		       dstp, srcp, len);
> +		abort();
> +	}
> +}
> +
> +int test_copy_loop(void)
> +{
> +	char *src, *dst, *redzone, *fill;
> +	unsigned long len, src_off, dst_off;
> +	unsigned long i;
> +
> +	src = memalign(BUFLEN, BUFLEN);
> +	dst = memalign(BUFLEN, BUFLEN);
> +	redzone = malloc(BUFLEN);
> +	fill = malloc(BUFLEN);
> +
> +	if (!src || !dst || !redzone || !fill) {
> +		fprintf(stderr, "malloc failed\n");
> +		exit(1);
> +	}
> +
> +	memset(redzone, POISON, BUFLEN);
> +
> +	/* Fill with sequential bytes */
> +	for (i = 0; i < BUFLEN; i++)
> +		fill[i] = i & 0xff;
> +
> +	for (len = 1; len < MAX_LEN; len++) {
> +		for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
> +			for (dst_off = 0; dst_off < MAX_OFFSET;
> dst_off++) {
> +				do_one(src, dst, src_off, dst_off,
> len,
> +				       redzone, fill);
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +int main(void)
> +{
> +	return test_harness(test_copy_loop, str(COPY_LOOP));
> +}
> diff --git a/tools/testing/selftests/powerpc/utils.h
> b/tools/testing/selftests/powerpc/utils.h index 5851c4b..0de0644
> 100644 --- a/tools/testing/selftests/powerpc/utils.h
> +++ b/tools/testing/selftests/powerpc/utils.h
> @@ -31,4 +31,7 @@ do
> {
> \ }							\ } while
> (0) 
> +#define _str(s) #s
> +#define str(s) _str(s)
> +
>  #endif /* _SELFTESTS_POWERPC_UTILS_H */

^ permalink raw reply

* [PATCH] ibmveth: Fix more little endian issues
From: Anton Blanchard @ 2013-12-24  1:55 UTC (permalink / raw)
  To: Joe Perches
  Cc: Dinar Valeev, linuxppc-dev, Alexander Graf, netdev, Santiago Leon
In-Reply-To: <1387810329.22671.66.camel@joe-AO722>


The hypervisor expects MAC addresses passed in registers to be big
endian u64. Create a helper function called ibmveth_encode_mac_addr
which does the right thing in both big and little endian.

We were storing the MAC address in a long in struct ibmveth_adapter.
It's never used so remove it - we don't need another place in the
driver where we create endian issues with MAC addresses.

Signed-off-by: Anton Blanchard <anton@samba.org>
Reviewed-by: Alexander Graf <agraf@suse.de>
---

v2: annotate with __be64 as suggested by Joe

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 952d795..bb9a631 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -497,7 +497,7 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
 }
 
 static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
-        union ibmveth_buf_desc rxq_desc, u64 mac_address)
+			union ibmveth_buf_desc rxq_desc, __be64 mac_address)
 {
 	int rc, try_again = 1;
 
@@ -523,10 +523,20 @@ retry:
 	return rc;
 }
 
+/* The hypervisor expects MAC addresses passed in registers to be
+ * big endian u64.
+ */
+static __be64 ibmveth_encode_mac_addr(char *mac)
+{
+	unsigned long encoded = 0;
+	memcpy(((char *)&encoded) + 2, mac, ETH_ALEN);
+	return cpu_to_be64(encoded);
+}
+
 static int ibmveth_open(struct net_device *netdev)
 {
 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
-	u64 mac_address = 0;
+	__be64 mac_address = 0;
 	int rxq_entries = 1;
 	unsigned long lpar_rc;
 	int rc;
@@ -580,8 +590,7 @@ static int ibmveth_open(struct net_device *netdev)
 	adapter->rx_queue.num_slots = rxq_entries;
 	adapter->rx_queue.toggle = 1;
 
-	memcpy(&mac_address, netdev->dev_addr, netdev->addr_len);
-	mac_address = mac_address >> 16;
+	mac_address = ibmveth_encode_mac_addr(netdev->dev_addr);
 
 	rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
 					adapter->rx_queue.queue_len;
@@ -1184,8 +1193,8 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
 		/* add the addresses to the filter table */
 		netdev_for_each_mc_addr(ha, netdev) {
 			/* add the multicast address to the filter table */
-			unsigned long mcast_addr = 0;
-			memcpy(((char *)&mcast_addr)+2, ha->addr, ETH_ALEN);
+			__be64 mcast_addr;
+			mcast_addr = ibmveth_encode_mac_addr(ha->addr);
 			lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
 						   IbmVethMcastAddFilter,
 						   mcast_addr);
@@ -1369,9 +1378,6 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 
 	netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
 
-	adapter->mac_addr = 0;
-	memcpy(&adapter->mac_addr, mac_addr_p, ETH_ALEN);
-
 	netdev->irq = dev->irq;
 	netdev->netdev_ops = &ibmveth_netdev_ops;
 	netdev->ethtool_ops = &netdev_ethtool_ops;
@@ -1380,7 +1386,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 	netdev->features |= netdev->hw_features;
 
-	memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len);
+	memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);
 
 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
 		struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index 84066ba..2c636cb 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -139,7 +139,6 @@ struct ibmveth_adapter {
     struct napi_struct napi;
     struct net_device_stats stats;
     unsigned int mcastFilterSize;
-    unsigned long mac_addr;
     void * buffer_list_addr;
     void * filter_list_addr;
     dma_addr_t buffer_list_dma;

^ permalink raw reply related

* Re: [PATCH V4 08/10] powerpc, perf: Enable SW filtering in branch stack sampling framework
From: Michael Ellerman @ 2013-12-24  3:29 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: mikey, ak, linux-kernel, eranian, linuxppc-dev, acme, sukadev,
	mingo
In-Reply-To: <52B42394.4060705@linux.vnet.ibm.com>

On Fri, 2013-12-20 at 16:31 +0530, Anshuman Khandual wrote:
> On 12/09/2013 11:51 AM, Michael Ellerman wrote:
> > On Wed, 2013-04-12 at 10:32:40 UTC, Anshuman Khandual wrote:
> >> +
> >> +	if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_IND_CALL) {
> >> +		/* XL-form instruction */
> >> +		if (instr_is_branch_xlform(*addr)) {
> >> +
> >> +			/* LR should be set */
> >> +			if (is_branch_link_set(*addr)) {
> >> +				/*
> >> +			 	 * Conditional and unconditional
> >> +			 	 * branch to CTR.
> >> +			 	 */
> >> +				if (is_xlform_ctr(*addr))
> >> +					result = true;
> >> +
> >> +				/*
> >> +			 	 * Conditional and unconditional
> >> +			 	 * branch to LR.
> >> +			 	 */
> >> +				if (is_xlform_lr(*addr))
> >> +					result = true;
> >> +
> >> +				/*
> >> +			 	 * Conditional and unconditional
> >> +			 	 * branch to TAR.
> >> +			 	 */
> >> +				if (is_xlform_tar(*addr))
> >> +					result = true;
> > 
> > What other kind of XL-Form branch is there?
> 
> I am not sure. Do you know of any ?

That was my point. There are no other types, so you can just do:

	if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_IND_CALL)
		if (instr_is_branch_xlform(*addr) && is_branch_link_set(*addr))
			return true;

> >> +	if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_COND) {
> >> +
> >> +		/* I-form instruction - excluded */
> >> +		if (instr_is_branch_iform(*addr))
> >> +			goto out;
> >> +
> >> +		/* B-form or XL-form instruction */
> >> +		if (instr_is_branch_bform(*addr) || instr_is_branch_xlform(*addr))  {
> >> +
> >> +			/* Not branch always  */
> >> +			if (!is_bo_always(*addr)) {
> >> +
> >> +				/* Conditional branch to CTR register */
> >> +				if (is_bo_ctr(*addr))
> >> +					goto out;
> > 
> > We might have discussed this but why not?
> 
> Did not get that, discuss what ?

Why are we saying a conditional branch to the CTR is not a conditional branch?

It is conditional, so I think it should be included.

> >> +
> >> +				/* CR[BI] conditional branch with static hint */
> > 
> > A conditional branch with a static hint is still a conditional branch?
> 
> No its not. 

Yes it is?

In fact they could be very interesting branches. Because the compiler or
programmer has statically hinted them, if the hint is wrong they may be a major
source of branch mispredicts.


> >> +				if (is_bo_crbi_off(*addr) || is_bo_crbi_on(*addr)) {
> >> +					if (is_bo_crbi_hint(*addr))
> >> +						goto out;
> >> +				}
> >> +
> >> +				result = true;
> >> +			}
> >> +		}
> >> +	}
> >> +out:
> >> +	return result;
> >> +}
 
> >> +	} else {
> >> +		/*
> >> +		 * Userspace address needs to be
> >> +		 * copied first before analysis.
> >> +		 */
> >> +		pagefault_disable();
> >> +		ret =  __get_user_inatomic(instr, (unsigned int __user *)addr);
> > 
> > I suspect you borrowed this incantation from the callchain code. Unlike that
> > code you don't fallback to reading the page tables directly.
> > 
> > I'd rather see the accessor in the callchain code made generic and have you
> > call it here.
> 
> You have mentioned to take care of this issue yourself.

Yes I will.

cheers

^ permalink raw reply

* Re: [PATCH] powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian
From: Michael Ellerman @ 2013-12-24  3:34 UTC (permalink / raw)
  To: Anton Blanchard; +Cc: paulmck, paulus, linuxppc-dev
In-Reply-To: <20131224120259.454bc44c@kryten>

On Tue, 2013-12-24 at 12:02 +1100, Anton Blanchard wrote:
> Hi Michael,
> 
> > > To try and catch any screw ups in our ppc64 memcpy and
> > > copy_tofrom_user loops, I wrote a quick test:
> > > 
> > > http://ozlabs.org/~anton/junkcode/validate_kernel_copyloops.tar.gz
> > 
> > Nice! How's this look?
> 
> Love it!

Cool, I'll add your Signed-off-by and resubmit.

> At the moment my other copy_to/from_user tests run against the kernel
> (testing we copy all data right up to a page fault and that we return
> the correct number of bytes not copied etc). A small signal handler
> that walks the exception entries and branches to the handler should be
> all it takes to do it completely in userspace.

That'd be nice. Are they in your junkcode? I couldn't spot them at a glance.

cheers

^ permalink raw reply

* Re: [PATCH V4 08/10] powerpc, perf: Enable SW filtering in branch stack sampling framework
From: Anshuman Khandual @ 2013-12-24  3:50 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: mikey, ak, linux-kernel, eranian, linuxppc-dev, acme, sukadev,
	mingo
In-Reply-To: <1387855790.15093.1.camel@concordia>

On 12/24/2013 08:59 AM, Michael Ellerman wrote:
> On Fri, 2013-12-20 at 16:31 +0530, Anshuman Khandual wrote:
>> On 12/09/2013 11:51 AM, Michael Ellerman wrote:
>>> On Wed, 2013-04-12 at 10:32:40 UTC, Anshuman Khandual wrote:
>>>> +
>>>> +	if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_IND_CALL) {
>>>> +		/* XL-form instruction */
>>>> +		if (instr_is_branch_xlform(*addr)) {
>>>> +
>>>> +			/* LR should be set */
>>>> +			if (is_branch_link_set(*addr)) {
>>>> +				/*
>>>> +			 	 * Conditional and unconditional
>>>> +			 	 * branch to CTR.
>>>> +			 	 */
>>>> +				if (is_xlform_ctr(*addr))
>>>> +					result = true;
>>>> +
>>>> +				/*
>>>> +			 	 * Conditional and unconditional
>>>> +			 	 * branch to LR.
>>>> +			 	 */
>>>> +				if (is_xlform_lr(*addr))
>>>> +					result = true;
>>>> +
>>>> +				/*
>>>> +			 	 * Conditional and unconditional
>>>> +			 	 * branch to TAR.
>>>> +			 	 */
>>>> +				if (is_xlform_tar(*addr))
>>>> +					result = true;
>>>
>>> What other kind of XL-Form branch is there?
>>
>> I am not sure. Do you know of any ?
> 
> That was my point. There are no other types, so you can just do:
> 
> 	if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_IND_CALL)
> 		if (instr_is_branch_xlform(*addr) && is_branch_link_set(*addr))
> 			return true;
> 

Done

>>>> +	if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_COND) {
>>>> +
>>>> +		/* I-form instruction - excluded */
>>>> +		if (instr_is_branch_iform(*addr))
>>>> +			goto out;
>>>> +
>>>> +		/* B-form or XL-form instruction */
>>>> +		if (instr_is_branch_bform(*addr) || instr_is_branch_xlform(*addr))  {
>>>> +
>>>> +			/* Not branch always  */
>>>> +			if (!is_bo_always(*addr)) {
>>>> +
>>>> +				/* Conditional branch to CTR register */
>>>> +				if (is_bo_ctr(*addr))
>>>> +					goto out;
>>>
>>> We might have discussed this but why not?
>>
>> Did not get that, discuss what ?
> 
> Why are we saying a conditional branch to the CTR is not a conditional branch?
> 
> It is conditional, so I think it should be included.
> 

I believe conditional branches to the CTR register and the conditional branches
with a static hint below are excluded when processed with the BHRB PMU-based
filter IFM3. Here the SW-implemented filter tries to match those exclusions, so
that a user should not see any difference in results whether the filter is
processed in the PMU or in SW.

>>>> +
>>>> +				/* CR[BI] conditional branch with static hint */
>>>
>>> A conditional branch with a static hint is still a conditional branch?
>>
>> No its not. 
> 
> Yes it is?
> 
> In fact they could be very interesting branches. Because the compiler or
> programmer has statically hinted them, if the hint is wrong they may be a major
> source of branch mispredicts.
> 
> 
>>>> +				if (is_bo_crbi_off(*addr) || is_bo_crbi_on(*addr)) {
>>>> +					if (is_bo_crbi_hint(*addr))
>>>> +						goto out;
>>>> +				}
>>>> +
>>>> +				result = true;
>>>> +			}
>>>> +		}
>>>> +	}
>>>> +out:
>>>> +	return result;
>>>> +}
> 
>>>> +	} else {
>>>> +		/*
>>>> +		 * Userspace address needs to be
>>>> +		 * copied first before analysis.
>>>> +		 */
>>>> +		pagefault_disable();
>>>> +		ret =  __get_user_inatomic(instr, (unsigned int __user *)addr);
>>>
>>> I suspect you borrowed this incantation from the callchain code. Unlike that
>>> code you don't fallback to reading the page tables directly.
>>>
>>> I'd rather see the accessor in the callchain code made generic and have you
>>> call it here.
>>
>> You have mentioned to take care of this issue yourself.
> 
> Yes I will.

Thanks !!

^ permalink raw reply

* Re: [PATCH V4 08/10] powerpc, perf: Enable SW filtering in branch stack sampling framework
From: Michael Ellerman @ 2013-12-24  4:35 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: mikey, ak, linux-kernel, eranian, linuxppc-dev, acme, sukadev,
	mingo
In-Reply-To: <52B9049D.4020403@linux.vnet.ibm.com>

On Tue, 2013-12-24 at 09:20 +0530, Anshuman Khandual wrote:
> On 12/24/2013 08:59 AM, Michael Ellerman wrote:
> > On Fri, 2013-12-20 at 16:31 +0530, Anshuman Khandual wrote:
> >> On 12/09/2013 11:51 AM, Michael Ellerman wrote:
> >>> On Wed, 2013-04-12 at 10:32:40 UTC, Anshuman Khandual wrote:
> >>>> +
> 
> >>>> +	if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_COND) {
> >>>> +
> >>>> +		/* I-form instruction - excluded */
> >>>> +		if (instr_is_branch_iform(*addr))
> >>>> +			goto out;
> >>>> +
> >>>> +		/* B-form or XL-form instruction */
> >>>> +		if (instr_is_branch_bform(*addr) || instr_is_branch_xlform(*addr))  {
> >>>> +
> >>>> +			/* Not branch always  */
> >>>> +			if (!is_bo_always(*addr)) {
> >>>> +
> >>>> +				/* Conditional branch to CTR register */
> >>>> +				if (is_bo_ctr(*addr))
> >>>> +					goto out;
> >>>
> >>> We might have discussed this but why not?
> >>
> >> Did not get that, discuss what ?
> > 
> > Why are we saying a conditional branch to the CTR is not a conditional branch?
> > 
> > It is conditional, so I think it should be included.

> I believe conditional branch to CTR register and the below conditional branch
> with static hint are excluded when processed with BHRB PMU based filter IFM3,
> Here the SW implemented filter try to match those exclusions, so that a user
> should not see any difference in results whether the filter is processed
> either in PMU or in SW.

OK. That's what I meant by "we might have discussed this".

So you need to make it very clear in the code that we are implementing the IFM3
semantics, with a comment. Otherwise it's not obviously clear why those
semantics make sense.

And we need to make extra sure we implement the same semantics as IFM3, which I
don't think you do at the moment.

The description for IFM3 is:

   Do not record:
    * b and bl instructions, 
    * bc and bcl instructions for which the BO field indicates “Branch always.”
   
   For bclr, bclrl, bctr, bctrl, bctar, and bctarl instructions for which
   the BO field indicates “Branch always,” record only one entry
   containing the Branch target address.

So I don't think your SW filter implements that part correctly. You are
discarding all branches with "branch always" set.


   Do not record:
    * Branch instructions for which BO[0]=1, 

This is what excludes branches to CTR. But, it's only branches to CTR that
don't also depend on CR[BI] - we need to make that clear in the code.

    * Branch instructions for which the “a” bit in the BO field is set to 1.

So that's the is_bo_crbi_hint() check and rejection, but it's not related to
CR[BI] at all.

There's a note about CR[BI]:

    Do not record instructions that do not depend on the value of CR[BI].

But I think you've misinterpreted that. 

    Do not record instructions that do not depend on the value of CR[BI].

    Do     record instructions that        depend on the value of CR[BI].


In fact the only branches that don't depend on CR[BI] are "branch always"
branches, and branches with BO[0]=1, both of which were handled above.

cheers

^ permalink raw reply

* Re: [PATCH] ibmveth: Fix more little endian issues
From: Benjamin Herrenschmidt @ 2013-12-24  4:37 UTC (permalink / raw)
  To: Joe Perches
  Cc: netdev, Dinar Valeev, linuxppc-dev, Alexander Graf,
	Anton Blanchard, Santiago Leon
In-Reply-To: <1387810329.22671.66.camel@joe-AO722>

On Mon, 2013-12-23 at 06:52 -0800, Joe Perches wrote:
> On Mon, 2013-12-23 at 17:38 +1100, Anton Blanchard wrote:
> > The hypervisor expects MAC addresses passed in registers to be big
> > endian u64.
> 
> So maybe use __be64 declarations?
> 
> > +static unsigned long ibmveth_encode_mac_addr(char *mac)
> 
> static __be64 ibmveth_encode_mac_addr(const char *mac)

A register value has no endianness. Only memory content does. Especially
talking of a MAC address which is really a byte stream.... (Yes, our
__beXX types used without a * are borderline, but we've got used to it).

In fact I find the use of 

    memcpy(((char *)&encoded) + 2, mac, ETH_ALEN);

Really gross :-) Yes it works with the added cpu_to_be64() but in that
specific case, I think it would be nicer to simply load & shift into
position the 6 bytes and avoid the endianness issue completely.

Cheers,
Ben.

^ permalink raw reply

* Re: [PATCH] iommu: Add empty stub for iommu_group_get_by_id()
From: Alexey Kardashevskiy @ 2013-12-24  5:10 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Alexey Kardashevskiy, Joerg Roedel, linux-kernel
In-Reply-To: <1385016074-17026-1-git-send-email-aik@ozlabs.ru>

On 11/21/2013 05:41 PM, Alexey Kardashevskiy wrote:
> Almost every function in include/linux/iommu.h has an empty stub
> but the iommu_group_get_by_id() did not get one by mistake.
> 
> This adds an empty stub for iommu_group_get_by_id() for IOMMU_API
> disabled config.

Ping?


> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
>  include/linux/iommu.h | 5 +++++
>  1 file changed, 5 insertions(+)
> 
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 7ea319e..3c7903d 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -245,6 +245,11 @@ static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
>  	return NULL;
>  }
>  
> +static inline struct iommu_group *iommu_group_get_by_id(int id)
> +{
> +	return NULL;
> +}
> +
>  static inline void iommu_domain_free(struct iommu_domain *domain)
>  {
>  }
> 


-- 
Alexey

^ permalink raw reply

* [PATCH v4 00/10] powerpc: enable the relocatable support for fsl booke 32bit kernel
From: Kevin Hao @ 2013-12-24  7:12 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc

v4:
  - Fix the bug when booting above 64M.
  - Rebase onto v3.13-rc5
  - Pass the following test on a p5020ds board:
       boot kernel at 0x5000000 and 0x9000000
       kdump test with kernel option "crashkernel=64M@80M"

v3:
The main changes include:
  * Drop the patch 5 in v2 (memblock: introduce the memblock_reinit function)
  * Change to use the 64M boot init tlb.

Please refer to the comment section of each patch for more detail.

This patch series passed the kdump test with kernel option "crashkernel=64M@32M"
and "crashkernel=64M@80M" on a p2020rdb board.

v2:
These patches are based on the Ben's next branch. In this version we choose
to do a second relocation if the PAGE_OFFSET is not mapped to the memstart_addr
and we also choose to set the tlb1 entries for the kernel space in address
space 1. With this implementation:
  * We can load the kernel at any place between
     memstart_addr ~ memstart_addr + 768M
  * We can reserve any memory between memstart_addr ~ memstart_addr + 768M
    for a kdump kernel.

I have done a kdump boot on a p2020rdb kernel with the memory reserved by
'crashkernel=32M@320M'.


v1:
Currently the fsl booke 32bit kernel is using the DYNAMIC_MEMSTART relocation
method. But the RELOCATABLE method is more flexible and has less alignment
restriction. So enable this feature on this platform and use it by
default for the kdump kernel.

These patches have passed the kdump boot test on a p2020rdb board.
---
Kevin Hao (10):
  powerpc/fsl_booke: protect the access to MAS7
  powerpc/fsl_booke: introduce get_phys_addr function
  powerpc: introduce macro LOAD_REG_ADDR_PIC
  powerpc: enable the relocatable support for the fsl booke 32bit kernel
  powerpc/fsl_booke: set the tlb entry for the kernel address in AS1
  powerpc: introduce early_get_first_memblock_info
  powerpc/fsl_booke: introduce map_mem_in_cams_addr
  powerpc/fsl_booke: make sure PAGE_OFFSET map to memstart_addr for
    relocatable kernel
  powerpc/fsl_booke: smp support for booting a relocatable kernel above
    64M
  powerpc/fsl_booke: enable the relocatable for the kdump kernel

 arch/powerpc/Kconfig                          |   5 +-
 arch/powerpc/include/asm/ppc_asm.h            |  13 ++
 arch/powerpc/kernel/fsl_booke_entry_mapping.S |   2 +
 arch/powerpc/kernel/head_fsl_booke.S          | 266 +++++++++++++++++++++++---
 arch/powerpc/kernel/prom.c                    |  41 +++-
 arch/powerpc/mm/fsl_booke_mmu.c               |  72 ++++++-
 arch/powerpc/mm/hugetlbpage-book3e.c          |   3 +-
 arch/powerpc/mm/mmu_decl.h                    |   2 +
 arch/powerpc/mm/tlb_nohash_low.S              |   4 +-
 include/linux/of_fdt.h                        |   1 +
 10 files changed, 370 insertions(+), 39 deletions(-)

-- 
1.8.3.1

^ permalink raw reply

* [PATCH v4 01/10] powerpc/fsl_booke: protect the access to MAS7
From: Kevin Hao @ 2013-12-24  7:12 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>

The e500v1 doesn't implement MAS7, so we should avoid accessing
this register on those implementations. In the current kernel,
accesses to MAS7 are protected by either CONFIG_PHYS_64BIT or
MMU_FTR_BIG_PHYS. Since some code is executed before the code
patching, we have to use CONFIG_PHYS_64BIT in these cases.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: No change.

v3: Use ifdef CONFIG_PHYS_64BIT for the code running before code patching.

v2: A new patch in v2.

 arch/powerpc/kernel/head_fsl_booke.S | 2 ++
 arch/powerpc/mm/hugetlbpage-book3e.c | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index f45726a1d963..09921a5197c6 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -82,7 +82,9 @@ _ENTRY(_start);
 	and	r19,r3,r18		/* r19 = page offset */
 	andc	r31,r20,r18		/* r31 = page base */
 	or	r31,r31,r19		/* r31 = devtree phys addr */
+#ifdef CONFIG_PHYS_64BIT
 	mfspr	r30,SPRN_MAS7
+#endif
 
 	li	r25,0			/* phys kernel start (low) */
 	li	r24,0			/* CPU number */
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 74551b5e41e5..646c4bffaeba 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -103,7 +103,8 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
 	if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
 		mtspr(SPRN_MAS7_MAS3, mas7_3);
 	} else {
-		mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
+		if (mmu_has_feature(MMU_FTR_BIG_PHYS))
+			mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
 		mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
 	}
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v4 02/10] powerpc/fsl_booke: introduce get_phys_addr function
From: Kevin Hao @ 2013-12-24  7:12 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>

Move the code which translates an effective address to a physical address
into a separate function, so that it can be reused by other code.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: No change.

v3: Use ifdef CONFIG_PHYS_64BIT to protect the access to MAS7

v2: A new patch in v2.

 arch/powerpc/kernel/head_fsl_booke.S | 50 +++++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 09921a5197c6..196950f29c00 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -65,26 +65,9 @@ _ENTRY(_start);
 	nop
 
 	/* Translate device tree address to physical, save in r30/r31 */
-	mfmsr	r16
-	mfspr	r17,SPRN_PID
-	rlwinm	r17,r17,16,0x3fff0000	/* turn PID into MAS6[SPID] */
-	rlwimi	r17,r16,28,0x00000001	/* turn MSR[DS] into MAS6[SAS] */
-	mtspr	SPRN_MAS6,r17
-
-	tlbsx	0,r3			/* must succeed */
-
-	mfspr	r16,SPRN_MAS1
-	mfspr	r20,SPRN_MAS3
-	rlwinm	r17,r16,25,0x1f		/* r17 = log2(page size) */
-	li	r18,1024
-	slw	r18,r18,r17		/* r18 = page size */
-	addi	r18,r18,-1
-	and	r19,r3,r18		/* r19 = page offset */
-	andc	r31,r20,r18		/* r31 = page base */
-	or	r31,r31,r19		/* r31 = devtree phys addr */
-#ifdef CONFIG_PHYS_64BIT
-	mfspr	r30,SPRN_MAS7
-#endif
+	bl	get_phys_addr
+	mr	r30,r3
+	mr	r31,r4
 
 	li	r25,0			/* phys kernel start (low) */
 	li	r24,0			/* CPU number */
@@ -858,6 +841,33 @@ KernelSPE:
 #endif /* CONFIG_SPE */
 
 /*
+ * Translate the effective addr in r3 to phys addr. The phys addr will be put
+ * into r3(higher 32bit) and r4(lower 32bit)
+ */
+get_phys_addr:
+	mfmsr	r8
+	mfspr	r9,SPRN_PID
+	rlwinm	r9,r9,16,0x3fff0000	/* turn PID into MAS6[SPID] */
+	rlwimi	r9,r8,28,0x00000001	/* turn MSR[DS] into MAS6[SAS] */
+	mtspr	SPRN_MAS6,r9
+
+	tlbsx	0,r3			/* must succeed */
+
+	mfspr	r8,SPRN_MAS1
+	mfspr	r12,SPRN_MAS3
+	rlwinm	r9,r8,25,0x1f		/* r9 = log2(page size) */
+	li	r10,1024
+	slw	r10,r10,r9		/* r10 = page size */
+	addi	r10,r10,-1
+	and	r11,r3,r10		/* r11 = page offset */
+	andc	r4,r12,r10		/* r4 = page base */
+	or	r4,r4,r11		/* r4 = devtree phys addr */
+#ifdef CONFIG_PHYS_64BIT
+	mfspr	r3,SPRN_MAS7
+#endif
+	blr
+
+/*
  * Global functions
  */
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v4 03/10] powerpc: introduce macro LOAD_REG_ADDR_PIC
From: Kevin Hao @ 2013-12-24  7:12 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>

This is used to get the address of a variable when the kernel is not
running at the linked or relocated address.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: A new patch in v4.

 arch/powerpc/include/asm/ppc_asm.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index f595b98079ee..1279c59624ed 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -295,6 +295,11 @@ n:
  *   you want to access various offsets within it).  On ppc32 this is
  *   identical to LOAD_REG_IMMEDIATE.
  *
+ * LOAD_REG_ADDR_PIC(rn, name)
+ *   Loads the address of label 'name' into register 'rn'. Use this when
+ *   the kernel doesn't run at the linked or relocated address. Please
+ *   note that this macro will clobber the lr register.
+ *
  * LOAD_REG_ADDRBASE(rn, name)
  * ADDROFF(name)
  *   LOAD_REG_ADDRBASE loads part of the address of label 'name' into
@@ -305,6 +310,14 @@ n:
  *      LOAD_REG_ADDRBASE(rX, name)
  *      ld	rY,ADDROFF(name)(rX)
  */
+
+/* Be careful, this will clobber the lr register. */
+#define LOAD_REG_ADDR_PIC(reg, name)		\
+	bl	0f;				\
+0:	mflr	reg;				\
+	addis	reg,reg,(name - 0b)@ha;		\
+	addi	reg,reg,(name - 0b)@l;
+
 #ifdef __powerpc64__
 #define LOAD_REG_IMMEDIATE(reg,expr)		\
 	lis     reg,(expr)@highest;		\
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v4 04/10] powerpc: enable the relocatable support for the fsl booke 32bit kernel
From: Kevin Hao @ 2013-12-24  7:12 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>

This is based on the code in head_44x.S. The difference is that
the initial TLB size we use is 64M. With this patch we can only load the
kernel at an address between memstart_addr ~ memstart_addr + 64M. We will
fix this restriction in the following patches.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: Use macro LOAD_REG_ADDR_PIC.

v3:
  * Use the 64M align.
  * typo fix.

v2: Move the code to set kernstart_addr and virt_phys_offset to a c function.
    So we can expand it easily later.

 arch/powerpc/Kconfig                          |  2 +-
 arch/powerpc/kernel/fsl_booke_entry_mapping.S |  2 ++
 arch/powerpc/kernel/head_fsl_booke.S          | 34 +++++++++++++++++++++++++++
 arch/powerpc/mm/fsl_booke_mmu.c               | 28 ++++++++++++++++++++++
 4 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b44b52c0a8f0..f5b464c41117 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -881,7 +881,7 @@ config DYNAMIC_MEMSTART
 
 config RELOCATABLE
 	bool "Build a relocatable kernel"
-	depends on ADVANCED_OPTIONS && FLATMEM && 44x
+	depends on ADVANCED_OPTIONS && FLATMEM && (44x || FSL_BOOKE)
 	select NONSTATIC_KERNEL
 	help
 	  This builds a kernel image that is capable of running at the
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index a92c79be2728..f22e7e44fbf3 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -176,6 +176,8 @@ skpinv:	addi	r6,r6,1				/* Increment */
 /* 7. Jump to KERNELBASE mapping */
 	lis	r6,(KERNELBASE & ~0xfff)@h
 	ori	r6,r6,(KERNELBASE & ~0xfff)@l
+	rlwinm	r7,r25,0,0x03ffffff
+	add	r6,r7,r6
 
 #elif defined(ENTRY_MAPPING_KEXEC_SETUP)
 /*
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 196950f29c00..19bd574bda9d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -73,6 +73,30 @@ _ENTRY(_start);
 	li	r24,0			/* CPU number */
 	li	r23,0			/* phys kernel start (high) */
 
+#ifdef CONFIG_RELOCATABLE
+	LOAD_REG_ADDR_PIC(r3, _stext)	/* Get our current runtime base */
+
+	/* Translate _stext address to physical, save in r23/r25 */
+	bl	get_phys_addr
+	mr	r23,r3
+	mr	r25,r4
+
+	/*
+	 * We have the runtime (virtual) address of our base.
+	 * We calculate our shift of offset from a 64M page.
+	 * We could map the 64M page we belong to at PAGE_OFFSET and
+	 * get going from there.
+	 */
+	lis	r4,KERNELBASE@h
+	ori	r4,r4,KERNELBASE@l
+	rlwinm	r6,r25,0,0x3ffffff		/* r6 = PHYS_START % 64M */
+	rlwinm	r5,r4,0,0x3ffffff		/* r5 = KERNELBASE % 64M */
+	subf	r3,r5,r6			/* r3 = r6 - r5 */
+	add	r3,r4,r3			/* Required Virtual Address */
+
+	bl	relocate
+#endif
+
 /* We try to not make any assumptions about how the boot loader
  * setup or used the TLBs.  We invalidate all mappings from the
  * boot loader and load a single entry in TLB1[0] to map the
@@ -182,6 +206,16 @@ _ENTRY(__early_start)
 
 	bl	early_init
 
+#ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_PHYS_64BIT
+	mr	r3,r23
+	mr	r4,r25
+#else
+	mr	r3,r25
+#endif
+	bl	relocate_init
+#endif
+
 #ifdef CONFIG_DYNAMIC_MEMSTART
 	lis	r3,kernstart_addr@ha
 	la	r3,kernstart_addr@l(r3)
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 07ba45b0f07c..ce4a1163ddd3 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -241,4 +241,32 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 	/* 64M mapped initially according to head_fsl_booke.S */
 	memblock_set_current_limit(min_t(u64, limit, 0x04000000));
 }
+
+#ifdef CONFIG_RELOCATABLE
+notrace void __init relocate_init(phys_addr_t start)
+{
+	unsigned long base = KERNELBASE;
+
+	/*
+	 * Relocatable kernel support based on processing of dynamic
+	 * relocation entries.
+	 * Compute the virt_phys_offset :
+	 * virt_phys_offset = stext.run - kernstart_addr
+	 *
+	 * stext.run = (KERNELBASE & ~0x3ffffff) + (kernstart_addr & 0x3ffffff)
+	 * When we relocate, we have :
+	 *
+	 *	(kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff)
+	 *
+	 * hence:
+	 *  virt_phys_offset = (KERNELBASE & ~0x3ffffff) -
+	 *                              (kernstart_addr & ~0x3ffffff)
+	 *
+	 */
+	kernstart_addr = start;
+	start &= ~0x3ffffff;
+	base &= ~0x3ffffff;
+	virt_phys_offset = base - start;
+}
+#endif
 #endif
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v4 05/10] powerpc/fsl_booke: set the tlb entry for the kernel address in AS1
From: Kevin Hao @ 2013-12-24  7:12 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>

We use the tlb1 entries to map low mem to the kernel space. The
current code assumes that the first tlb entry covers the
kernel image. But this is not true in some special cases, such as
when we run a relocatable kernel above 64M or set
CONFIG_KERNEL_START above 64M. So we choose to switch to address
space 1 before setting these tlb entries.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: No change.

v3: Typo fix.

v2: A new patch in v2.

 arch/powerpc/kernel/head_fsl_booke.S | 81 ++++++++++++++++++++++++++++++++++++
 arch/powerpc/mm/fsl_booke_mmu.c      |  2 +
 arch/powerpc/mm/mmu_decl.h           |  2 +
 3 files changed, 85 insertions(+)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 19bd574bda9d..75f0223e6d0d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -1157,6 +1157,87 @@ __secondary_hold_acknowledge:
 #endif
 
 /*
+ * Create a tlb entry with the same effective and physical address as
+ * the tlb entry used by the current running code. But set the TS to 1.
+ * Then switch to the address space 1. It will return with the r3 set to
+ * the ESEL of the new created tlb.
+ */
+_GLOBAL(switch_to_as1)
+	mflr	r5
+
+	/* Find a entry not used */
+	mfspr	r3,SPRN_TLB1CFG
+	andi.	r3,r3,0xfff
+	mfspr	r4,SPRN_PID
+	rlwinm	r4,r4,16,0x3fff0000	/* turn PID into MAS6[SPID] */
+	mtspr	SPRN_MAS6,r4
+1:	lis	r4,0x1000		/* Set MAS0(TLBSEL) = 1 */
+	addi	r3,r3,-1
+	rlwimi	r4,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r4
+	tlbre
+	mfspr	r4,SPRN_MAS1
+	andis.	r4,r4,MAS1_VALID@h
+	bne	1b
+
+	/* Get the tlb entry used by the current running code */
+	bl	0f
+0:	mflr	r4
+	tlbsx	0,r4
+
+	mfspr	r4,SPRN_MAS1
+	ori	r4,r4,MAS1_TS		/* Set the TS = 1 */
+	mtspr	SPRN_MAS1,r4
+
+	mfspr	r4,SPRN_MAS0
+	rlwinm	r4,r4,0,~MAS0_ESEL_MASK
+	rlwimi	r4,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r4
+	tlbwe
+	isync
+	sync
+
+	mfmsr	r4
+	ori	r4,r4,MSR_IS | MSR_DS
+	mtspr	SPRN_SRR0,r5
+	mtspr	SPRN_SRR1,r4
+	sync
+	rfi
+
+/*
+ * Restore to the address space 0 and also invalidate the tlb entry created
+ * by switch_to_as1.
+*/
+_GLOBAL(restore_to_as0)
+	mflr	r0
+
+	bl	0f
+0:	mflr	r9
+	addi	r9,r9,1f - 0b
+
+	mfmsr	r7
+	li	r8,(MSR_IS | MSR_DS)
+	andc	r7,r7,r8
+
+	mtspr	SPRN_SRR0,r9
+	mtspr	SPRN_SRR1,r7
+	sync
+	rfi
+
+	/* Invalidate the temporary tlb entry for AS1 */
+1:	lis	r9,0x1000		/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r9,r3,16,4,15		/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r9
+	tlbre
+	mfspr	r9,SPRN_MAS1
+	rlwinm	r9,r9,0,2,31		/* Clear MAS1 Valid and IPPROT */
+	mtspr	SPRN_MAS1,r9
+	tlbwe
+	isync
+	mtlr	r0
+	blr
+
+/*
  * We put a few things here that have to be page-aligned. This stuff
  * goes at the beginning of the data segment, which is page-aligned.
  */
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index ce4a1163ddd3..1d54f6d35e71 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -222,7 +222,9 @@ void __init adjust_total_lowmem(void)
 	/* adjust lowmem size to __max_low_memory */
 	ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
 
+	i = switch_to_as1();
 	__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM);
+	restore_to_as0(i);
 
 	pr_info("Memory CAM mapping: ");
 	for (i = 0; i < tlbcam_index - 1; i++)
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 83eb5d5f53d5..eefbf7bb4331 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -148,6 +148,8 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
 extern void MMU_init_hw(void);
 extern unsigned long mmu_mapin_ram(unsigned long top);
 extern void adjust_total_lowmem(void);
+extern int switch_to_as1(void);
+extern void restore_to_as0(int esel);
 #endif
 extern void loadcam_entry(unsigned int index);
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v4 06/10] powerpc: introduce early_get_first_memblock_info
From: Kevin Hao @ 2013-12-24  7:12 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>

Because a relocatable kernel can be loaded at any address, there is
no relation between the kernel start addr and the memstart_addr, so
we can't calculate the memstart_addr from the kernel start addr. We
also can't defer the relocation until we get the real memstart_addr
from the device tree, because that is too late. So introduce a new
function we can use to get the first memblock address and size at a
very early stage (before machine_init).

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: No change.

v3: Introduce a variable to avoid messing up the memblock.

v2: A new patch in v2.

 arch/powerpc/kernel/prom.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/of_fdt.h     |  1 +
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fa0ad8aafbcc..f58c0d3aaeb4 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -523,6 +523,20 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node,
 	return early_init_dt_scan_memory(node, uname, depth, data);
 }
 
+/*
+ * For a relocatable kernel, we need to get the memstart_addr first,
+ * then use it to calculate the virtual kernel start address. This has
+ * to happen at a very early stage (before machine_init). In this case,
+ * we just want to get the memstart_address and would not like to mess the
+ * memblock at this stage. So introduce a variable to skip the memblock_add()
+ * for this reason.
+ */
+#ifdef CONFIG_RELOCATABLE
+static int add_mem_to_memblock = 1;
+#else
+#define add_mem_to_memblock 1
+#endif
+
 void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 {
 #ifdef CONFIG_PPC64
@@ -543,7 +557,8 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 	}
 
 	/* Add the chunk to the MEMBLOCK list */
-	memblock_add(base, size);
+	if (add_mem_to_memblock)
+		memblock_add(base, size);
 }
 
 static void __init early_reserve_mem_dt(void)
@@ -740,6 +755,30 @@ void __init early_init_devtree(void *params)
 	DBG(" <- early_init_devtree()\n");
 }
 
+#ifdef CONFIG_RELOCATABLE
+/*
+ * This function run before early_init_devtree, so we have to init
+ * initial_boot_params.
+ */
+void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
+{
+	/* Setup flat device-tree pointer */
+	initial_boot_params = params;
+
+	/*
+	 * Scan the memory nodes and set add_mem_to_memblock to 0 to avoid
+	 * mess the memblock.
+	 */
+	add_mem_to_memblock = 0;
+	of_scan_flat_dt(early_init_dt_scan_root, NULL);
+	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+	add_mem_to_memblock = 1;
+
+	if (size)
+		*size = first_memblock_size;
+}
+#endif
+
 /*******
  *
  * New implementation of the OF "find" APIs, return a refcounted
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 0beaee9dac1f..2b77058a7335 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -116,6 +116,7 @@ extern const void *of_flat_dt_match_machine(const void *default_match,
 extern void unflatten_device_tree(void);
 extern void unflatten_and_copy_device_tree(void);
 extern void early_init_devtree(void *);
+extern void early_get_first_memblock_info(void *, phys_addr_t *);
 #else /* CONFIG_OF_FLATTREE */
 static inline const char *of_flat_dt_get_machine_name(void) { return NULL; }
 static inline void unflatten_device_tree(void) {}
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v4 07/10] powerpc/fsl_booke: introduce map_mem_in_cams_addr
From: Kevin Hao @ 2013-12-24  7:12 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>

Introduce this function so we can set both the physical and virtual
address for the map in cams. This will be used by the relocation code.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: A new patch in v4.

 arch/powerpc/mm/fsl_booke_mmu.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 1d54f6d35e71..ca956c83e3a2 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -171,11 +171,10 @@ unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
 	return 1UL << camsize;
 }
 
-unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
+static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
+					unsigned long ram, int max_cam_idx)
 {
 	int i;
-	unsigned long virt = PAGE_OFFSET;
-	phys_addr_t phys = memstart_addr;
 	unsigned long amount_mapped = 0;
 
 	/* Calculate CAM values */
@@ -195,6 +194,14 @@ unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
 	return amount_mapped;
 }
 
+unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
+{
+	unsigned long virt = PAGE_OFFSET;
+	phys_addr_t phys = memstart_addr;
+
+	return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx);
+}
+
 #ifdef CONFIG_PPC32
 
 #if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v4 08/10] powerpc/fsl_booke: make sure PAGE_OFFSET map to memstart_addr for relocatable kernel
From: Kevin Hao @ 2013-12-24  7:12 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>

PAGE_OFFSET always maps to memstart_addr for a non-relocatable kernel;
otherwise the kernel would get stuck. For a relocatable kernel it is a
little more complicated. When booting a relocatable kernel, we just
align the kernel start addr to 64M and map the PAGE_OFFSET from there.
The relocation is based on this virtual address. But if this address
is not the same as the memstart_addr, we have to change the mapping of
PAGE_OFFSET to the real memstart_addr and do a second relocation.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4:
  * Create the correct mem map in cams when booting above 64M.
  * Don't skip the init tlb mapping for the second cpu.

v3:
  * Typo fix.
  * Refactor relocate_init, no function change.
  * Map only 64M memory before the second relocation.
  * Comments update.

v2: A new patch in v2.

 arch/powerpc/kernel/head_fsl_booke.S | 74 +++++++++++++++++++++++++++++++++---
 arch/powerpc/mm/fsl_booke_mmu.c      | 41 +++++++++++++++++---
 arch/powerpc/mm/mmu_decl.h           |  2 +-
 3 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 75f0223e6d0d..71e08dfbd1d1 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -81,6 +81,39 @@ _ENTRY(_start);
 	mr	r23,r3
 	mr	r25,r4
 
+	bl	0f
+0:	mflr	r8
+	addis	r3,r8,(is_second_reloc - 0b)@ha
+	lwz	r19,(is_second_reloc - 0b)@l(r3)
+
+	/* Check if this is the second relocation. */
+	cmpwi	r19,1
+	bne	1f
+
+	/*
+	 * For the second relocation, we already get the real memstart_addr
+	 * from device tree. So we will map PAGE_OFFSET to memstart_addr,
+	 * then the virtual address of start kernel should be:
+	 *          PAGE_OFFSET + (kernstart_addr - memstart_addr)
+	 * Since the offset between kernstart_addr and memstart_addr should
+	 * never be beyond 1G, so we can just use the lower 32bit of them
+	 * for the calculation.
+	 */
+	lis	r3,PAGE_OFFSET@h
+
+	addis	r4,r8,(kernstart_addr - 0b)@ha
+	addi	r4,r4,(kernstart_addr - 0b)@l
+	lwz	r5,4(r4)
+
+	addis	r6,r8,(memstart_addr - 0b)@ha
+	addi	r6,r6,(memstart_addr - 0b)@l
+	lwz	r7,4(r6)
+
+	subf	r5,r7,r5
+	add	r3,r3,r5
+	b	2f
+
+1:
 	/*
 	 * We have the runtime (virutal) address of our base.
 	 * We calculate our shift of offset from a 64M page.
@@ -94,7 +127,14 @@ _ENTRY(_start);
 	subf	r3,r5,r6			/* r3 = r6 - r5 */
 	add	r3,r4,r3			/* Required Virtual Address */
 
-	bl	relocate
+2:	bl	relocate
+
+	/*
+	 * For the second relocation, we already set the right tlb entries
+	 * for the kernel space, so skip the code in fsl_booke_entry_mapping.S
+	*/
+	cmpwi	r19,1
+	beq	set_ivor
 #endif
 
 /* We try to not make any assumptions about how the boot loader
@@ -122,6 +162,7 @@ _ENTRY(__early_start)
 #include "fsl_booke_entry_mapping.S"
 #undef ENTRY_MAPPING_BOOT_SETUP
 
+set_ivor:
 	/* Establish the interrupt vector offsets */
 	SET_IVOR(0,  CriticalInput);
 	SET_IVOR(1,  MachineCheck);
@@ -207,11 +248,13 @@ _ENTRY(__early_start)
 	bl	early_init
 
 #ifdef CONFIG_RELOCATABLE
+	mr	r3,r30
+	mr	r4,r31
 #ifdef CONFIG_PHYS_64BIT
-	mr	r3,r23
-	mr	r4,r25
+	mr	r5,r23
+	mr	r6,r25
 #else
-	mr	r3,r25
+	mr	r5,r25
 #endif
 	bl	relocate_init
 #endif
@@ -1207,6 +1250,9 @@ _GLOBAL(switch_to_as1)
 /*
  * Restore to the address space 0 and also invalidate the tlb entry created
  * by switch_to_as1.
+ * r3 - the tlb entry which should be invalidated
+ * r4 - __pa(PAGE_OFFSET in AS0) - __pa(PAGE_OFFSET in AS1)
+ * r5 - device tree virtual address. If r4 is 0, r5 is ignored.
 */
 _GLOBAL(restore_to_as0)
 	mflr	r0
@@ -1215,7 +1261,15 @@ _GLOBAL(restore_to_as0)
 0:	mflr	r9
 	addi	r9,r9,1f - 0b
 
-	mfmsr	r7
+	/*
+	 * We may map the PAGE_OFFSET in AS0 to a different physical address,
+	 * so we need calculate the right jump and device tree address based
+	 * on the offset passed by r4.
+	 */
+	subf	r9,r4,r9
+	subf	r5,r4,r5
+
+2:	mfmsr	r7
 	li	r8,(MSR_IS | MSR_DS)
 	andc	r7,r7,r8
 
@@ -1234,9 +1288,19 @@ _GLOBAL(restore_to_as0)
 	mtspr	SPRN_MAS1,r9
 	tlbwe
 	isync
+
+	cmpwi	r4,0
+	bne	3f
 	mtlr	r0
 	blr
 
+	/*
+	 * The PAGE_OFFSET will map to a different physical address,
+	 * jump to _start to do another relocation again.
+	*/
+3:	mr	r3,r5
+	bl	_start
+
 /*
  * We put a few things here that have to be page-aligned. This stuff
  * goes at the beginning of the data segment, which is page-aligned.
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index ca956c83e3a2..ce0c7d7db6c3 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -231,7 +231,7 @@ void __init adjust_total_lowmem(void)
 
 	i = switch_to_as1();
 	__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM);
-	restore_to_as0(i);
+	restore_to_as0(i, 0, 0);
 
 	pr_info("Memory CAM mapping: ");
 	for (i = 0; i < tlbcam_index - 1; i++)
@@ -252,17 +252,25 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 }
 
 #ifdef CONFIG_RELOCATABLE
-notrace void __init relocate_init(phys_addr_t start)
+int __initdata is_second_reloc;
+notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
 {
 	unsigned long base = KERNELBASE;
 
+	kernstart_addr = start;
+	if (is_second_reloc) {
+		virt_phys_offset = PAGE_OFFSET - memstart_addr;
+		return;
+	}
+
 	/*
 	 * Relocatable kernel support based on processing of dynamic
-	 * relocation entries.
-	 * Compute the virt_phys_offset :
+	 * relocation entries. Before we get the real memstart_addr,
+	 * We will compute the virt_phys_offset like this:
 	 * virt_phys_offset = stext.run - kernstart_addr
 	 *
-	 * stext.run = (KERNELBASE & ~0x3ffffff) + (kernstart_addr & 0x3ffffff)
+	 * stext.run = (KERNELBASE & ~0x3ffffff) +
+	 *				(kernstart_addr & 0x3ffffff)
 	 * When we relocate, we have :
 	 *
 	 *	(kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff)
@@ -272,10 +280,31 @@ notrace void __init relocate_init(phys_addr_t start)
 	 *                              (kernstart_addr & ~0x3ffffff)
 	 *
 	 */
-	kernstart_addr = start;
 	start &= ~0x3ffffff;
 	base &= ~0x3ffffff;
 	virt_phys_offset = base - start;
+	early_get_first_memblock_info(__va(dt_ptr), NULL);
+	/*
+	 * We now get the memstart_addr, then we should check if this
+	 * address is the same as what the PAGE_OFFSET map to now. If
+	 * not we have to change the map of PAGE_OFFSET to memstart_addr
+	 * and do a second relocation.
+	 */
+	if (start != memstart_addr) {
+		int n, offset = memstart_addr - start;
+
+		is_second_reloc = 1;
+		n = switch_to_as1();
+		/* map a 64M area for the second relocation */
+		if (memstart_addr > start)
+			map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM);
+		else
+			map_mem_in_cams_addr(start, PAGE_OFFSET - offset,
+					0x4000000, CONFIG_LOWMEM_CAM_NUM);
+		restore_to_as0(n, offset, __va(dt_ptr));
+		/* We should never reach here */
+		panic("Relocation error");
+	}
 }
 #endif
 #endif
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index eefbf7bb4331..91da910210cb 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -149,7 +149,7 @@ extern void MMU_init_hw(void);
 extern unsigned long mmu_mapin_ram(unsigned long top);
 extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
-extern void restore_to_as0(int esel);
+extern void restore_to_as0(int esel, int offset, void *dt_ptr);
 #endif
 extern void loadcam_entry(unsigned int index);
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v4 09/10] powerpc/fsl_booke: smp support for booting a relocatable kernel above 64M
From: Kevin Hao @ 2013-12-24  7:12 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>

When a secondary cpu boots above 64M, it faces the same issue as the
boot cpu: the PAGE_OFFSET maps to two different physical addresses in
the init tlb mapping and in the final mapping. So we have to use
switch_to_as1/restore_to_as0 when converting between these two
mappings. When restoring to as0 on a secondary cpu, we only need to
return to the caller, so add a new parameter to the function
restore_to_as0 for this purpose.

Use LOAD_REG_ADDR_PIC to get the address of variables which may
be used before we set the final mapping in the cams for the secondary
cpu. Move the setting of the cams a bit earlier in order to avoid
unnecessary use of LOAD_REG_ADDR_PIC.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: A new patch in v4.

 arch/powerpc/kernel/head_fsl_booke.S | 41 ++++++++++++++++++++++++------------
 arch/powerpc/mm/fsl_booke_mmu.c      |  4 ++--
 arch/powerpc/mm/mmu_decl.h           |  2 +-
 arch/powerpc/mm/tlb_nohash_low.S     |  4 +++-
 4 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 71e08dfbd1d1..0e545630c42a 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -216,8 +216,7 @@ set_ivor:
 	/* Check to see if we're the second processor, and jump
 	 * to the secondary_start code if so
 	 */
-	lis	r24, boot_cpuid@h
-	ori	r24, r24, boot_cpuid@l
+	LOAD_REG_ADDR_PIC(r24, boot_cpuid)
 	lwz	r24, 0(r24)
 	cmpwi	r24, -1
 	mfspr   r24,SPRN_PIR
@@ -1146,24 +1145,36 @@ _GLOBAL(__flush_disable_L1)
 /* When we get here, r24 needs to hold the CPU # */
 	.globl __secondary_start
 __secondary_start:
-	lis	r3,__secondary_hold_acknowledge@h
-	ori	r3,r3,__secondary_hold_acknowledge@l
-	stw	r24,0(r3)
-
-	li	r3,0
-	mr	r4,r24		/* Why? */
-	bl	call_setup_cpu
-
-	lis	r3,tlbcam_index@ha
-	lwz	r3,tlbcam_index@l(r3)
+	LOAD_REG_ADDR_PIC(r3, tlbcam_index)
+	lwz	r3,0(r3)
 	mtctr	r3
 	li	r26,0		/* r26 safe? */
 
+	bl	switch_to_as1
+	mr	r27,r3		/* tlb entry */
 	/* Load each CAM entry */
 1:	mr	r3,r26
 	bl	loadcam_entry
 	addi	r26,r26,1
 	bdnz	1b
+	mr	r3,r27		/* tlb entry */
+	LOAD_REG_ADDR_PIC(r4, memstart_addr)
+	lwz	r4,0(r4)
+	mr	r5,r25		/* phys kernel start */
+	rlwinm	r5,r5,0,~0x3ffffff	/* aligned 64M */
+	subf	r4,r5,r4	/* memstart_addr - phys kernel start */
+	li	r5,0		/* no device tree */
+	li	r6,0		/* not boot cpu */
+	bl	restore_to_as0
+
+
+	lis	r3,__secondary_hold_acknowledge@h
+	ori	r3,r3,__secondary_hold_acknowledge@l
+	stw	r24,0(r3)
+
+	li	r3,0
+	mr	r4,r24		/* Why? */
+	bl	call_setup_cpu
 
 	/* get current_thread_info and current */
 	lis	r1,secondary_ti@ha
@@ -1253,6 +1264,7 @@ _GLOBAL(switch_to_as1)
  * r3 - the tlb entry which should be invalidated
  * r4 - __pa(PAGE_OFFSET in AS0) - __pa(PAGE_OFFSET in AS1)
  * r5 - device tree virtual address. If r4 is 0, r5 is ignored.
+ * r6 - boot cpu
 */
 _GLOBAL(restore_to_as0)
 	mflr	r0
@@ -1268,6 +1280,7 @@ _GLOBAL(restore_to_as0)
 	 */
 	subf	r9,r4,r9
 	subf	r5,r4,r5
+	subf	r0,r4,r0
 
 2:	mfmsr	r7
 	li	r8,(MSR_IS | MSR_DS)
@@ -1290,7 +1303,9 @@ _GLOBAL(restore_to_as0)
 	isync
 
 	cmpwi	r4,0
-	bne	3f
+	cmpwi	cr1,r6,0
+	cror	eq,4*cr1+eq,eq
+	bne	3f			/* offset != 0 && is_boot_cpu */
 	mtlr	r0
 	blr
 
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index ce0c7d7db6c3..2a81f53d49f1 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -231,7 +231,7 @@ void __init adjust_total_lowmem(void)
 
 	i = switch_to_as1();
 	__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM);
-	restore_to_as0(i, 0, 0);
+	restore_to_as0(i, 0, 0, 1);
 
 	pr_info("Memory CAM mapping: ");
 	for (i = 0; i < tlbcam_index - 1; i++)
@@ -301,7 +301,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
 		else
 			map_mem_in_cams_addr(start, PAGE_OFFSET - offset,
 					0x4000000, CONFIG_LOWMEM_CAM_NUM);
-		restore_to_as0(n, offset, __va(dt_ptr));
+		restore_to_as0(n, offset, __va(dt_ptr), 1);
 		/* We should never reach here */
 		panic("Relocation error");
 	}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 91da910210cb..9615d82919b8 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -149,7 +149,7 @@ extern void MMU_init_hw(void);
 extern unsigned long mmu_mapin_ram(unsigned long top);
 extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
-extern void restore_to_as0(int esel, int offset, void *dt_ptr);
+extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
 #endif
 extern void loadcam_entry(unsigned int index);
 
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index 626ad081639f..43ff3c797fbf 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -402,7 +402,9 @@ _GLOBAL(set_context)
  * Load TLBCAM[index] entry in to the L2 CAM MMU
  */
 _GLOBAL(loadcam_entry)
-	LOAD_REG_ADDR(r4, TLBCAM)
+	mflr	r5
+	LOAD_REG_ADDR_PIC(r4, TLBCAM)
+	mtlr	r5
 	mulli	r5,r3,TLBCAM_SIZE
 	add	r3,r5,r4
 	lwz	r4,TLBCAM_MAS0(r3)
-- 
1.8.3.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox