* [PATCH 02/21] net: freescale: slight optimization of addr compare
From: Ding Tianhong @ 2013-12-23 5:09 UTC (permalink / raw)
To: Li Yang, Netdev, linux-kernel@vger.kernel.org, linuxppc-dev
Use the recently added and possibly more efficient
ether_addr_equal_unaligned instead of memcmp.
Cc: Li Yang <leoli@freescale.com>
Cc: netdev@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
---
drivers/net/ethernet/freescale/ucc_geth.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 5548b6d..88a1525 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -437,7 +437,7 @@ static void hw_add_addr_in_hash(struct ucc_geth_private *ugeth,
static inline int compare_addr(u8 **addr1, u8 **addr2)
{
- return memcmp(addr1, addr2, ETH_ALEN);
+ return !ether_addr_equal_unaligned(addr1, addr2);
}
#ifdef DEBUG
--
1.8.0
^ permalink raw reply related
* Re: [PATCH] ibmveth: Fix more little endian issues
From: Anton Blanchard @ 2013-12-23 6:38 UTC (permalink / raw)
To: Alexander Graf; +Cc: Dinar Valeev, Santiago Leon, linuxppc-dev, netdev
In-Reply-To: <1387762163-39662-1-git-send-email-agraf@suse.de>
Hi Alex,
> The ibmveth driver is memcpy()'ing the mac address between a variable
> (register) and memory. This assumes a certain endianness of the
> system, so let's make that implicit assumption work again.
Nice catch! I don't like how the driver has two different methods
for creating these MAC addresses, both without comments. How does
this look?
Anton
--
The hypervisor expects MAC addresses passed in registers to be big
endian u64. Create a helper function called ibmveth_encode_mac_addr
which does the right thing in both big and little endian.
We were storing the MAC address in a long in struct ibmveth_adapter.
It's never used so remove it - we don't need another place in the
driver where we create endian issues with MAC addresses.
Reported-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Anton Blanchard <anton@samba.org>
---
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 952d795..044178b 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -523,6 +523,17 @@ retry:
return rc;
}
+/*
+ * The hypervisor expects MAC addresses passed in registers to be
+ * big endian u64.
+ */
+static unsigned long ibmveth_encode_mac_addr(char *mac)
+{
+ unsigned long encoded = 0;
+ memcpy(((char *)&encoded) + 2, mac, ETH_ALEN);
+ return cpu_to_be64(encoded);
+}
+
static int ibmveth_open(struct net_device *netdev)
{
struct ibmveth_adapter *adapter = netdev_priv(netdev);
@@ -580,8 +591,7 @@ static int ibmveth_open(struct net_device *netdev)
adapter->rx_queue.num_slots = rxq_entries;
adapter->rx_queue.toggle = 1;
- memcpy(&mac_address, netdev->dev_addr, netdev->addr_len);
- mac_address = mac_address >> 16;
+ mac_address = ibmveth_encode_mac_addr(netdev->dev_addr);
rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
adapter->rx_queue.queue_len;
@@ -1184,8 +1194,8 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
/* add the addresses to the filter table */
netdev_for_each_mc_addr(ha, netdev) {
/* add the multicast address to the filter table */
- unsigned long mcast_addr = 0;
- memcpy(((char *)&mcast_addr)+2, ha->addr, ETH_ALEN);
+ unsigned long mcast_addr;
+ mcast_addr = ibmveth_encode_mac_addr(ha->addr);
lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
IbmVethMcastAddFilter,
mcast_addr);
@@ -1369,9 +1379,6 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
- adapter->mac_addr = 0;
- memcpy(&adapter->mac_addr, mac_addr_p, ETH_ALEN);
-
netdev->irq = dev->irq;
netdev->netdev_ops = &ibmveth_netdev_ops;
netdev->ethtool_ops = &netdev_ethtool_ops;
@@ -1380,7 +1387,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
netdev->features |= netdev->hw_features;
- memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len);
+ memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);
for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index 84066ba..2c636cb 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -139,7 +139,6 @@ struct ibmveth_adapter {
struct napi_struct napi;
struct net_device_stats stats;
unsigned int mcastFilterSize;
- unsigned long mac_addr;
void * buffer_list_addr;
void * filter_list_addr;
dma_addr_t buffer_list_dma;
^ permalink raw reply related
* Re: [PATCH] ibmveth: Fix more little endian issues
From: Alexander Graf @ 2013-12-23 10:17 UTC (permalink / raw)
To: Anton Blanchard; +Cc: Dinar Valeev, Santiago Leon, linuxppc-dev, netdev
In-Reply-To: <20131223173833.0a9a6705@kryten>
On 23.12.2013, at 07:38, Anton Blanchard <anton@samba.org> wrote:
>
> Hi Alex,
>
>> The ibmveth driver is memcpy()'ing the mac address between a variable
>> (register) and memory. This assumes a certain endianness of the
>> system, so let's make that implicit assumption work again.
>
> Nice catch! I don't like how the driver has two different methods
> for creating these MAC addresses, both without comments. How does
> this look?
Heh - I didn't even realize those two places were doing the same thing.
Obviously your patch is by far nicer.
Reviewed-by: Alexander Graf <agraf@suse.de>
Alex
>
> Anton
> --
>
> The hypervisor expects MAC addresses passed in registers to be big
> endian u64. Create a helper function called ibmveth_encode_mac_addr
> which does the right thing in both big and little endian.
>
> We were storing the MAC address in a long in struct ibmveth_adapter.
> It's never used so remove it - we don't need another place in the
> driver where we create endian issues with MAC addresses.
>
> Reported-by: Alexander Graf <agraf@suse.de>
> Signed-off-by: Anton Blanchard <anton@samba.org>
> ---
>
> diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
> index 952d795..044178b 100644
> --- a/drivers/net/ethernet/ibm/ibmveth.c
> +++ b/drivers/net/ethernet/ibm/ibmveth.c
> @@ -523,6 +523,17 @@ retry:
> return rc;
> }
>
> +/*
> + * The hypervisor expects MAC addresses passed in registers to be
> + * big endian u64.
> + */
> +static unsigned long ibmveth_encode_mac_addr(char *mac)
> +{
> + unsigned long encoded = 0;
> + memcpy(((char *)&encoded) + 2, mac, ETH_ALEN);
> + return cpu_to_be64(encoded);
> +}
> +
> static int ibmveth_open(struct net_device *netdev)
> {
> struct ibmveth_adapter *adapter = netdev_priv(netdev);
> @@ -580,8 +591,7 @@ static int ibmveth_open(struct net_device *netdev)
> adapter->rx_queue.num_slots = rxq_entries;
> adapter->rx_queue.toggle = 1;
>
> - memcpy(&mac_address, netdev->dev_addr, netdev->addr_len);
> - mac_address = mac_address >> 16;
> + mac_address = ibmveth_encode_mac_addr(netdev->dev_addr);
>
> rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
> adapter->rx_queue.queue_len;
> @@ -1184,8 +1194,8 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
> /* add the addresses to the filter table */
> netdev_for_each_mc_addr(ha, netdev) {
> /* add the multicast address to the filter table */
> - unsigned long mcast_addr = 0;
> - memcpy(((char *)&mcast_addr)+2, ha->addr, ETH_ALEN);
> + unsigned long mcast_addr;
> + mcast_addr = ibmveth_encode_mac_addr(ha->addr);
> lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
> IbmVethMcastAddFilter,
> mcast_addr);
> @@ -1369,9 +1379,6 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
>
> netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
>
> - adapter->mac_addr = 0;
> - memcpy(&adapter->mac_addr, mac_addr_p, ETH_ALEN);
> -
> netdev->irq = dev->irq;
> netdev->netdev_ops = &ibmveth_netdev_ops;
> netdev->ethtool_ops = &netdev_ethtool_ops;
> @@ -1380,7 +1387,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
> NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
> netdev->features |= netdev->hw_features;
>
> - memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len);
> + memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);
>
> for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
> struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
> diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
> index 84066ba..2c636cb 100644
> --- a/drivers/net/ethernet/ibm/ibmveth.h
> +++ b/drivers/net/ethernet/ibm/ibmveth.h
> @@ -139,7 +139,6 @@ struct ibmveth_adapter {
> struct napi_struct napi;
> struct net_device_stats stats;
> unsigned int mcastFilterSize;
> - unsigned long mac_addr;
> void * buffer_list_addr;
> void * filter_list_addr;
> dma_addr_t buffer_list_dma;
^ permalink raw reply
* [PATCH 1/3] powerpc/xmon: Don't loop forever in get_output_lock()
From: Michael Ellerman @ 2013-12-23 12:46 UTC (permalink / raw)
To: linuxppc-dev
From: Michael Ellerman <michael@ellerman.id.au>
If we enter with xmon_speaker != 0 we skip the first cmpxchg(), we also
skip the while loop because xmon_speaker != last_speaker (0) - meaning we
skip the second cmpxchg() also.
Following that code path the compiler sees no memory barriers and so is
within its rights to never reload xmon_speaker. The end result is we loop
forever.
This manifests as all cpus being in xmon ('c' command), but they refuse
to take control when you switch to them ('c x' for cpu # x).
I have seen this deadlock in practice and also checked the generated code to
confirm this is what's happening.
The simplest fix is just to always try the cmpxchg().
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
---
arch/powerpc/xmon/xmon.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index af9d346..500105c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -309,12 +309,12 @@ static void get_output_lock(void)
if (xmon_speaker == me)
return;
+
for (;;) {
- if (xmon_speaker == 0) {
- last_speaker = cmpxchg(&xmon_speaker, 0, me);
- if (last_speaker == 0)
- return;
- }
+ last_speaker = cmpxchg(&xmon_speaker, 0, me);
+ if (last_speaker == 0)
+ return;
+
timeout = 10000000;
while (xmon_speaker == last_speaker) {
if (--timeout > 0)
--
1.8.3.2
^ permalink raw reply related
* [PATCH 2/3] powerpc/xmon: Fix timeout loop in get_output_lock()
From: Michael Ellerman @ 2013-12-23 12:46 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <1387802766-7199-1-git-send-email-mpe@ellerman.id.au>
As far as I can tell, our 70s era timeout loop in get_output_lock() is
generating no code.
This leads to the hostile takeover happening more or less simultaneously
on all cpus. The result is "interesting", some example output that is
more readable than most:
cpu 0x1: Vector: 100 (Scypsut e0mx bR:e setV)e catto xc0p:u[ c 00
c0:0 000t0o0V0erc0td:o5 rfc28050000]0c00 0 0 0 6t(pSrycsV1ppuot
uxe 1m 2 0Rx21e3:0s0ce000c00000t00)00 60602oV2SerucSayt0y 0p 1sxs
Fix it by using udelay() in the timeout loop. The wait time and check
frequency are arbitrary, but seem to work OK. We already rely on
udelay() working so this is not a new dependency.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/xmon/xmon.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 500105c..051037e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -315,10 +315,17 @@ static void get_output_lock(void)
if (last_speaker == 0)
return;
- timeout = 10000000;
+ /*
+ * Wait a full second for the lock, we might be on a slow
+ * console, but check every 100us.
+ */
+ timeout = 10000;
while (xmon_speaker == last_speaker) {
- if (--timeout > 0)
+ if (--timeout > 0) {
+ udelay(100);
continue;
+ }
+
/* hostile takeover */
prev = cmpxchg(&xmon_speaker, last_speaker, me);
if (prev == last_speaker)
--
1.8.3.2
^ permalink raw reply related
* [PATCH 3/3] powerpc/xmon: Don't signal we've entered until we're finished printing
From: Michael Ellerman @ 2013-12-23 12:46 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <1387802766-7199-1-git-send-email-mpe@ellerman.id.au>
Currently we set our cpu's bit in cpus_in_xmon, and then we take the
output lock and print the exception information.
This can race with the master cpu entering the command loop and printing
the backtrace. The result is that the backtrace gets garbled with
another cpu's exception print out.
Fix it by delaying the set of cpus_in_xmon until we are finished
printing.
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/xmon/xmon.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 051037e..b59f44f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -404,7 +404,6 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
}
xmon_fault_jmp[cpu] = recurse_jmp;
- cpumask_set_cpu(cpu, &cpus_in_xmon);
bp = NULL;
if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT))
@@ -426,6 +425,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
release_output_lock();
}
+ cpumask_set_cpu(cpu, &cpus_in_xmon);
+
waiting:
secondary = 1;
while (secondary && !xmon_gate) {
--
1.8.3.2
^ permalink raw reply related
* Re: [PATCH] ibmveth: Fix more little endian issues
From: Joe Perches @ 2013-12-23 14:52 UTC (permalink / raw)
To: Anton Blanchard
Cc: Dinar Valeev, linuxppc-dev, Alexander Graf, netdev, Santiago Leon
In-Reply-To: <20131223173833.0a9a6705@kryten>
On Mon, 2013-12-23 at 17:38 +1100, Anton Blanchard wrote:
> The hypervisor expects MAC addresses passed in registers to be big
> endian u64.
So maybe use __be64 declarations?
> +static unsigned long ibmveth_encode_mac_addr(char *mac)
static __be64 ibmveth_encode_mac_addr(const char *mac)
?
etc...
^ permalink raw reply
* Re: [PATCH] powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian
From: Anton Blanchard @ 2013-12-24 1:02 UTC (permalink / raw)
To: Michael Ellerman; +Cc: paulmck, paulus, linuxppc-dev
In-Reply-To: <1387459057.1305.1.camel@concordia>
Hi Michael,
> > To try and catch any screw ups in our ppc64 memcpy and
> > copy_tofrom_user loops, I wrote a quick test:
> >
> > http://ozlabs.org/~anton/junkcode/validate_kernel_copyloops.tar.gz
>
> Nice! How's this look?
Love it!
At the moment my other copy_to/from_user tests run against the kernel
(testing we copy all data right up to a page fault and that we return
the correct number of bytes not copied etc). A small signal handler
that walks the exception entries and branches to the handler should be
all it takes to do it completely in userspace.
Anton
>
> cheers
>
>
> selftests: Import Anton's memcpy / copy_tofrom_user tests
>
> Turn Anton's memcpy / copy_tofrom_user test into something that can
> live in tools/testing/selftests.
>
> It requires one turd in arch/powerpc/lib/memcpy_64.S, but it's pretty
> harmless IMHO.
>
> We are sailing very close to the wind with the feature macros. We
> define them to nothing, which currently means we get a few extra nops
> and include the unaligned calls.
>
> ---
> arch/powerpc/lib/memcpy_64.S | 2 +
> tools/testing/selftests/powerpc/Makefile | 2 +-
> tools/testing/selftests/powerpc/copyloops/Makefile | 29 +++++++
> .../selftests/powerpc/copyloops/asm/ppc_asm.h | 86 +++++++++++++++++++
> .../selftests/powerpc/copyloops/asm/processor.h | 0
> .../selftests/powerpc/copyloops/copyuser_64.S | 1 +
> .../selftests/powerpc/copyloops/copyuser_power7.S | 1 +
> .../selftests/powerpc/copyloops/memcpy_64.S | 1 +
> .../selftests/powerpc/copyloops/memcpy_power7.S | 1 +
> .../testing/selftests/powerpc/copyloops/validate.c | 99 ++++++++++++++++++++++
> tools/testing/selftests/powerpc/utils.h | 3 +
> 11 files changed, 224 insertions(+), 1 deletion(-)
> create mode 100644 tools/testing/selftests/powerpc/copyloops/Makefile
> create mode 100644 tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
> create mode 100644 tools/testing/selftests/powerpc/copyloops/asm/processor.h
> create mode 120000 tools/testing/selftests/powerpc/copyloops/copyuser_64.S
> create mode 120000 tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
> create mode 120000 tools/testing/selftests/powerpc/copyloops/memcpy_64.S
> create mode 120000 tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
> create mode 100644 tools/testing/selftests/powerpc/copyloops/validate.c
>
> diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
> index d2bbbc8..72ad055 100644
> --- a/arch/powerpc/lib/memcpy_64.S
> +++ b/arch/powerpc/lib/memcpy_64.S
> @@ -14,7 +14,9 @@ _GLOBAL(memcpy)
> BEGIN_FTR_SECTION
> std r3,48(r1) /* save destination pointer for return value */
> FTR_SECTION_ELSE
> +#ifndef SELFTEST
> b memcpy_power7
> +#endif
> ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
> PPC_MTOCRF(0x01,r5)
> cmpldi cr1,r5,16
> diff --git a/tools/testing/selftests/powerpc/Makefile
> b/tools/testing/selftests/powerpc/Makefile index bd24ae5..316194f
> 100644 --- a/tools/testing/selftests/powerpc/Makefile
> +++ b/tools/testing/selftests/powerpc/Makefile
> @@ -13,7 +13,7 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror
> -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR
> export CC CFLAGS
>
> -TARGETS = pmu
> +TARGETS = pmu copyloops
>
> endif
>
> diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile
> b/tools/testing/selftests/powerpc/copyloops/Makefile new file mode
> 100644 index 0000000..6f2d3be
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/Makefile
> @@ -0,0 +1,29 @@
> +# The loops are all 64-bit code
> +CFLAGS += -m64
> +CFLAGS += -I$(CURDIR)
> +CFLAGS += -D SELFTEST
> +
> +# Use our CFLAGS for the implicit .S rule
> +ASFLAGS = $(CFLAGS)
> +
> +PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
> +EXTRA_SOURCES := validate.c ../harness.c
> +
> +all: $(PROGS)
> +
> +copyuser_64: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
> +copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
> +memcpy_64: CPPFLAGS += -D COPY_LOOP=test_memcpy
> +memcpy_power7: CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
> +
> +$(PROGS): $(EXTRA_SOURCES)
> +
> +run_tests: all
> + @-for PROG in $(PROGS); do \
> + ./$$PROG; \
> + done;
> +
> +clean:
> + rm -f $(PROGS) *.o
> +
> +.PHONY: all run_tests clean
> diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
> b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h new file
> mode 100644 index 0000000..ccd9c84
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
> @@ -0,0 +1,86 @@
> +#include <ppc-asm.h>
> +
> +#define CONFIG_ALTIVEC
> +
> +#define r1 1
> +
> +#define vr0 0
> +#define vr1 1
> +#define vr2 2
> +#define vr3 3
> +#define vr4 4
> +#define vr5 5
> +#define vr6 6
> +#define vr7 7
> +#define vr8 8
> +#define vr9 9
> +#define vr10 10
> +#define vr11 11
> +#define vr12 12
> +#define vr13 13
> +#define vr14 14
> +#define vr15 15
> +#define vr16 16
> +#define vr17 17
> +#define vr18 18
> +#define vr19 19
> +#define vr20 20
> +#define vr21 21
> +#define vr22 22
> +#define vr23 23
> +#define vr24 24
> +#define vr25 25
> +#define vr26 26
> +#define vr27 27
> +#define vr28 28
> +#define vr29 29
> +#define vr30 30
> +#define vr31 31
> +
> +#define R14 r14
> +#define R15 r15
> +#define R16 r16
> +#define R17 r17
> +#define R18 r18
> +#define R19 r19
> +#define R20 r20
> +#define R21 r21
> +#define R22 r22
> +
> +#define STACKFRAMESIZE 256
> +#define STK_PARAM(i) (48 + ((i)-3)*8)
> +#define STK_REG(i) (112 + ((i)-14)*8)
> +
> +#define _GLOBAL(A) FUNC_START(test_ ## A)
> +
> +#define PPC_MTOCRF(A, B) mtocrf A, B
> +
> +FUNC_START(enter_vmx_usercopy)
> + li r3,1
> + blr
> +
> +FUNC_START(exit_vmx_usercopy)
> + li r3,0
> + blr
> +
> +FUNC_START(enter_vmx_copy)
> + li r3,1
> + blr
> +
> +FUNC_START(exit_vmx_copy)
> + blr
> +
> +FUNC_START(memcpy_power7)
> + blr
> +
> +FUNC_START(__copy_tofrom_user_power7)
> + blr
> +
> +FUNC_START(__copy_tofrom_user_base)
> + blr
> +
> +#define BEGIN_FTR_SECTION
> +#define FTR_SECTION_ELSE
> +#define ALT_FTR_SECTION_END_IFCLR(x)
> +#define ALT_FTR_SECTION_END(x, y)
> +#define END_FTR_SECTION_IFCLR(x)
> diff --git a/tools/testing/selftests/powerpc/copyloops/asm/processor.h b/tools/testing/selftests/powerpc/copyloops/asm/processor.h
> new file mode 100644
> index 0000000..e69de29
> diff --git a/tools/testing/selftests/powerpc/copyloops/copyuser_64.S b/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
> new file mode 120000
> index 0000000..f1c418a
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
> @@ -0,0 +1 @@
> +../../../../../arch/powerpc/lib/copyuser_64.S
> \ No newline at end of file
> diff --git
> a/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
> b/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S new
> file mode 120000 index 0000000..4786895 --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
> @@ -0,0 +1 @@
> +../../../../../arch/powerpc/lib/copyuser_power7.S
> \ No newline at end of file
> diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
> b/tools/testing/selftests/powerpc/copyloops/memcpy_64.S new file mode
> 120000 index 0000000..cce33fb
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
> @@ -0,0 +1 @@
> +../../../../../arch/powerpc/lib/memcpy_64.S
> \ No newline at end of file
> diff --git
> a/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
> b/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S new file
> mode 120000 index 0000000..0d6fbfa --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
> @@ -0,0 +1 @@
> +../../../../../arch/powerpc/lib/memcpy_power7.S
> \ No newline at end of file
> diff --git a/tools/testing/selftests/powerpc/copyloops/validate.c
> b/tools/testing/selftests/powerpc/copyloops/validate.c new file mode
> 100644 index 0000000..1750ff5
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/copyloops/validate.c
> @@ -0,0 +1,99 @@
> +#include <malloc.h>
> +#include <string.h>
> +#include <stdlib.h>
> +#include <stdbool.h>
> +
> +#include "../utils.h"
> +
> +#define MAX_LEN 8192
> +#define MAX_OFFSET 16
> +#define MIN_REDZONE 128
> +#define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE)
> +#define POISON 0xa5
> +
> +unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
> +
> +static void do_one(char *src, char *dst, unsigned long src_off,
> + unsigned long dst_off, unsigned long len, void *redzone,
> + void *fill)
> +{
> + char *srcp, *dstp;
> + unsigned long ret;
> + unsigned long i;
> +
> + srcp = src + MIN_REDZONE + src_off;
> + dstp = dst + MIN_REDZONE + dst_off;
> +
> + memset(src, POISON, BUFLEN);
> + memset(dst, POISON, BUFLEN);
> + memcpy(srcp, fill, len);
> +
> + ret = COPY_LOOP(dstp, srcp, len);
> + if (ret && ret != (unsigned long)dstp) {
> + printf("(%p,%p,%ld) returned %ld\n", dstp, srcp,
> len, ret);
> + abort();
> + }
> +
> + if (memcmp(dstp, srcp, len)) {
> + printf("(%p,%p,%ld) miscompare\n", dstp, srcp, len);
> + printf("src: ");
> + for (i = 0; i < len; i++)
> + printf("%02x ", srcp[i]);
> + printf("\ndst: ");
> + for (i = 0; i < len; i++)
> + printf("%02x ", dstp[i]);
> + printf("\n");
> + abort();
> + }
> +
> + if (memcmp(dst, redzone, dstp - dst)) {
> + printf("(%p,%p,%ld) redzone before corrupted\n",
> + dstp, srcp, len);
> + abort();
> + }
> +
> + if (memcmp(dstp+len, redzone, dst+BUFLEN-(dstp+len))) {
> + printf("(%p,%p,%ld) redzone after corrupted\n",
> + dstp, srcp, len);
> + abort();
> + }
> +}
> +
> +int test_copy_loop(void)
> +{
> + char *src, *dst, *redzone, *fill;
> + unsigned long len, src_off, dst_off;
> + unsigned long i;
> +
> + src = memalign(BUFLEN, BUFLEN);
> + dst = memalign(BUFLEN, BUFLEN);
> + redzone = malloc(BUFLEN);
> + fill = malloc(BUFLEN);
> +
> + if (!src || !dst || !redzone || !fill) {
> + fprintf(stderr, "malloc failed\n");
> + exit(1);
> + }
> +
> + memset(redzone, POISON, BUFLEN);
> +
> + /* Fill with sequential bytes */
> + for (i = 0; i < BUFLEN; i++)
> + fill[i] = i & 0xff;
> +
> + for (len = 1; len < MAX_LEN; len++) {
> + for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
> + for (dst_off = 0; dst_off < MAX_OFFSET;
> dst_off++) {
> + do_one(src, dst, src_off, dst_off,
> len,
> + redzone, fill);
> + }
> + }
> + }
> +
> + return 0;
> +}
> +
> +int main(void)
> +{
> + return test_harness(test_copy_loop, str(COPY_LOOP));
> +}
> diff --git a/tools/testing/selftests/powerpc/utils.h
> b/tools/testing/selftests/powerpc/utils.h index 5851c4b..0de0644
> 100644 --- a/tools/testing/selftests/powerpc/utils.h
> +++ b/tools/testing/selftests/powerpc/utils.h
> @@ -31,4 +31,7 @@ do
> {
> \ } \ } while
> (0)
> +#define _str(s) #s
> +#define str(s) _str(s)
> +
> #endif /* _SELFTESTS_POWERPC_UTILS_H */
^ permalink raw reply
* [PATCH] ibmveth: Fix more little endian issues
From: Anton Blanchard @ 2013-12-24 1:55 UTC (permalink / raw)
To: Joe Perches
Cc: Dinar Valeev, linuxppc-dev, Alexander Graf, netdev, Santiago Leon
In-Reply-To: <1387810329.22671.66.camel@joe-AO722>
The hypervisor expects MAC addresses passed in registers to be big
endian u64. Create a helper function called ibmveth_encode_mac_addr
which does the right thing in both big and little endian.
We were storing the MAC address in a long in struct ibmveth_adapter.
It's never used so remove it - we don't need another place in the
driver where we create endian issues with MAC addresses.
Signed-off-by: Anton Blanchard <anton@samba.org>
Reviewed-by: Alexander Graf <agraf@suse.de>
---
v2: annotate with __be64 as suggested by Joe
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 952d795..bb9a631 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -497,7 +497,7 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
}
static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
- union ibmveth_buf_desc rxq_desc, u64 mac_address)
+ union ibmveth_buf_desc rxq_desc, __be64 mac_address)
{
int rc, try_again = 1;
@@ -523,10 +523,20 @@ retry:
return rc;
}
+/* The hypervisor expects MAC addresses passed in registers to be
+ * big endian u64.
+ */
+static __be64 ibmveth_encode_mac_addr(char *mac)
+{
+ unsigned long encoded = 0;
+ memcpy(((char *)&encoded) + 2, mac, ETH_ALEN);
+ return cpu_to_be64(encoded);
+}
+
static int ibmveth_open(struct net_device *netdev)
{
struct ibmveth_adapter *adapter = netdev_priv(netdev);
- u64 mac_address = 0;
+ __be64 mac_address = 0;
int rxq_entries = 1;
unsigned long lpar_rc;
int rc;
@@ -580,8 +590,7 @@ static int ibmveth_open(struct net_device *netdev)
adapter->rx_queue.num_slots = rxq_entries;
adapter->rx_queue.toggle = 1;
- memcpy(&mac_address, netdev->dev_addr, netdev->addr_len);
- mac_address = mac_address >> 16;
+ mac_address = ibmveth_encode_mac_addr(netdev->dev_addr);
rxq_desc.fields.flags_len = IBMVETH_BUF_VALID |
adapter->rx_queue.queue_len;
@@ -1184,8 +1193,8 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
/* add the addresses to the filter table */
netdev_for_each_mc_addr(ha, netdev) {
/* add the multicast address to the filter table */
- unsigned long mcast_addr = 0;
- memcpy(((char *)&mcast_addr)+2, ha->addr, ETH_ALEN);
+ __be64 mcast_addr;
+ mcast_addr = ibmveth_encode_mac_addr(ha->addr);
lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
IbmVethMcastAddFilter,
mcast_addr);
@@ -1369,9 +1378,6 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
- adapter->mac_addr = 0;
- memcpy(&adapter->mac_addr, mac_addr_p, ETH_ALEN);
-
netdev->irq = dev->irq;
netdev->netdev_ops = &ibmveth_netdev_ops;
netdev->ethtool_ops = &netdev_ethtool_ops;
@@ -1380,7 +1386,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
netdev->features |= netdev->hw_features;
- memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len);
+ memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);
for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index 84066ba..2c636cb 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -139,7 +139,6 @@ struct ibmveth_adapter {
struct napi_struct napi;
struct net_device_stats stats;
unsigned int mcastFilterSize;
- unsigned long mac_addr;
void * buffer_list_addr;
void * filter_list_addr;
dma_addr_t buffer_list_dma;
^ permalink raw reply related
* Re: [PATCH V4 08/10] powerpc, perf: Enable SW filtering in branch stack sampling framework
From: Michael Ellerman @ 2013-12-24 3:29 UTC (permalink / raw)
To: Anshuman Khandual
Cc: mikey, ak, linux-kernel, eranian, linuxppc-dev, acme, sukadev,
mingo
In-Reply-To: <52B42394.4060705@linux.vnet.ibm.com>
On Fri, 2013-12-20 at 16:31 +0530, Anshuman Khandual wrote:
> On 12/09/2013 11:51 AM, Michael Ellerman wrote:
> > On Wed, 2013-04-12 at 10:32:40 UTC, Anshuman Khandual wrote:
> >> +
> >> + if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_IND_CALL) {
> >> + /* XL-form instruction */
> >> + if (instr_is_branch_xlform(*addr)) {
> >> +
> >> + /* LR should be set */
> >> + if (is_branch_link_set(*addr)) {
> >> + /*
> >> + * Conditional and unconditional
> >> + * branch to CTR.
> >> + */
> >> + if (is_xlform_ctr(*addr))
> >> + result = true;
> >> +
> >> + /*
> >> + * Conditional and unconditional
> >> + * branch to LR.
> >> + */
> >> + if (is_xlform_lr(*addr))
> >> + result = true;
> >> +
> >> + /*
> >> + * Conditional and unconditional
> >> + * branch to TAR.
> >> + */
> >> + if (is_xlform_tar(*addr))
> >> + result = true;
> >
> > What other kind of XL-Form branch is there?
>
> I am not sure. Do you know of any ?
That was my point. There are no other types, so you can just do:
if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_IND_CALL)
if (instr_is_branch_xlform(*addr) && is_branch_link_set(*addr))
return true;
> >> + if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_COND) {
> >> +
> >> + /* I-form instruction - excluded */
> >> + if (instr_is_branch_iform(*addr))
> >> + goto out;
> >> +
> >> + /* B-form or XL-form instruction */
> >> + if (instr_is_branch_bform(*addr) || instr_is_branch_xlform(*addr)) {
> >> +
> >> + /* Not branch always */
> >> + if (!is_bo_always(*addr)) {
> >> +
> >> + /* Conditional branch to CTR register */
> >> + if (is_bo_ctr(*addr))
> >> + goto out;
> >
> > We might have discussed this but why not?
>
> Did not get that, discuss what ?
Why are we saying a conditional branch to the CTR is not a conditional branch?
It is conditional, so I think it should be included.
> >> +
> >> + /* CR[BI] conditional branch with static hint */
> >
> > A conditional branch with a static hint is still a conditional branch?
>
> No its not.
Yes it is?
In fact they could be very interesting branches. Because the compiler or
programmer has statically hinted them, if the hint is wrong they may be a major
source of branch mispredicts.
> >> + if (is_bo_crbi_off(*addr) || is_bo_crbi_on(*addr)) {
> >> + if (is_bo_crbi_hint(*addr))
> >> + goto out;
> >> + }
> >> +
> >> + result = true;
> >> + }
> >> + }
> >> + }
> >> +out:
> >> + return result;
> >> +}
> >> + } else {
> >> + /*
> >> + * Userspace address needs to be
> >> + * copied first before analysis.
> >> + */
> >> + pagefault_disable();
> >> + ret = __get_user_inatomic(instr, (unsigned int __user *)addr);
> >
> > I suspect you borrowed this incantation from the callchain code. Unlike that
> > code you don't fallback to reading the page tables directly.
> >
> > I'd rather see the accessor in the callchain code made generic and have you
> > call it here.
>
> You have mentioned to take care of this issue yourself.
Yes I will.
cheers
^ permalink raw reply
* Re: [PATCH] powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian
From: Michael Ellerman @ 2013-12-24 3:34 UTC (permalink / raw)
To: Anton Blanchard; +Cc: paulmck, paulus, linuxppc-dev
In-Reply-To: <20131224120259.454bc44c@kryten>
On Tue, 2013-12-24 at 12:02 +1100, Anton Blanchard wrote:
> Hi Michael,
>
> > > To try and catch any screw ups in our ppc64 memcpy and
> > > copy_tofrom_user loops, I wrote a quick test:
> > >
> > > http://ozlabs.org/~anton/junkcode/validate_kernel_copyloops.tar.gz
> >
> > Nice! How's this look?
>
> Love it!
Cool, I'll add your Signed-off-by and resubmit.
> At the moment my other copy_to/from_user tests run against the kernel
> (testing we copy all data right up to a page fault and that we return
> the correct number of bytes not copied etc). A small signal handler
> that walks the exception entries and branches to the handler should be
> all it takes to do it completely in userspace.
That'd be nice. Are they in your junkcode? I couldn't spot them at a glance.
cheers
^ permalink raw reply
* Re: [PATCH V4 08/10] powerpc, perf: Enable SW filtering in branch stack sampling framework
From: Anshuman Khandual @ 2013-12-24 3:50 UTC (permalink / raw)
To: Michael Ellerman
Cc: mikey, ak, linux-kernel, eranian, linuxppc-dev, acme, sukadev,
mingo
In-Reply-To: <1387855790.15093.1.camel@concordia>
On 12/24/2013 08:59 AM, Michael Ellerman wrote:
> On Fri, 2013-12-20 at 16:31 +0530, Anshuman Khandual wrote:
>> On 12/09/2013 11:51 AM, Michael Ellerman wrote:
>>> On Wed, 2013-04-12 at 10:32:40 UTC, Anshuman Khandual wrote:
>>>> +
>>>> + if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_IND_CALL) {
>>>> + /* XL-form instruction */
>>>> + if (instr_is_branch_xlform(*addr)) {
>>>> +
>>>> + /* LR should be set */
>>>> + if (is_branch_link_set(*addr)) {
>>>> + /*
>>>> + * Conditional and unconditional
>>>> + * branch to CTR.
>>>> + */
>>>> + if (is_xlform_ctr(*addr))
>>>> + result = true;
>>>> +
>>>> + /*
>>>> + * Conditional and unconditional
>>>> + * branch to LR.
>>>> + */
>>>> + if (is_xlform_lr(*addr))
>>>> + result = true;
>>>> +
>>>> + /*
>>>> + * Conditional and unconditional
>>>> + * branch to TAR.
>>>> + */
>>>> + if (is_xlform_tar(*addr))
>>>> + result = true;
>>>
>>> What other kind of XL-Form branch is there?
>>
>> I am not sure. Do you know of any ?
>
> That was my point. There are no other types, so you can just do:
>
> if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_IND_CALL)
> if (instr_is_branch_xlform(*addr) && is_branch_link_set(*addr))
> return true;
>
Done
>>>> + if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_COND) {
>>>> +
>>>> + /* I-form instruction - excluded */
>>>> + if (instr_is_branch_iform(*addr))
>>>> + goto out;
>>>> +
>>>> + /* B-form or XL-form instruction */
>>>> + if (instr_is_branch_bform(*addr) || instr_is_branch_xlform(*addr)) {
>>>> +
>>>> + /* Not branch always */
>>>> + if (!is_bo_always(*addr)) {
>>>> +
>>>> + /* Conditional branch to CTR register */
>>>> + if (is_bo_ctr(*addr))
>>>> + goto out;
>>>
>>> We might have discussed this but why not?
>>
>> Did not get that, discuss what ?
>
> Why are we saying a conditional branch to the CTR is not a conditional branch?
>
> It is conditional, so I think it should be included.
>
I believe conditional branches to the CTR register and the below conditional
branches with a static hint are excluded when processed with the BHRB PMU based
filter IFM3. Here the SW implemented filter tries to match those exclusions, so
that a user should not see any difference in results whether the filter is
processed in the PMU or in SW.
>>>> +
>>>> + /* CR[BI] conditional branch with static hint */
>>>
>>> A conditional branch with a static hint is still a conditional branch?
>>
>> No its not.
>
> Yes it is?
>
> In fact they could be very interesting branches. Because the compiler or
> programmer has statically hinted them, if the hint is wrong they may be a major
> source of branch mispredicts.
>
>
>>>> + if (is_bo_crbi_off(*addr) || is_bo_crbi_on(*addr)) {
>>>> + if (is_bo_crbi_hint(*addr))
>>>> + goto out;
>>>> + }
>>>> +
>>>> + result = true;
>>>> + }
>>>> + }
>>>> + }
>>>> +out:
>>>> + return result;
>>>> +}
>
>>>> + } else {
>>>> + /*
>>>> + * Userspace address needs to be
>>>> + * copied first before analysis.
>>>> + */
>>>> + pagefault_disable();
>>>> + ret = __get_user_inatomic(instr, (unsigned int __user *)addr);
>>>
>>> I suspect you borrowed this incantation from the callchain code. Unlike that
>>> code you don't fallback to reading the page tables directly.
>>>
>>> I'd rather see the accessor in the callchain code made generic and have you
>>> call it here.
>>
>> You have mentioned to take care of this issue yourself.
>
> Yes I will.
Thanks !!
^ permalink raw reply
* Re: [PATCH V4 08/10] powerpc, perf: Enable SW filtering in branch stack sampling framework
From: Michael Ellerman @ 2013-12-24 4:35 UTC (permalink / raw)
To: Anshuman Khandual
Cc: mikey, ak, linux-kernel, eranian, linuxppc-dev, acme, sukadev,
mingo
In-Reply-To: <52B9049D.4020403@linux.vnet.ibm.com>
On Tue, 2013-12-24 at 09:20 +0530, Anshuman Khandual wrote:
> On 12/24/2013 08:59 AM, Michael Ellerman wrote:
> > On Fri, 2013-12-20 at 16:31 +0530, Anshuman Khandual wrote:
> >> On 12/09/2013 11:51 AM, Michael Ellerman wrote:
> >>> On Wed, 2013-04-12 at 10:32:40 UTC, Anshuman Khandual wrote:
> >>>> +
>
> >>>> + if (bhrb_sw_filter & PERF_SAMPLE_BRANCH_COND) {
> >>>> +
> >>>> + /* I-form instruction - excluded */
> >>>> + if (instr_is_branch_iform(*addr))
> >>>> + goto out;
> >>>> +
> >>>> + /* B-form or XL-form instruction */
> >>>> + if (instr_is_branch_bform(*addr) || instr_is_branch_xlform(*addr)) {
> >>>> +
> >>>> + /* Not branch always */
> >>>> + if (!is_bo_always(*addr)) {
> >>>> +
> >>>> + /* Conditional branch to CTR register */
> >>>> + if (is_bo_ctr(*addr))
> >>>> + goto out;
> >>>
> >>> We might have discussed this but why not?
> >>
> >> Did not get that, discuss what ?
> >
> > Why are we saying a conditional branch to the CTR is not a conditional branch?
> >
> > It is conditional, so I think it should be included.
> I believe conditional branch to CTR register and the below conditional branch
> with static hint are excluded when processed with BHRB PMU based filter IFM3,
> Here the SW implemented filter try to match those exclusions, so that a user
> should not see any difference in results whether the filter is processed
> either in PMU or in SW.
OK. That's what I meant by "we might have discussed this".
So you need to make it very clear in the code that we are implementing the IFM3
semantics, with a comment. Otherwise it's not obviously clear why those
semantics make sense.
And we need to make extra sure we implement the same semantics as IFM3, which I
don't think you do at the moment.
The description for IFM3 is:
Do not record:
* b and bl instructions,
* bc and bcl instructions for which the BO field indicates “Branch always.”
For bclr, bclrl, bctr, bctrl, bctar, and bctarl instructions for which
the BO field indicates “Branch always,” record only one entry
containing the Branch target address.
So I don't think your SW filter implements that part correctly. You are
discarding all branches with "branch always" set.
Do not record:
* Branch instructions for which BO[0]=1,
This is what excludes branches to CTR. But, it's only branches to CTR that
don't also depend on CR[BI] - we need to make that clear in the code.
* Branch instructions for which the “a” bit in the BO field is set to 1.
So that's the is_bo_crbi_hint() check and rejection, but it's not related to
CR[BI] at all.
There's a note about CR[BI]:
Do not record instructions that do not depend on the value of CR[BI].
But I think you've misinterpreted that.
Do not record instructions that do not depend on the value of CR[BI].
Do record instructions that depend on the value of CR[BI].
In fact the only branches that don't depend on CR[BI] are "branch always"
branches, and branches with BO[0]=1, both of which were handled above.
cheers
^ permalink raw reply
* Re: [PATCH] ibmveth: Fix more little endian issues
From: Benjamin Herrenschmidt @ 2013-12-24 4:37 UTC (permalink / raw)
To: Joe Perches
Cc: netdev, Dinar Valeev, linuxppc-dev, Alexander Graf,
Anton Blanchard, Santiago Leon
In-Reply-To: <1387810329.22671.66.camel@joe-AO722>
On Mon, 2013-12-23 at 06:52 -0800, Joe Perches wrote:
> On Mon, 2013-12-23 at 17:38 +1100, Anton Blanchard wrote:
> > The hypervisor expects MAC addresses passed in registers to be big
> > endian u64.
>
> So maybe use __be64 declarations?
>
> > +static unsigned long ibmveth_encode_mac_addr(char *mac)
>
> static __be64 ibmveth_encode_mac_addr(const char *mac)
A register value has no endianness. Only memory content does. Especially
talking of a MAC address which is really a byte stream.... (Yes, our
__beXX types used without a * are borderline, but we've got used to it).
In fact I find the use of
memcpy(((char *)&encoded) + 2, mac, ETH_ALEN);
Really gross :-) Yes it works with the added cpu_to_be64() but in that
specific case, I think it would be nicer to simply load & shift into
position the 6 bytes and avoid the endianness issue completely.
Cheers,
Ben.
^ permalink raw reply
* Re: [PATCH] iommu: Add empty stub for iommu_group_get_by_id()
From: Alexey Kardashevskiy @ 2013-12-24 5:10 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Alexey Kardashevskiy, Joerg Roedel, linux-kernel
In-Reply-To: <1385016074-17026-1-git-send-email-aik@ozlabs.ru>
On 11/21/2013 05:41 PM, Alexey Kardashevskiy wrote:
> Almost every function in include/linux/iommu.h has an empty stub
> but the iommu_group_get_by_id() did not get one by mistake.
>
> This adds an empty stub for iommu_group_get_by_id() for IOMMU_API
> disabled config.
Ping?
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> ---
> include/linux/iommu.h | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 7ea319e..3c7903d 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -245,6 +245,11 @@ static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
> return NULL;
> }
>
> +static inline struct iommu_group *iommu_group_get_by_id(int id)
> +{
> + return NULL;
> +}
> +
> static inline void iommu_domain_free(struct iommu_domain *domain)
> {
> }
>
--
Alexey
^ permalink raw reply
* [PATCH v4 00/10] powerpc: enable the relocatable support for fsl booke 32bit kernel
From: Kevin Hao @ 2013-12-24 7:12 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc
v4:
- Fix the bug when booting above 64M.
- Rebase onto v3.13-rc5
- Pass the following test on a p5020ds board:
boot kernel at 0x5000000 and 0x9000000
kdump test with kernel option "crashkernel=64M@80M"
v3:
The main changes include:
* Drop the patch 5 in v2 (memblock: introduce the memblock_reinit function)
* Change to use the 64M boot init tlb.
Please refer to the comment section of each patch for more detail.
This patch series passed the kdump test with kernel option "crashkernel=64M@32M"
and "crashkernel=64M@80M" on a p2020rdb board.
v2:
These patches are based on Ben's next branch. In this version we choose
to do a second relocation if the PAGE_OFFSET is not mapped to the memstart_addr
and we also choose to set the tlb1 entries for the kernel space in address
space 1. With this implementation:
* We can load the kernel at any place between
memstart_addr ~ memstart_addr + 768M
* We can reserve any memory between memstart_addr ~ memstart_addr + 768M
for a kdump kernel.
I have done a kdump boot on a p2020rdb kernel with the memory reserved by
'crashkernel=32M@320M'.
v1:
Currently the fsl booke 32bit kernel is using the DYNAMIC_MEMSTART relocation
method. But the RELOCATABLE method is more flexible and has less alignment
restriction. So enable this feature on this platform and use it by
default for the kdump kernel.
These patches have passed the kdump boot test on a p2020rdb board.
---
Kevin Hao (10):
powerpc/fsl_booke: protect the access to MAS7
powerpc/fsl_booke: introduce get_phys_addr function
powerpc: introduce macro LOAD_REG_ADDR_PIC
powerpc: enable the relocatable support for the fsl booke 32bit kernel
powerpc/fsl_booke: set the tlb entry for the kernel address in AS1
powerpc: introduce early_get_first_memblock_info
powerpc/fsl_booke: introduce map_mem_in_cams_addr
powerpc/fsl_booke: make sure PAGE_OFFSET map to memstart_addr for
relocatable kernel
powerpc/fsl_booke: smp support for booting a relocatable kernel above
64M
powerpc/fsl_booke: enable the relocatable for the kdump kernel
arch/powerpc/Kconfig | 5 +-
arch/powerpc/include/asm/ppc_asm.h | 13 ++
arch/powerpc/kernel/fsl_booke_entry_mapping.S | 2 +
arch/powerpc/kernel/head_fsl_booke.S | 266 +++++++++++++++++++++++---
arch/powerpc/kernel/prom.c | 41 +++-
arch/powerpc/mm/fsl_booke_mmu.c | 72 ++++++-
arch/powerpc/mm/hugetlbpage-book3e.c | 3 +-
arch/powerpc/mm/mmu_decl.h | 2 +
arch/powerpc/mm/tlb_nohash_low.S | 4 +-
include/linux/of_fdt.h | 1 +
10 files changed, 370 insertions(+), 39 deletions(-)
--
1.8.3.1
^ permalink raw reply
* [PATCH v4 01/10] powerpc/fsl_booke: protect the access to MAS7
From: Kevin Hao @ 2013-12-24 7:12 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>
The e500v1 doesn't implement MAS7, so we should avoid accessing
this register on that implementation. In the current kernel, the
accesses to MAS7 are protected by either CONFIG_PHYS_64BIT or
MMU_FTR_BIG_PHYS. Since some code is executed before the code
patching, we have to use CONFIG_PHYS_64BIT in these cases.
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: No change.
v3: Use ifdef CONFIG_PHYS_64BIT for the code running before code patching.
v2: A new patch in v2.
arch/powerpc/kernel/head_fsl_booke.S | 2 ++
arch/powerpc/mm/hugetlbpage-book3e.c | 3 ++-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index f45726a1d963..09921a5197c6 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -82,7 +82,9 @@ _ENTRY(_start);
and r19,r3,r18 /* r19 = page offset */
andc r31,r20,r18 /* r31 = page base */
or r31,r31,r19 /* r31 = devtree phys addr */
+#ifdef CONFIG_PHYS_64BIT
mfspr r30,SPRN_MAS7
+#endif
li r25,0 /* phys kernel start (low) */
li r24,0 /* CPU number */
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 74551b5e41e5..646c4bffaeba 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -103,7 +103,8 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
mtspr(SPRN_MAS7_MAS3, mas7_3);
} else {
- mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
+ if (mmu_has_feature(MMU_FTR_BIG_PHYS))
+ mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
}
--
1.8.3.1
^ permalink raw reply related
* [PATCH v4 02/10] powerpc/fsl_booke: introduce get_phys_addr function
From: Kevin Hao @ 2013-12-24 7:12 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>
Move the code which translates an effective address to a physical address
into a separate function, so it can be reused by other code.
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: No change.
v3: Use ifdef CONFIG_PHYS_64BIT to protect the access to MAS7
v2: A new patch in v2.
arch/powerpc/kernel/head_fsl_booke.S | 50 +++++++++++++++++++++---------------
1 file changed, 30 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 09921a5197c6..196950f29c00 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -65,26 +65,9 @@ _ENTRY(_start);
nop
/* Translate device tree address to physical, save in r30/r31 */
- mfmsr r16
- mfspr r17,SPRN_PID
- rlwinm r17,r17,16,0x3fff0000 /* turn PID into MAS6[SPID] */
- rlwimi r17,r16,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */
- mtspr SPRN_MAS6,r17
-
- tlbsx 0,r3 /* must succeed */
-
- mfspr r16,SPRN_MAS1
- mfspr r20,SPRN_MAS3
- rlwinm r17,r16,25,0x1f /* r17 = log2(page size) */
- li r18,1024
- slw r18,r18,r17 /* r18 = page size */
- addi r18,r18,-1
- and r19,r3,r18 /* r19 = page offset */
- andc r31,r20,r18 /* r31 = page base */
- or r31,r31,r19 /* r31 = devtree phys addr */
-#ifdef CONFIG_PHYS_64BIT
- mfspr r30,SPRN_MAS7
-#endif
+ bl get_phys_addr
+ mr r30,r3
+ mr r31,r4
li r25,0 /* phys kernel start (low) */
li r24,0 /* CPU number */
@@ -858,6 +841,33 @@ KernelSPE:
#endif /* CONFIG_SPE */
/*
+ * Translate the effec addr in r3 to phys addr. The phys addr will be put
+ * into r3(higher 32bit) and r4(lower 32bit)
+ */
+get_phys_addr:
+ mfmsr r8
+ mfspr r9,SPRN_PID
+ rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */
+ rlwimi r9,r8,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */
+ mtspr SPRN_MAS6,r9
+
+ tlbsx 0,r3 /* must succeed */
+
+ mfspr r8,SPRN_MAS1
+ mfspr r12,SPRN_MAS3
+ rlwinm r9,r8,25,0x1f /* r9 = log2(page size) */
+ li r10,1024
+ slw r10,r10,r9 /* r10 = page size */
+ addi r10,r10,-1
+ and r11,r3,r10 /* r11 = page offset */
+ andc r4,r12,r10 /* r4 = page base */
+ or r4,r4,r11 /* r4 = devtree phys addr */
+#ifdef CONFIG_PHYS_64BIT
+ mfspr r3,SPRN_MAS7
+#endif
+ blr
+
+/*
* Global functions
*/
--
1.8.3.1
^ permalink raw reply related
* [PATCH v4 03/10] powerpc: introduce macro LOAD_REG_ADDR_PIC
From: Kevin Hao @ 2013-12-24 7:12 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>
This is used to get the address of a variable when the kernel is not
running at the linked or relocated address.
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: A new patch in v4.
arch/powerpc/include/asm/ppc_asm.h | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index f595b98079ee..1279c59624ed 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -295,6 +295,11 @@ n:
* you want to access various offsets within it). On ppc32 this is
* identical to LOAD_REG_IMMEDIATE.
*
+ * LOAD_REG_ADDR_PIC(rn, name)
+ * Loads the address of label 'name' into register 'rn'. Use this when
+ * the kernel doesn't run at the linked or relocated address. Please
+ * note that this macro will clobber the lr register.
+ *
* LOAD_REG_ADDRBASE(rn, name)
* ADDROFF(name)
* LOAD_REG_ADDRBASE loads part of the address of label 'name' into
@@ -305,6 +310,14 @@ n:
* LOAD_REG_ADDRBASE(rX, name)
* ld rY,ADDROFF(name)(rX)
*/
+
+/* Be careful, this will clobber the lr register. */
+#define LOAD_REG_ADDR_PIC(reg, name) \
+ bl 0f; \
+0: mflr reg; \
+ addis reg,reg,(name - 0b)@ha; \
+ addi reg,reg,(name - 0b)@l;
+
#ifdef __powerpc64__
#define LOAD_REG_IMMEDIATE(reg,expr) \
lis reg,(expr)@highest; \
--
1.8.3.1
^ permalink raw reply related
* [PATCH v4 04/10] powerpc: enable the relocatable support for the fsl booke 32bit kernel
From: Kevin Hao @ 2013-12-24 7:12 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>
This is based on the code in head_44x.S. The difference is that
the init tlb size we use is 64M. With this patch we can only load the
kernel at an address between memstart_addr ~ memstart_addr + 64M. We will
fix this restriction in the following patches.
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: Use macro LOAD_REG_ADDR_PIC.
v3:
* Use the 64M align.
* typo fix.
v2: Move the code to set kernstart_addr and virt_phys_offset to a c function.
So we can expand it easily later.
arch/powerpc/Kconfig | 2 +-
arch/powerpc/kernel/fsl_booke_entry_mapping.S | 2 ++
arch/powerpc/kernel/head_fsl_booke.S | 34 +++++++++++++++++++++++++++
arch/powerpc/mm/fsl_booke_mmu.c | 28 ++++++++++++++++++++++
4 files changed, 65 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b44b52c0a8f0..f5b464c41117 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -881,7 +881,7 @@ config DYNAMIC_MEMSTART
config RELOCATABLE
bool "Build a relocatable kernel"
- depends on ADVANCED_OPTIONS && FLATMEM && 44x
+ depends on ADVANCED_OPTIONS && FLATMEM && (44x || FSL_BOOKE)
select NONSTATIC_KERNEL
help
This builds a kernel image that is capable of running at the
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index a92c79be2728..f22e7e44fbf3 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -176,6 +176,8 @@ skpinv: addi r6,r6,1 /* Increment */
/* 7. Jump to KERNELBASE mapping */
lis r6,(KERNELBASE & ~0xfff)@h
ori r6,r6,(KERNELBASE & ~0xfff)@l
+ rlwinm r7,r25,0,0x03ffffff
+ add r6,r7,r6
#elif defined(ENTRY_MAPPING_KEXEC_SETUP)
/*
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 196950f29c00..19bd574bda9d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -73,6 +73,30 @@ _ENTRY(_start);
li r24,0 /* CPU number */
li r23,0 /* phys kernel start (high) */
+#ifdef CONFIG_RELOCATABLE
+ LOAD_REG_ADDR_PIC(r3, _stext) /* Get our current runtime base */
+
+ /* Translate _stext address to physical, save in r23/r25 */
+ bl get_phys_addr
+ mr r23,r3
+ mr r25,r4
+
+ /*
+ * We have the runtime (virtual) address of our base.
+ * We calculate our shift of offset from a 64M page.
+ * We could map the 64M page we belong to at PAGE_OFFSET and
+ * get going from there.
+ */
+ lis r4,KERNELBASE@h
+ ori r4,r4,KERNELBASE@l
+ rlwinm r6,r25,0,0x3ffffff /* r6 = PHYS_START % 64M */
+ rlwinm r5,r4,0,0x3ffffff /* r5 = KERNELBASE % 64M */
+ subf r3,r5,r6 /* r3 = r6 - r5 */
+ add r3,r4,r3 /* Required Virtual Address */
+
+ bl relocate
+#endif
+
/* We try to not make any assumptions about how the boot loader
* setup or used the TLBs. We invalidate all mappings from the
* boot loader and load a single entry in TLB1[0] to map the
@@ -182,6 +206,16 @@ _ENTRY(__early_start)
bl early_init
+#ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_PHYS_64BIT
+ mr r3,r23
+ mr r4,r25
+#else
+ mr r3,r25
+#endif
+ bl relocate_init
+#endif
+
#ifdef CONFIG_DYNAMIC_MEMSTART
lis r3,kernstart_addr@ha
la r3,kernstart_addr@l(r3)
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 07ba45b0f07c..ce4a1163ddd3 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -241,4 +241,32 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
/* 64M mapped initially according to head_fsl_booke.S */
memblock_set_current_limit(min_t(u64, limit, 0x04000000));
}
+
+#ifdef CONFIG_RELOCATABLE
+notrace void __init relocate_init(phys_addr_t start)
+{
+ unsigned long base = KERNELBASE;
+
+ /*
+ * Relocatable kernel support based on processing of dynamic
+ * relocation entries.
+ * Compute the virt_phys_offset :
+ * virt_phys_offset = stext.run - kernstart_addr
+ *
+ * stext.run = (KERNELBASE & ~0x3ffffff) + (kernstart_addr & 0x3ffffff)
+ * When we relocate, we have :
+ *
+ * (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff)
+ *
+ * hence:
+ * virt_phys_offset = (KERNELBASE & ~0x3ffffff) -
+ * (kernstart_addr & ~0x3ffffff)
+ *
+ */
+ kernstart_addr = start;
+ start &= ~0x3ffffff;
+ base &= ~0x3ffffff;
+ virt_phys_offset = base - start;
+}
+#endif
#endif
--
1.8.3.1
^ permalink raw reply related
* [PATCH v4 05/10] powerpc/fsl_booke: set the tlb entry for the kernel address in AS1
From: Kevin Hao @ 2013-12-24 7:12 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>
We use the tlb1 entries to map low mem to the kernel space. The
current code assumes that the first tlb entry would cover the
kernel image. But this is not true for some special cases, such as
when we run a relocatable kernel above 64M or set
CONFIG_KERNEL_START above 64M. So we choose to switch to address
space 1 before setting these tlb entries.
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: No change.
v3: Typo fix.
v2: A new patch in v2.
arch/powerpc/kernel/head_fsl_booke.S | 81 ++++++++++++++++++++++++++++++++++++
arch/powerpc/mm/fsl_booke_mmu.c | 2 +
arch/powerpc/mm/mmu_decl.h | 2 +
3 files changed, 85 insertions(+)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 19bd574bda9d..75f0223e6d0d 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -1157,6 +1157,87 @@ __secondary_hold_acknowledge:
#endif
/*
+ * Create a tlb entry with the same effective and physical address as
+ * the tlb entry used by the current running code. But set the TS to 1.
+ * Then switch to the address space 1. It will return with the r3 set to
+ * the ESEL of the new created tlb.
+ */
+_GLOBAL(switch_to_as1)
+ mflr r5
+
+ /* Find an unused entry */
+ mfspr r3,SPRN_TLB1CFG
+ andi. r3,r3,0xfff
+ mfspr r4,SPRN_PID
+ rlwinm r4,r4,16,0x3fff0000 /* turn PID into MAS6[SPID] */
+ mtspr SPRN_MAS6,r4
+1: lis r4,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ addi r3,r3,-1
+ rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r4
+ tlbre
+ mfspr r4,SPRN_MAS1
+ andis. r4,r4,MAS1_VALID@h
+ bne 1b
+
+ /* Get the tlb entry used by the current running code */
+ bl 0f
+0: mflr r4
+ tlbsx 0,r4
+
+ mfspr r4,SPRN_MAS1
+ ori r4,r4,MAS1_TS /* Set the TS = 1 */
+ mtspr SPRN_MAS1,r4
+
+ mfspr r4,SPRN_MAS0
+ rlwinm r4,r4,0,~MAS0_ESEL_MASK
+ rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r4
+ tlbwe
+ isync
+ sync
+
+ mfmsr r4
+ ori r4,r4,MSR_IS | MSR_DS
+ mtspr SPRN_SRR0,r5
+ mtspr SPRN_SRR1,r4
+ sync
+ rfi
+
+/*
+ * Restore to the address space 0 and also invalidate the tlb entry created
+ * by switch_to_as1.
+*/
+_GLOBAL(restore_to_as0)
+ mflr r0
+
+ bl 0f
+0: mflr r9
+ addi r9,r9,1f - 0b
+
+ mfmsr r7
+ li r8,(MSR_IS | MSR_DS)
+ andc r7,r7,r8
+
+ mtspr SPRN_SRR0,r9
+ mtspr SPRN_SRR1,r7
+ sync
+ rfi
+
+ /* Invalidate the temporary tlb entry for AS1 */
+1: lis r9,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r9,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r9
+ tlbre
+ mfspr r9,SPRN_MAS1
+ rlwinm r9,r9,0,2,31 /* Clear MAS1 Valid and IPPROT */
+ mtspr SPRN_MAS1,r9
+ tlbwe
+ isync
+ mtlr r0
+ blr
+
+/*
* We put a few things here that have to be page-aligned. This stuff
* goes at the beginning of the data segment, which is page-aligned.
*/
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index ce4a1163ddd3..1d54f6d35e71 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -222,7 +222,9 @@ void __init adjust_total_lowmem(void)
/* adjust lowmem size to __max_low_memory */
ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
+ i = switch_to_as1();
__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM);
+ restore_to_as0(i);
pr_info("Memory CAM mapping: ");
for (i = 0; i < tlbcam_index - 1; i++)
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 83eb5d5f53d5..eefbf7bb4331 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -148,6 +148,8 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
extern void MMU_init_hw(void);
extern unsigned long mmu_mapin_ram(unsigned long top);
extern void adjust_total_lowmem(void);
+extern int switch_to_as1(void);
+extern void restore_to_as0(int esel);
#endif
extern void loadcam_entry(unsigned int index);
--
1.8.3.1
^ permalink raw reply related
* [PATCH v4 06/10] powerpc: introduce early_get_first_memblock_info
From: Kevin Hao @ 2013-12-24 7:12 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>
Since a relocatable kernel can be loaded at any place, there
is no relation between the kernel start addr and the memstart_addr.
So we can't calculate the memstart_addr from the kernel start addr. We
also can't wait to do the relocation until after we get the real
memstart_addr from the device tree, because that is too late. So introduce
a new function we can use to get the first memblock address and size
at a very early stage (before machine_init).
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: No change.
v3: Introduce a variable to avoid to mess the memblock.
v2: A new patch in v2.
arch/powerpc/kernel/prom.c | 41 ++++++++++++++++++++++++++++++++++++++++-
include/linux/of_fdt.h | 1 +
2 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fa0ad8aafbcc..f58c0d3aaeb4 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -523,6 +523,20 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node,
return early_init_dt_scan_memory(node, uname, depth, data);
}
+/*
+ * For a relocatable kernel, we need to get the memstart_addr first,
+ * then use it to calculate the virtual kernel start address. This has
+ * to happen at a very early stage (before machine_init). In this case,
+ * we just want to get the memstart_addr and must not modify the
+ * memblock at this stage. So introduce a variable that lets us skip
+ * the memblock_add() call.
+ */
+#ifdef CONFIG_RELOCATABLE
+static int add_mem_to_memblock = 1;
+#else
+#define add_mem_to_memblock 1
+#endif
+
void __init early_init_dt_add_memory_arch(u64 base, u64 size)
{
#ifdef CONFIG_PPC64
@@ -543,7 +557,8 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
}
/* Add the chunk to the MEMBLOCK list */
- memblock_add(base, size);
+ if (add_mem_to_memblock)
+ memblock_add(base, size);
}
static void __init early_reserve_mem_dt(void)
@@ -740,6 +755,30 @@ void __init early_init_devtree(void *params)
DBG(" <- early_init_devtree()\n");
}
+#ifdef CONFIG_RELOCATABLE
+/*
+ * This function runs before early_init_devtree(), so we have to
+ * initialize initial_boot_params here.
+ */
+void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
+{
+ /* Setup flat device-tree pointer */
+ initial_boot_params = params;
+
+ /*
+ * Scan the memory nodes with add_mem_to_memblock set to 0 so that
+ * the memblock is left untouched.
+ */
+ add_mem_to_memblock = 0;
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+ of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+ add_mem_to_memblock = 1;
+
+ if (size)
+ *size = first_memblock_size;
+}
+#endif
+
/*******
*
* New implementation of the OF "find" APIs, return a refcounted
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 0beaee9dac1f..2b77058a7335 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -116,6 +116,7 @@ extern const void *of_flat_dt_match_machine(const void *default_match,
extern void unflatten_device_tree(void);
extern void unflatten_and_copy_device_tree(void);
extern void early_init_devtree(void *);
+extern void early_get_first_memblock_info(void *, phys_addr_t *);
#else /* CONFIG_OF_FLATTREE */
static inline const char *of_flat_dt_get_machine_name(void) { return NULL; }
static inline void unflatten_device_tree(void) {}
--
1.8.3.1
^ permalink raw reply related
* [PATCH v4 07/10] powerpc/fsl_booke: introduce map_mem_in_cams_addr
From: Kevin Hao @ 2013-12-24 7:12 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>
Introduce this function so we can set both the physical and the virtual
address of the mapping in the CAMs. This will be used by the relocation
code.
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: A new patch in v4.
arch/powerpc/mm/fsl_booke_mmu.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 1d54f6d35e71..ca956c83e3a2 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -171,11 +171,10 @@ unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
return 1UL << camsize;
}
-unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
+static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
+ unsigned long ram, int max_cam_idx)
{
int i;
- unsigned long virt = PAGE_OFFSET;
- phys_addr_t phys = memstart_addr;
unsigned long amount_mapped = 0;
/* Calculate CAM values */
@@ -195,6 +194,14 @@ unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
return amount_mapped;
}
+unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
+{
+ unsigned long virt = PAGE_OFFSET;
+ phys_addr_t phys = memstart_addr;
+
+ return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx);
+}
+
#ifdef CONFIG_PPC32
#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS)
--
1.8.3.1
^ permalink raw reply related
* [PATCH v4 08/10] powerpc/fsl_booke: make sure PAGE_OFFSET map to memstart_addr for relocatable kernel
From: Kevin Hao @ 2013-12-24 7:12 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>
PAGE_OFFSET always maps to memstart_addr for a non-relocatable kernel;
otherwise the kernel would get stuck. For a relocatable kernel it is a
little more complicated. When booting a relocatable kernel, we just
align the kernel start address down to 64M and map PAGE_OFFSET from
there. The relocation is based on this virtual address. But if this
address is not the same as memstart_addr, we have to change the mapping
of PAGE_OFFSET to the real memstart_addr and do a second relocation.
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4:
* Create the correct mem map in cams when booting above 64M.
* Don't skip the init tlb mapping for the second cpu.
v3:
* Typo fix.
* Refactor relocate_init, no function change.
* Map only 64M memory before the second relocation.
* Comments update.
v2: A new patch in v2.
arch/powerpc/kernel/head_fsl_booke.S | 74 +++++++++++++++++++++++++++++++++---
arch/powerpc/mm/fsl_booke_mmu.c | 41 +++++++++++++++++---
arch/powerpc/mm/mmu_decl.h | 2 +-
3 files changed, 105 insertions(+), 12 deletions(-)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 75f0223e6d0d..71e08dfbd1d1 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -81,6 +81,39 @@ _ENTRY(_start);
mr r23,r3
mr r25,r4
+ bl 0f
+0: mflr r8
+ addis r3,r8,(is_second_reloc - 0b)@ha
+ lwz r19,(is_second_reloc - 0b)@l(r3)
+
+ /* Check if this is the second relocation. */
+ cmpwi r19,1
+ bne 1f
+
+ /*
+ * For the second relocation, we have already got the real
+ * memstart_addr from the device tree. So we map PAGE_OFFSET to
+ * memstart_addr, and the virtual start address of the kernel is:
+ * PAGE_OFFSET + (kernstart_addr - memstart_addr)
+ * Since the offset between kernstart_addr and memstart_addr should
+ * never exceed 1G, we can just use the lower 32 bits of each
+ * for the calculation.
+ */
+ lis r3,PAGE_OFFSET@h
+
+ addis r4,r8,(kernstart_addr - 0b)@ha
+ addi r4,r4,(kernstart_addr - 0b)@l
+ lwz r5,4(r4)
+
+ addis r6,r8,(memstart_addr - 0b)@ha
+ addi r6,r6,(memstart_addr - 0b)@l
+ lwz r7,4(r6)
+
+ subf r5,r7,r5
+ add r3,r3,r5
+ b 2f
+
+1:
/*
* We have the runtime (virutal) address of our base.
* We calculate our shift of offset from a 64M page.
@@ -94,7 +127,14 @@ _ENTRY(_start);
subf r3,r5,r6 /* r3 = r6 - r5 */
add r3,r4,r3 /* Required Virtual Address */
- bl relocate
+2: bl relocate
+
+ /*
+ * For the second relocation, we already set the right tlb entries
+ * for the kernel space, so skip the code in fsl_booke_entry_mapping.S
+ */
+ cmpwi r19,1
+ beq set_ivor
#endif
/* We try to not make any assumptions about how the boot loader
@@ -122,6 +162,7 @@ _ENTRY(__early_start)
#include "fsl_booke_entry_mapping.S"
#undef ENTRY_MAPPING_BOOT_SETUP
+set_ivor:
/* Establish the interrupt vector offsets */
SET_IVOR(0, CriticalInput);
SET_IVOR(1, MachineCheck);
@@ -207,11 +248,13 @@ _ENTRY(__early_start)
bl early_init
#ifdef CONFIG_RELOCATABLE
+ mr r3,r30
+ mr r4,r31
#ifdef CONFIG_PHYS_64BIT
- mr r3,r23
- mr r4,r25
+ mr r5,r23
+ mr r6,r25
#else
- mr r3,r25
+ mr r5,r25
#endif
bl relocate_init
#endif
@@ -1207,6 +1250,9 @@ _GLOBAL(switch_to_as1)
/*
* Restore to the address space 0 and also invalidate the tlb entry created
* by switch_to_as1.
+ * r3 - the tlb entry which should be invalidated
+ * r4 - __pa(PAGE_OFFSET in AS0) - __pa(PAGE_OFFSET in AS1)
+ * r5 - device tree virtual address. If r4 is 0, r5 is ignored.
*/
_GLOBAL(restore_to_as0)
mflr r0
@@ -1215,7 +1261,15 @@ _GLOBAL(restore_to_as0)
0: mflr r9
addi r9,r9,1f - 0b
- mfmsr r7
+ /*
+ * We may map the PAGE_OFFSET in AS0 to a different physical address,
+ * so we need to calculate the right jump and device tree address based
+ * on the offset passed in r4.
+ */
+ subf r9,r4,r9
+ subf r5,r4,r5
+
+2: mfmsr r7
li r8,(MSR_IS | MSR_DS)
andc r7,r7,r8
@@ -1234,9 +1288,19 @@ _GLOBAL(restore_to_as0)
mtspr SPRN_MAS1,r9
tlbwe
isync
+
+ cmpwi r4,0
+ bne 3f
mtlr r0
blr
+ /*
+ * PAGE_OFFSET will now map to a different physical address, so
+ * jump to _start to do another relocation.
+ */
+3: mr r3,r5
+ bl _start
+
/*
* We put a few things here that have to be page-aligned. This stuff
* goes at the beginning of the data segment, which is page-aligned.
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index ca956c83e3a2..ce0c7d7db6c3 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -231,7 +231,7 @@ void __init adjust_total_lowmem(void)
i = switch_to_as1();
__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM);
- restore_to_as0(i);
+ restore_to_as0(i, 0, 0);
pr_info("Memory CAM mapping: ");
for (i = 0; i < tlbcam_index - 1; i++)
@@ -252,17 +252,25 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
}
#ifdef CONFIG_RELOCATABLE
-notrace void __init relocate_init(phys_addr_t start)
+int __initdata is_second_reloc;
+notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
{
unsigned long base = KERNELBASE;
+ kernstart_addr = start;
+ if (is_second_reloc) {
+ virt_phys_offset = PAGE_OFFSET - memstart_addr;
+ return;
+ }
+
/*
* Relocatable kernel support based on processing of dynamic
- * relocation entries.
- * Compute the virt_phys_offset :
+ * relocation entries. Before we get the real memstart_addr,
+ * we will compute the virt_phys_offset like this:
* virt_phys_offset = stext.run - kernstart_addr
*
- * stext.run = (KERNELBASE & ~0x3ffffff) + (kernstart_addr & 0x3ffffff)
+ * stext.run = (KERNELBASE & ~0x3ffffff) +
+ * (kernstart_addr & 0x3ffffff)
* When we relocate, we have :
*
* (kernstart_addr & 0x3ffffff) = (stext.run & 0x3ffffff)
@@ -272,10 +280,31 @@ notrace void __init relocate_init(phys_addr_t start)
* (kernstart_addr & ~0x3ffffff)
*
*/
- kernstart_addr = start;
start &= ~0x3ffffff;
base &= ~0x3ffffff;
virt_phys_offset = base - start;
+ early_get_first_memblock_info(__va(dt_ptr), NULL);
+ /*
+ * We now have the memstart_addr, so check whether it is the same
+ * address that PAGE_OFFSET currently maps to. If not, we have to
+ * change the mapping of PAGE_OFFSET to memstart_addr and do a
+ * second relocation.
+ */
+ if (start != memstart_addr) {
+ int n, offset = memstart_addr - start;
+
+ is_second_reloc = 1;
+ n = switch_to_as1();
+ /* map a 64M area for the second relocation */
+ if (memstart_addr > start)
+ map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM);
+ else
+ map_mem_in_cams_addr(start, PAGE_OFFSET - offset,
+ 0x4000000, CONFIG_LOWMEM_CAM_NUM);
+ restore_to_as0(n, offset, __va(dt_ptr));
+ /* We should never reach here */
+ panic("Relocation error");
+ }
}
#endif
#endif
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index eefbf7bb4331..91da910210cb 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -149,7 +149,7 @@ extern void MMU_init_hw(void);
extern unsigned long mmu_mapin_ram(unsigned long top);
extern void adjust_total_lowmem(void);
extern int switch_to_as1(void);
-extern void restore_to_as0(int esel);
+extern void restore_to_as0(int esel, int offset, void *dt_ptr);
#endif
extern void loadcam_entry(unsigned int index);
--
1.8.3.1
^ permalink raw reply related
* [PATCH v4 09/10] powerpc/fsl_booke: smp support for booting a relocatable kernel above 64M
From: Kevin Hao @ 2013-12-24 7:12 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1387869132-12650-1-git-send-email-haokexin@gmail.com>
When a secondary cpu boots above 64M, it faces the same issue as the
boot cpu: PAGE_OFFSET maps to two different physical addresses in the
initial tlb mapping and in the final mapping. So we have to use
switch_to_as1/restore_to_as0 around the switch between these two
mappings. When restoring to as0 on a secondary cpu, we only need to
return to the caller, so add a new parameter to restore_to_as0 for
this purpose.
Use LOAD_REG_ADDR_PIC to get the address of variables which may
be used before we set up the final mapping in the cams for the
secondary cpu. Move the setup of the cams a bit earlier in order to
avoid unnecessary uses of LOAD_REG_ADDR_PIC.
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
v4: A new patch in v4.
arch/powerpc/kernel/head_fsl_booke.S | 41 ++++++++++++++++++++++++------------
arch/powerpc/mm/fsl_booke_mmu.c | 4 ++--
arch/powerpc/mm/mmu_decl.h | 2 +-
arch/powerpc/mm/tlb_nohash_low.S | 4 +++-
4 files changed, 34 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 71e08dfbd1d1..0e545630c42a 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -216,8 +216,7 @@ set_ivor:
/* Check to see if we're the second processor, and jump
* to the secondary_start code if so
*/
- lis r24, boot_cpuid@h
- ori r24, r24, boot_cpuid@l
+ LOAD_REG_ADDR_PIC(r24, boot_cpuid)
lwz r24, 0(r24)
cmpwi r24, -1
mfspr r24,SPRN_PIR
@@ -1146,24 +1145,36 @@ _GLOBAL(__flush_disable_L1)
/* When we get here, r24 needs to hold the CPU # */
.globl __secondary_start
__secondary_start:
- lis r3,__secondary_hold_acknowledge@h
- ori r3,r3,__secondary_hold_acknowledge@l
- stw r24,0(r3)
-
- li r3,0
- mr r4,r24 /* Why? */
- bl call_setup_cpu
-
- lis r3,tlbcam_index@ha
- lwz r3,tlbcam_index@l(r3)
+ LOAD_REG_ADDR_PIC(r3, tlbcam_index)
+ lwz r3,0(r3)
mtctr r3
li r26,0 /* r26 safe? */
+ bl switch_to_as1
+ mr r27,r3 /* tlb entry */
/* Load each CAM entry */
1: mr r3,r26
bl loadcam_entry
addi r26,r26,1
bdnz 1b
+ mr r3,r27 /* tlb entry */
+ LOAD_REG_ADDR_PIC(r4, memstart_addr)
+ lwz r4,0(r4)
+ mr r5,r25 /* phys kernel start */
+ rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */
+ subf r4,r5,r4 /* memstart_addr - phys kernel start */
+ li r5,0 /* no device tree */
+ li r6,0 /* not boot cpu */
+ bl restore_to_as0
+
+
+ lis r3,__secondary_hold_acknowledge@h
+ ori r3,r3,__secondary_hold_acknowledge@l
+ stw r24,0(r3)
+
+ li r3,0
+ mr r4,r24 /* Why? */
+ bl call_setup_cpu
/* get current_thread_info and current */
lis r1,secondary_ti@ha
@@ -1253,6 +1264,7 @@ _GLOBAL(switch_to_as1)
* r3 - the tlb entry which should be invalidated
* r4 - __pa(PAGE_OFFSET in AS0) - __pa(PAGE_OFFSET in AS1)
* r5 - device tree virtual address. If r4 is 0, r5 is ignored.
+ * r6 - boot cpu
*/
_GLOBAL(restore_to_as0)
mflr r0
@@ -1268,6 +1280,7 @@ _GLOBAL(restore_to_as0)
*/
subf r9,r4,r9
subf r5,r4,r5
+ subf r0,r4,r0
2: mfmsr r7
li r8,(MSR_IS | MSR_DS)
@@ -1290,7 +1303,9 @@ _GLOBAL(restore_to_as0)
isync
cmpwi r4,0
- bne 3f
+ cmpwi cr1,r6,0
+ cror eq,4*cr1+eq,eq
+ bne 3f /* offset != 0 && is_boot_cpu */
mtlr r0
blr
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index ce0c7d7db6c3..2a81f53d49f1 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -231,7 +231,7 @@ void __init adjust_total_lowmem(void)
i = switch_to_as1();
__max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM);
- restore_to_as0(i, 0, 0);
+ restore_to_as0(i, 0, 0, 1);
pr_info("Memory CAM mapping: ");
for (i = 0; i < tlbcam_index - 1; i++)
@@ -301,7 +301,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
else
map_mem_in_cams_addr(start, PAGE_OFFSET - offset,
0x4000000, CONFIG_LOWMEM_CAM_NUM);
- restore_to_as0(n, offset, __va(dt_ptr));
+ restore_to_as0(n, offset, __va(dt_ptr), 1);
/* We should never reach here */
panic("Relocation error");
}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 91da910210cb..9615d82919b8 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -149,7 +149,7 @@ extern void MMU_init_hw(void);
extern unsigned long mmu_mapin_ram(unsigned long top);
extern void adjust_total_lowmem(void);
extern int switch_to_as1(void);
-extern void restore_to_as0(int esel, int offset, void *dt_ptr);
+extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
#endif
extern void loadcam_entry(unsigned int index);
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S
index 626ad081639f..43ff3c797fbf 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/tlb_nohash_low.S
@@ -402,7 +402,9 @@ _GLOBAL(set_context)
* Load TLBCAM[index] entry in to the L2 CAM MMU
*/
_GLOBAL(loadcam_entry)
- LOAD_REG_ADDR(r4, TLBCAM)
+ mflr r5
+ LOAD_REG_ADDR_PIC(r4, TLBCAM)
+ mtlr r5
mulli r5,r3,TLBCAM_SIZE
add r3,r5,r4
lwz r4,TLBCAM_MAS0(r3)
--
1.8.3.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox