LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC v10 4/6] dma: of: Add common xlate function for matching by channel id
From: Alexander Popov @ 2014-03-20 14:47 UTC (permalink / raw)
  To: Gerhard Sittig, Dan Williams, Vinod Koul, Lars-Peter Clausen,
	Arnd Bergmann, Anatolij Gustschin, Andy Shevchenko,
	Alexander Popov, linuxppc-dev, dmaengine
  Cc: devicetree
In-Reply-To: <1395326878-25243-1-git-send-email-a13xp0p0v88@gmail.com>

This patch adds a new common OF dma xlate callback function which will match a
channel by it's id. The binding expects one integer argument which it will use to
lookup the channel by the id.

Unlike of_dma_simple_xlate this function is able to handle a system with
multiple DMA controllers. When registering the of dma provider with
of_dma_controller_register a pointer to the dma_device struct which is
associated with the dt node needs to passed as the data parameter.
New function will use this pointer to match only channels which belong to the
specified DMA controller.

Signed-off-by: Alexander Popov <a13xp0p0v88@gmail.com>
---
 drivers/dma/of-dma.c   | 35 +++++++++++++++++++++++++++++++++++
 include/linux/of_dma.h |  4 ++++
 2 files changed, 39 insertions(+)

diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index e8fe9dc..d5fbeaa 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -218,3 +218,38 @@ struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 			&dma_spec->args[0]);
 }
 EXPORT_SYMBOL_GPL(of_dma_simple_xlate);
+
+/**
+ * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ * @of_dma:	pointer to DMA controller data
+ *
+ * This function can be used as the of xlate callback for DMA driver which wants
+ * to match the channel based on the channel id. When using this xlate function
+ * the #dma-cells propety of the DMA controller dt node needs to be set to 1.
+ * The data parameter of of_dma_controller_register must be a pointer to the
+ * dma_device struct the function should match upon.
+ *
+ * Returns pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	struct dma_device *dev = ofdma->of_dma_data;
+	struct dma_chan *chan, *candidate = NULL;
+
+	if (!dev || dma_spec->args_count != 1)
+		return NULL;
+
+	list_for_each_entry(chan, &dev->channels, device_node)
+		if (chan->chan_id == dma_spec->args[0]) {
+			candidate = chan;
+			break;
+		}
+
+	if (!candidate)
+		return NULL;
+
+	return dma_get_slave_channel(candidate);
+}
+EXPORT_SYMBOL_GPL(of_dma_xlate_by_chan_id);
diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index ae36298..56bc026 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -41,6 +41,8 @@ extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     const char *name);
 extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 		struct of_dma *ofdma);
+extern struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
+		struct of_dma *ofdma);
 #else
 static inline int of_dma_controller_register(struct device_node *np,
 		struct dma_chan *(*of_dma_xlate)
@@ -66,6 +68,8 @@ static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_s
 	return NULL;
 }
 
+#define of_dma_xlate_by_chan_id NULL
+
 #endif
 
 #endif /* __LINUX_OF_DMA_H */
-- 
1.8.4.2

^ permalink raw reply related

* [PATCH RFC v10 5/6] dma: mpc512x: add device tree binding document
From: Alexander Popov @ 2014-03-20 14:47 UTC (permalink / raw)
  To: Gerhard Sittig, Dan Williams, Vinod Koul, Lars-Peter Clausen,
	Arnd Bergmann, Anatolij Gustschin, Andy Shevchenko,
	Alexander Popov, linuxppc-dev, dmaengine
  Cc: devicetree
In-Reply-To: <1395326878-25243-1-git-send-email-a13xp0p0v88@gmail.com>

From: Gerhard Sittig <gsi@denx.de>

introduce a device tree binding document for the MPC512x DMA controller

Signed-off-by: Gerhard Sittig <gsi@denx.de>
[ a13xp0p0v88@gmail.com: turn this into a separate patch ]
---
 .../devicetree/bindings/dma/mpc512x-dma.txt        | 55 ++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/dma/mpc512x-dma.txt

diff --git a/Documentation/devicetree/bindings/dma/mpc512x-dma.txt b/Documentation/devicetree/bindings/dma/mpc512x-dma.txt
new file mode 100644
index 0000000..a4867d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/mpc512x-dma.txt
@@ -0,0 +1,55 @@
+* Freescale MPC512x DMA Controller
+
+The DMA controller in the Freescale MPC512x SoC can move blocks of
+memory contents between memory and peripherals or memory to memory.
+
+Refer to the "Generic DMA Controller and DMA request bindings" description
+in the dma.txt file for a more detailled discussion of the binding.  The
+MPC512x DMA engine binding follows the common scheme, but doesn't provide
+support for the optional channels and requests counters (those values are
+derived from the detected hardware features) and has a fixed client
+specifier length of 1 integer cell (the value is the DMA channel, since
+the DMA controller uses a fixed assignment of request lines per channel).
+
+
+DMA controller node properties:
+
+Required properties:
+- compatible:		should be "fsl,mpc5121-dma"
+- reg:			address and size of the DMA controller's register set
+- interrupts:		interrupt spec for the DMA controller
+
+Optional properties:
+- #dma-cells:		must be <1>, describes the number of integer cells
+			needed to specify the 'dmas' property in client nodes,
+			strongly recommended since common client helper code
+			uses this property
+
+Example:
+
+	dma0: dma@14000 {
+		compatible = "fsl,mpc5121-dma";
+		reg = <0x14000 0x1800>;
+		interrupts = <65 0x8>;
+		#dma-cells = <1>;
+	};
+
+
+Client node properties:
+
+Required properties:
+- dmas:			list of DMA specifiers, consisting each of a handle
+			for the DMA controller and integer cells to specify
+			the channel used within the DMA controller
+- dma-names:		list of identifier strings for the DMA specifiers,
+			client device driver code uses these strings to
+			have DMA channels looked up at the controller
+
+Example:
+
+	sdhc@1500 {
+		compatible = "fsl,mpc5121-sdhc";
+		/* ... */
+		dmas = <&dma0 30>;
+		dma-names = "rx-tx";
+	};
-- 
1.8.4.2

^ permalink raw reply related

* [PATCH RFC v10 6/6] dma: mpc512x: register for device tree channel lookup
From: Alexander Popov @ 2014-03-20 14:47 UTC (permalink / raw)
  To: Gerhard Sittig, Dan Williams, Vinod Koul, Lars-Peter Clausen,
	Arnd Bergmann, Anatolij Gustschin, Andy Shevchenko,
	Alexander Popov, linuxppc-dev, dmaengine
  Cc: devicetree
In-Reply-To: <1395326878-25243-1-git-send-email-a13xp0p0v88@gmail.com>

Register the controller for device tree based lookup of DMA channels
(non-fatal for backwards compatibility with older device trees) and
provide the '#dma-cells' property in the shared mpc5121.dtsi file

Signed-off-by: Gerhard Sittig <gsi@denx.de>
Signed-off-by: Alexander Popov <a13xp0p0v88@gmail.com>
---
 arch/powerpc/boot/dts/mpc5121.dtsi |  1 +
 drivers/dma/mpc512x_dma.c          | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi
index 2c0e155..7f9d14f 100644
--- a/arch/powerpc/boot/dts/mpc5121.dtsi
+++ b/arch/powerpc/boot/dts/mpc5121.dtsi
@@ -498,6 +498,7 @@
 			compatible = "fsl,mpc5121-dma";
 			reg = <0x14000 0x1800>;
 			interrupts = <65 0x8>;
+			#dma-cells = <1>;
 		};
 	};
 
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 1b90b3b..ab70012 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -52,6 +52,7 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
+#include <linux/of_dma.h>
 #include <linux/of_platform.h>
 
 #include <linux/random.h>
@@ -1031,7 +1032,15 @@ static int mpc_dma_probe(struct platform_device *op)
 	if (retval)
 		goto err_free2;
 
-	return retval;
+	/* Register with OF helpers for DMA lookups (nonfatal) */
+	if (dev->of_node) {
+		retval = of_dma_controller_register(dev->of_node,
+						of_dma_xlate_by_chan_id, mdma);
+		if (retval)
+			dev_warn(dev, "Could not register for OF lookup\n");
+	}
+
+	return 0;
 
 err_free2:
 	if (mdma->is_mpc8308)
@@ -1052,6 +1061,8 @@ static int mpc_dma_remove(struct platform_device *op)
 	struct device *dev = &op->dev;
 	struct mpc_dma *mdma = dev_get_drvdata(dev);
 
+	if (dev->of_node)
+		of_dma_controller_free(dev->of_node);
 	dma_async_device_unregister(&mdma->dma);
 	if (mdma->is_mpc8308) {
 		free_irq(mdma->irq2, mdma);
-- 
1.8.4.2

^ permalink raw reply related

* Re: Tasks stuck in futex code (in 3.14-rc6)
From: Davidlohr Bueso @ 2014-03-20 15:06 UTC (permalink / raw)
  To: Srikar Dronamraju
  Cc: Peter Zijlstra, linuxppc-dev, LKML, paulus, tglx, Paul McKenney,
	torvalds, mingo
In-Reply-To: <20140320100848.GC10406@linux.vnet.ibm.com>

On Thu, 2014-03-20 at 15:38 +0530, Srikar Dronamraju wrote:
> > This problem suggests that we missed a wakeup for a task that was adding
> > itself to the queue in a wait path. And the only place that can happen
> > is with the hb spinlock check for any pending waiters. Just in case we
> > missed some assumption about checking the hash bucket spinlock as a way
> > of detecting any waiters (powerpc?), could you revert this commit and
> > try the original atomic operations variant:
> > 
> > https://lkml.org/lkml/2013/12/19/630
> 
> I think the above url and the commit id that I reverted i.e
> git://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=b0c29f79ecea0b6fbcefc999
> are the same.
> 
> Or am I missing something? 

No, please take a closer look, it is a different approaches to the same
end.

diff --git a/kernel/futex.c b/kernel/futex.c
index 34ecd9d..35ff697 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -203,6 +203,7 @@ static const struct futex_q futex_q_init = {
  * waiting on a futex.
  */
 struct futex_hash_bucket {
+	atomic_t waiters;
 	spinlock_t lock;
 	struct plist_head chain;
 } ____cacheline_aligned_in_smp;
@@ -211,6 +212,53 @@ static unsigned long __read_mostly futex_hashsize;
 
 static struct futex_hash_bucket *futex_queues;
 
+static inline void futex_get_mm(union futex_key *key)
+{
+	atomic_inc(&key->private.mm->mm_count);
+#ifdef CONFIG_SMP
+	/*
+	 * Ensure futex_get_mm() implies a full barrier such that
+	 * get_futex_key() implies a full barrier. This is relied upon
+	 * as full barrier (B), see the ordering comment above.
+	 */
+	smp_mb__after_atomic_inc();
+#endif
+}
+
+/*
+ * Reflects a new waiter being added to the waitqueue.
+ */
+static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
+{
+#ifdef CONFIG_SMP
+	atomic_inc(&hb->waiters);
+	/*
+	 * Full barrier (A), see the ordering comment above.
+	 */
+	smp_mb__after_atomic_inc();
+#endif
+}
+
+/*
+ * Reflects a waiter being removed from the waitqueue by wakeup
+ * paths.
+ */
+static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
+{
+#ifdef CONFIG_SMP
+	atomic_dec(&hb->waiters);
+#endif
+}
+
+static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
+{
+#ifdef CONFIG_SMP
+	return atomic_read(&hb->waiters);
+#else
+	return 1;
+#endif
+}
+
 /*
  * We hash on the keys returned from get_futex_key (see below).
  */
@@ -245,10 +293,10 @@ static void get_futex_key_refs(union futex_key *key)
 
 	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
 	case FUT_OFF_INODE:
-		ihold(key->shared.inode);
+		ihold(key->shared.inode); /* implies MB */
 		break;
 	case FUT_OFF_MMSHARED:
-		atomic_inc(&key->private.mm->mm_count);
+		futex_get_mm(key); /* implies MB */
 		break;
 	}
 }
@@ -322,7 +370,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	if (!fshared) {
 		key->private.mm = mm;
 		key->private.address = address;
-		get_futex_key_refs(key);
+		get_futex_key_refs(key); /* implies MB (B) */
 		return 0;
 	}
 
@@ -429,7 +477,7 @@ again:
 		key->shared.pgoff = basepage_index(page);
 	}
 
-	get_futex_key_refs(key);
+	get_futex_key_refs(key); /* implies MB (B) */
 
 out:
 	unlock_page(page_head);
@@ -893,6 +941,7 @@ static void __unqueue_futex(struct futex_q *q)
 
 	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
 	plist_del(&q->list, &hb->chain);
+	hb_waiters_dec(hb);
 }
 
 /*
@@ -1052,6 +1101,11 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 		goto out;
 
 	hb = hash_futex(&key);
+
+	/* Make sure we really have tasks to wakeup */
+	if (!hb_waiters_pending(hb))
+		goto out_put_key;
+
 	spin_lock(&hb->lock);
 
 	plist_for_each_entry_safe(this, next, &hb->chain, list) {
@@ -1072,6 +1126,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 	}
 
 	spin_unlock(&hb->lock);
+out_put_key:
 	put_futex_key(&key);
 out:
 	return ret;
@@ -1190,7 +1245,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 	 */
 	if (likely(&hb1->chain != &hb2->chain)) {
 		plist_del(&q->list, &hb1->chain);
+		hb_waiters_dec(hb1);
 		plist_add(&q->list, &hb2->chain);
+		hb_waiters_inc(hb2);
 		q->lock_ptr = &hb2->lock;
 	}
 	get_futex_key_refs(key2);
@@ -1533,6 +1590,17 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 	struct futex_hash_bucket *hb;
 
 	hb = hash_futex(&q->key);
+
+	/*
+	 * Increment the counter before taking the lock so that
+	 * a potential waker won't miss a to-be-slept task that is
+	 * waiting for the spinlock. This is safe as all queue_lock()
+	 * users end up calling queue_me(). Similarly, for housekeeping,
+	 * decrement the counter at queue_unlock() when some error has
+	 * occurred and we don't end up adding the task to the list.
+	 */
+	hb_waiters_inc(hb);
+
 	q->lock_ptr = &hb->lock;
 
 	spin_lock(&hb->lock);
@@ -1544,6 +1612,7 @@ queue_unlock(struct futex_hash_bucket *hb)
 	__releases(&hb->lock)
 {
 	spin_unlock(&hb->lock);
+	hb_waiters_dec(hb);
 }
 
 /**
@@ -2275,6 +2344,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 		 * Unqueue the futex_q and determine which it was.
 		 */
 		plist_del(&q->list, &hb->chain);
+		hb_waiters_dec(hb);
 
 		/* Handle spurious wakeups gracefully */
 		ret = -EWOULDBLOCK;
@@ -2808,6 +2878,7 @@ static int __init futex_init(void)
 		futex_cmpxchg_enabled = 1;
 
 	for (i = 0; i < futex_hashsize; i++) {
+		atomic_set(&futex_queues[i].waiters, 0);
 		plist_head_init(&futex_queues[i].chain);
 		spin_lock_init(&futex_queues[i].lock);
 	}

^ permalink raw reply related

* [PATCH 05/18] powerpc/boot: add PROM_ERROR define in oflib
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

This is mostly useful to make to the boot wrapper code closer with
the kernel code in prom_init.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/oflib.c |    8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
index 0f72b1a42133..96fe4d225abe 100644
--- a/arch/powerpc/boot/oflib.c
+++ b/arch/powerpc/boot/oflib.c
@@ -27,6 +27,8 @@ struct prom_args {
 	__be32 args[10];	/* Input/output arguments. */
 };
 
+#define PROM_ERROR (-1u)
+
 static int (*prom) (void *);
 
 void of_init(void *promptr)
@@ -55,7 +57,7 @@ int of_call_prom(const char *service, int nargs, int nret, ...)
 		args.args[nargs+i] = 0;
 
 	if (prom(&args) < 0)
-		return -1;
+		return PROM_ERROR;
 
 	return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;
 }
@@ -80,9 +82,9 @@ static int of_call_prom_ret(const char *service, int nargs, int nret,
 		args.args[nargs+i] = 0;
 
 	if (prom(&args) < 0)
-		return -1;
+		return PROM_ERROR;
 
-	if (rets != (void *) 0)
+	if (rets != NULL)
 		for (i = 1; i < nret; ++i)
 			rets[i-1] = be32_to_cpu(args.args[nargs+i]);
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 08/18] powerpc/boot: fix compile warning in 64bit
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

 arch/powerpc/boot/oflib.c:211:9: warning: cast to pointer from integer of \
		  different size [-Wint-to-pointer-cast]
  return (phandle) of_call_prom("finddevice", 1, 1, name);

This is a work around. The definite solution would be to define the
phandle typedef as a u32, as in the kernel, but this would break the
device tree ops API. 

Let it be for the moment.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/oflib.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
index 5ab9cb57cd31..e8697df2df27 100644
--- a/arch/powerpc/boot/oflib.c
+++ b/arch/powerpc/boot/oflib.c
@@ -203,7 +203,7 @@ void of_exit(void)
  */
 void *of_finddevice(const char *name)
 {
-	return (phandle) of_call_prom("finddevice", 1, 1, name);
+	return (void *) (unsigned long) of_call_prom("finddevice", 1, 1, name);
 }
 
 int of_getprop(const void *phandle, const char *name, void *buf,
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 00/18] powerpc/boot: 64bit little endian wrapper
From: Cédric Le Goater @ 2014-03-20 15:09 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

Hi,

The following patchset adds support for 64bit little endian boot 
wrapper. It is based on original code from Andrew Tauferner. 

The first patches provide fixes for 64bit support. I also changed 
the prom code to make it converge with the prom_init kernel code. 
They have a lot in common and they could probably be merged.

  powerpc/boot: fix do_div for 64bit wrapper
  powerpc/boot: use a common prom_args struct in oflib
  powerpc/boot: use prom_arg_t in oflib
  powerpc/boot: add byteswapping routines in oflib
  powerpc/boot: add PROM_ERROR define in oflib
  powerpc/boot: rework of_claim() to make it 64bit friendly
  powerpc/boot: define typedef ihandle as u32
  powerpc/boot: fix compile warning in 64bit

These are for little endian only:

  powerpc/boot: define byteswapping routines for little endian
  powerpc/boot: add 64bit and little endian support to addnote
  powerpc/boot: add little endian support to elf utils

and these to support a 64bit boot wrapper in both endian
order :

  powerpc/boot: define a routine to enter prom
  powerpc/boot: modify entry point for 64bit
  powerpc/boot: modify how we enter kernel on 64bit
  powerpc/boot: add a global entry point for pseries
  powerpc/boot: add support for 64bit big endian wrapper
  powerpc/boot: add support for 64bit little endian wrapper

This final patch restores the previous configuration for 
64bit big endian kernel, which is to compile in 32bit : 

  powerpc/boot: add PPC64_BOOT_WRAPPER config option


Here are some topics to discuss :

  - to compile in 64bit, -m64 is added to the cross32 compiler. 
   
  - There are still some compile warnings due to 64bit, in addnote 
    and in the device tree library.

  - the wrapper is compiled as a position independent executable. 
    This might not be an issue.

  - is the addnote executable still in use ? It seems important for 
    RS6000.

This patchset is based on a 3.14-rc7 and was tested on qemu with 
the -kernel option on little and big endian guests and with a custom
yaboot suporting LE. An another round of tests will be required when 
a 64bit grub is available.

Best,

C. 

Cédric Le Goater (18):
  powerpc/boot: fix do_div for 64bit wrapper
  powerpc/boot: use a common prom_args struct in oflib
  powerpc/boot: use prom_arg_t in oflib
  powerpc/boot: add byteswapping routines in oflib
  powerpc/boot: add PROM_ERROR define in oflib
  powerpc/boot: rework of_claim() to make it 64bit friendly
  powerpc/boot: define typedef ihandle as u32
  powerpc/boot: fix compile warning in 64bit
  powerpc/boot: define byteswapping routines for little endian
  powerpc/boot: add 64bit and little endian support to addnote
  powerpc/boot: add little endian support to elf utils
  powerpc/boot: define a routine to enter prom
  powerpc/boot: modify entry point for 64bit
  powerpc/boot: modify how we enter kernel on 64bit
  powerpc/boot: add a global entry point for pseries
  powerpc/boot: add support for 64bit big endian wrapper
  powerpc/boot: add support for 64bit little endian wrapper
  powerpc/boot: add PPC64_BOOT_WRAPPER config option

 arch/powerpc/boot/Makefile             |   18 +++-
 arch/powerpc/boot/addnote.c            |  128 +++++++++++++++--------
 arch/powerpc/boot/crt0.S               |  180 +++++++++++++++++++++++++++++++-
 arch/powerpc/boot/elf_util.c           |    4 +
 arch/powerpc/boot/main.c               |    4 +
 arch/powerpc/boot/of.c                 |    4 +-
 arch/powerpc/boot/of.h                 |   17 ++-
 arch/powerpc/boot/ofconsole.c          |    6 +-
 arch/powerpc/boot/oflib.c              |   94 +++++++++--------
 arch/powerpc/boot/ppc_asm.h            |   12 +++
 arch/powerpc/boot/pseries-head.S       |    8 ++
 arch/powerpc/boot/stdio.c              |   14 +++
 arch/powerpc/boot/swab.h               |   29 +++++
 arch/powerpc/boot/wrapper              |   17 ++-
 arch/powerpc/boot/zImage.lds.S         |   25 ++++-
 arch/powerpc/platforms/Kconfig.cputype |    5 +
 16 files changed, 465 insertions(+), 100 deletions(-)
 create mode 100644 arch/powerpc/boot/pseries-head.S
 create mode 100644 arch/powerpc/boot/swab.h

-- 
1.7.10.4

^ permalink raw reply

* [PATCH 17/18] powerpc/boot: add support for 64bit little endian wrapper
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

Compilation is changed for little endian and entry points between the
wrapper and the kernel are modified to fix endian order with the 
FIXUP_ENDIAN trampoline. 

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/Makefile       |    7 +++++--
 arch/powerpc/boot/crt0.S         |    1 +
 arch/powerpc/boot/ppc_asm.h      |   12 ++++++++++++
 arch/powerpc/boot/pseries-head.S |    3 +++
 arch/powerpc/boot/wrapper        |    1 +
 5 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index e0744781ba5b..b7e640028711 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -22,11 +22,14 @@ all: $(obj)/zImage
 BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		 -fno-strict-aliasing -Os -msoft-float -pipe \
 		 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
-		 -isystem $(shell $(CROSS32CC) -print-file-name=include) \
-		 -mbig-endian
+		 -isystem $(shell $(CROSS32CC) -print-file-name=include)
 ifdef CONFIG_PPC64
 BOOTCFLAGS	+= -m64
 endif
+ifdef CONFIG_CPU_BIG_ENDIAN
+BOOTCFLAGS	+= -mbig-endian
+endif
+
 BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
 
 ifdef CONFIG_DEBUG_INFO
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index 689290561e69..14de4f8778a7 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -275,6 +275,7 @@ prom:
 	rfid
 
 1:	/* Return from OF */
+	FIXUP_ENDIAN
 
 	/* Restore registers and return. */
 	rldicl  r1,r1,0,32
diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h
index eb0e98be69e0..35ea60c1f070 100644
--- a/arch/powerpc/boot/ppc_asm.h
+++ b/arch/powerpc/boot/ppc_asm.h
@@ -62,4 +62,16 @@
 #define SPRN_TBRL	268
 #define SPRN_TBRU	269
 
+#define FIXUP_ENDIAN						   \
+	tdi   0, 0, 0x48; /* Reverse endian of b . + 8		*/ \
+	b     $+36;	  /* Skip trampoline if endian is good	*/ \
+	.long 0x05009f42; /* bcl 20,31,$+4			*/ \
+	.long 0xa602487d; /* mflr r10				*/ \
+	.long 0x1c004a39; /* addi r10,r10,28			*/ \
+	.long 0xa600607d; /* mfmsr r11				*/ \
+	.long 0x01006b69; /* xori r11,r11,1			*/ \
+	.long 0xa6035a7d; /* mtsrr0 r10				*/ \
+	.long 0xa6037b7d; /* mtsrr1 r11				*/ \
+	.long 0x2400004c  /* rfid				*/
+
 #endif /* _PPC64_PPC_ASM_H */
diff --git a/arch/powerpc/boot/pseries-head.S b/arch/powerpc/boot/pseries-head.S
index 655c3d2c321b..6ef6e02e80f9 100644
--- a/arch/powerpc/boot/pseries-head.S
+++ b/arch/powerpc/boot/pseries-head.S
@@ -1,5 +1,8 @@
+#include "ppc_asm.h"
+
 	.text
 
 	.globl _zimage_start
 _zimage_start:
+	FIXUP_ENDIAN
 	b _zimage_start_lib
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index baaad96eed7f..3270e2a5c901 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -139,6 +139,7 @@ fi
 
 elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
 case "$elfformat" in
+    elf64-powerpcle)	format=elf64lppc	;;
     elf64-powerpc)	format=elf64ppc	;;
     elf32-powerpc)	format=elf32ppc	;;
 esac
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 13/18] powerpc/boot: modify entry point for 64bit
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

This patch adds support a 64bit wrapper entry point. As in 32bit, the
entry point does its own relocation and can be loaded at any address
by the firmware.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/crt0.S |  108 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 104 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index dbd99d064828..689290561e69 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -1,17 +1,20 @@
 /*
  * Copyright (C) Paul Mackerras 1997.
  *
+ * Adapted for 64 bit LE PowerPC by Andrew Tauferner
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * NOTE: this code runs in 32 bit mode, is position-independent,
- * and is packaged as ELF32.
  */
 
 #include "ppc_asm.h"
 
+RELA = 7
+RELACOUNT = 0x6ffffff9
+
 	.text
 	/* A procedure descriptor used when booting this as a COFF file.
 	 * When making COFF, this comes first in the link and we're
@@ -21,6 +24,20 @@
 _zimage_start_opd:
 	.long	0x500000, 0, 0, 0
 
+#ifdef __powerpc64__
+.balign 8
+p_start:	.llong	_start
+p_etext:	.llong	_etext
+p_bss_start:	.llong	__bss_start
+p_end:		.llong	_end
+
+p_toc:		.llong	__toc_start + 0x8000 - p_base
+p_dyn:		.llong	__dynamic_start - p_base
+p_rela:		.llong	__rela_dyn_start - p_base
+p_prom:		.llong	0
+	.weak	_platform_stack_top
+p_pstack:	.llong	_platform_stack_top
+#else
 p_start:	.long	_start
 p_etext:	.long	_etext
 p_bss_start:	.long	__bss_start
@@ -28,6 +45,7 @@ p_end:		.long	_end
 
 	.weak	_platform_stack_top
 p_pstack:	.long	_platform_stack_top
+#endif
 
 	.weak	_zimage_start
 	.globl	_zimage_start
@@ -38,6 +56,7 @@ _zimage_start_lib:
 	   and the address where we're running. */
 	bl	.+4
 p_base:	mflr	r10		/* r10 now points to runtime addr of p_base */
+#ifndef __powerpc64__
 	/* grab the link address of the dynamic section in r11 */
 	addis	r11,r10,(_GLOBAL_OFFSET_TABLE_-p_base)@ha
 	lwz	r11,(_GLOBAL_OFFSET_TABLE_-p_base)@l(r11)
@@ -51,8 +70,6 @@ p_base:	mflr	r10		/* r10 now points to runtime addr of p_base */
 
 	/* The dynamic section contains a series of tagged entries.
 	 * We need the RELA and RELACOUNT entries. */
-RELA = 7
-RELACOUNT = 0x6ffffff9
 	li	r9,0
 	li	r0,0
 9:	lwz	r8,0(r12)	/* get tag */
@@ -120,7 +137,90 @@ RELACOUNT = 0x6ffffff9
 	li	r0,0
 	stwu	r0,-16(r1)	/* establish a stack frame */
 6:
+#else /* __powerpc64__ */
+	/* Save the prom pointer at p_prom. */
+	std	r5,(p_prom-p_base)(r10)
+
+	/* Set r2 to the TOC. */
+	ld	r2,(p_toc-p_base)(r10)
+	add	r2,r2,r10
+
+	/* Grab the link address of the dynamic section in r11. */
+	ld	r11,-32768(r2)
+	cmpwi	r11,0
+	beq	3f              /* if not linked -pie then no dynamic section */
+
+	ld	r11,(p_dyn-p_base)(r10)
+	add	r11,r11,r10
+	ld	r9,(p_rela-p_base)(r10)
+	add	r9,r9,r10
+
+	li	r7,0
+	li	r8,0
+9:	ld	r6,0(r11)       /* get tag */
+	cmpdi	r6,0
+	beq	12f              /* end of list */
+	cmpdi	r6,RELA
+	bne	10f
+	ld	r7,8(r11)       /* get RELA pointer in r7 */
+	b	11f
+10:	addis	r6,r6,(-RELACOUNT)@ha
+	cmpdi	r6,RELACOUNT@l
+	bne	11f
+	ld	r8,8(r11)       /* get RELACOUNT value in r8 */
+11:	addi	r11,r11,16
+	b	9b
+12:
+	cmpdi	r7,0            /* check we have both RELA and RELACOUNT */
+	cmpdi	cr1,r8,0
+	beq	3f
+	beq	cr1,3f
+
+	/* Calcuate the runtime offset. */
+	subf	r7,r7,r9
 
+	/* Run through the list of relocations and process the
+	 * R_PPC64_RELATIVE ones. */
+	mtctr	r8
+13:	ld	r0,8(r9)        /* ELF64_R_TYPE(reloc->r_info) */
+	cmpdi	r0,22           /* R_PPC64_RELATIVE */
+	bne	3f
+	ld	r6,0(r9)        /* reloc->r_offset */
+	ld	r0,16(r9)       /* reloc->r_addend */
+	add	r0,r0,r7
+	stdx	r0,r7,r6
+	addi	r9,r9,24
+	bdnz	13b
+
+	/* Do a cache flush for our text, in case the loader didn't */
+3:	ld	r9,p_start-p_base(r10)	/* note: these are relocated now */
+	ld	r8,p_etext-p_base(r10)
+4:	dcbf	r0,r9
+	icbi	r0,r9
+	addi	r9,r9,0x20
+	cmpld	cr0,r9,r8
+	blt	4b
+	sync
+	isync
+
+	/* Clear the BSS */
+	ld	r9,p_bss_start-p_base(r10)
+	ld	r8,p_end-p_base(r10)
+	li	r0,0
+5:	std	r0,0(r9)
+	addi	r9,r9,8
+	cmpld	cr0,r9,r8
+	blt	5b
+
+	/* Possibly set up a custom stack */
+	ld	r8,p_pstack-p_base(r10)
+	cmpdi	r8,0
+	beq	6f
+	ld	r1,0(r8)
+	li	r0,0
+	stdu	r0,-16(r1)	/* establish a stack frame */
+6:
+#endif  /* __powerpc64__ */
 	/* Call platform_init() */
 	bl	platform_init
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 02/18] powerpc/boot: use a common prom_args struct in oflib
From: Cédric Le Goater @ 2014-03-20 15:09 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

This patch fixes warnings when the wrapper is compiled in 64bit and
updates the boot wrapper code related to prom to converge with the
kernel code in prom_init. This should make the review of changes easier.

The kernel has a different number of possible arguments (10) when
entering prom. There does not seem to be any good reason to have
12 in the wrapper, so the patch changes this value to args[10] in
the prom_args struct.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/of.h    |    2 ++
 arch/powerpc/boot/oflib.c |   29 +++++++++++++++--------------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h
index e4c68f7391c5..5da03d9b9463 100644
--- a/arch/powerpc/boot/of.h
+++ b/arch/powerpc/boot/of.h
@@ -18,4 +18,6 @@ int of_setprop(const void *phandle, const char *name, const void *buf,
 /* Console functions */
 void of_console_init(void);
 
+typedef u32			__be32;
+
 #endif /* _PPC_BOOT_OF_H_ */
diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
index b0ec9cf3eaaf..c3288a3446b3 100644
--- a/arch/powerpc/boot/oflib.c
+++ b/arch/powerpc/boot/oflib.c
@@ -16,6 +16,15 @@
 
 #include "of.h"
 
+/* The following structure is used to communicate with open firmware.
+ * All arguments in and out are in big endian format. */
+struct prom_args {
+	__be32 service;	/* Address of service name string. */
+	__be32 nargs;	/* Number of input arguments. */
+	__be32 nret;	/* Number of output arguments. */
+	__be32 args[10];	/* Input/output arguments. */
+};
+
 static int (*prom) (void *);
 
 void of_init(void *promptr)
@@ -23,18 +32,15 @@ void of_init(void *promptr)
 	prom = (int (*)(void *))promptr;
 }
 
+#define ADDR(x)		(u32)(unsigned long)(x)
+
 int of_call_prom(const char *service, int nargs, int nret, ...)
 {
 	int i;
-	struct prom_args {
-		const char *service;
-		int nargs;
-		int nret;
-		unsigned int args[12];
-	} args;
+	struct prom_args args;
 	va_list list;
 
-	args.service = service;
+	args.service = ADDR(service);
 	args.nargs = nargs;
 	args.nret = nret;
 
@@ -56,15 +62,10 @@ static int of_call_prom_ret(const char *service, int nargs, int nret,
 			    unsigned int *rets, ...)
 {
 	int i;
-	struct prom_args {
-		const char *service;
-		int nargs;
-		int nret;
-		unsigned int args[12];
-	} args;
+	struct prom_args args;
 	va_list list;
 
-	args.service = service;
+	args.service = ADDR(service);
 	args.nargs = nargs;
 	args.nret = nret;
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 10/18] powerpc/boot: add 64bit and little endian support to addnote
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

It could certainly be improved using Elf macros and byteswapping
routines, but the initial version of the code is organised to be a
single file program with limited dependencies. yaboot is the same.

Please scream if you want a total rewrite.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---

>From the comment in the header :

    "This is needed for OF on RS/6000s to load an image correctly" 

If so, the data written in the ELF note should be in Big Endian as this 
code is doing ?

Changes since RFC:

 - fixed the note creation which was done with the wrong endianess 
 - increased 'buf' as ELF structures are larger in 64bit

 arch/powerpc/boot/addnote.c |  128 ++++++++++++++++++++++++++++---------------
 1 file changed, 85 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c
index 349b5530d2c4..9d9f6f334d3c 100644
--- a/arch/powerpc/boot/addnote.c
+++ b/arch/powerpc/boot/addnote.c
@@ -6,6 +6,8 @@
  *
  * Copyright 2000 Paul Mackerras.
  *
+ * Adapted for 64 bit little endian images by Andrew Tauferner.
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
@@ -55,36 +57,61 @@ unsigned int rpanote[N_RPA_DESCR] = {
 
 #define ROUNDUP(len)	(((len) + 3) & ~3)
 
-unsigned char buf[512];
+unsigned char buf[1024];
+#define ELFDATA2LSB     1
+#define ELFDATA2MSB     2
+static int e_data = ELFDATA2MSB;
+#define ELFCLASS32      1
+#define ELFCLASS64      2
+static int e_class = ELFCLASS32;
 
 #define GET_16BE(off)	((buf[off] << 8) + (buf[(off)+1]))
-#define GET_32BE(off)	((GET_16BE(off) << 16) + GET_16BE((off)+2))
-
-#define PUT_16BE(off, v)	(buf[off] = ((v) >> 8) & 0xff, \
-				 buf[(off) + 1] = (v) & 0xff)
-#define PUT_32BE(off, v)	(PUT_16BE((off), (v) >> 16), \
-				 PUT_16BE((off) + 2, (v)))
+#define GET_32BE(off)	((GET_16BE(off) << 16U) + GET_16BE((off)+2U))
+#define GET_64BE(off)	((((unsigned long long)GET_32BE(off)) << 32ULL) + \
+			((unsigned long long)GET_32BE((off)+4ULL)))
+#define PUT_16BE(off, v)(buf[off] = ((v) >> 8) & 0xff, \
+			 buf[(off) + 1] = (v) & 0xff)
+#define PUT_32BE(off, v)(PUT_16BE((off), (v) >> 16L), PUT_16BE((off) + 2, (v)))
+#define PUT_64BE(off, v)((PUT_32BE((off), (v) >> 32L), \
+			  PUT_32BE((off) + 4, (v))))
+
+#define GET_16LE(off)	((buf[off]) + (buf[(off)+1] << 8))
+#define GET_32LE(off)	(GET_16LE(off) + (GET_16LE((off)+2U) << 16U))
+#define GET_64LE(off)	((unsigned long long)GET_32LE(off) + \
+			(((unsigned long long)GET_32LE((off)+4ULL)) << 32ULL))
+#define PUT_16LE(off, v) (buf[off] = (v) & 0xff, \
+			  buf[(off) + 1] = ((v) >> 8) & 0xff)
+#define PUT_32LE(off, v) (PUT_16LE((off), (v)), PUT_16LE((off) + 2, (v) >> 16L))
+#define PUT_64LE(off, v) (PUT_32LE((off), (v)), PUT_32LE((off) + 4, (v) >> 32L))
+
+#define GET_16(off)	(e_data == ELFDATA2MSB ? GET_16BE(off) : GET_16LE(off))
+#define GET_32(off)	(e_data == ELFDATA2MSB ? GET_32BE(off) : GET_32LE(off))
+#define GET_64(off)	(e_data == ELFDATA2MSB ? GET_64BE(off) : GET_64LE(off))
+#define PUT_16(off, v)	(e_data == ELFDATA2MSB ? PUT_16BE(off, v) : \
+			 PUT_16LE(off, v))
+#define PUT_32(off, v)  (e_data == ELFDATA2MSB ? PUT_32BE(off, v) : \
+			 PUT_32LE(off, v))
+#define PUT_64(off, v)  (e_data == ELFDATA2MSB ? PUT_64BE(off, v) : \
+			 PUT_64LE(off, v))
 
 /* Structure of an ELF file */
 #define E_IDENT		0	/* ELF header */
-#define	E_PHOFF		28
-#define E_PHENTSIZE	42
-#define E_PHNUM		44
-#define E_HSIZE		52	/* size of ELF header */
+#define	E_PHOFF		(e_class == ELFCLASS32 ? 28 : 32)
+#define E_PHENTSIZE	(e_class == ELFCLASS32 ? 42 : 54)
+#define E_PHNUM		(e_class == ELFCLASS32 ? 44 : 56)
+#define E_HSIZE		(e_class == ELFCLASS32 ? 52 : 64)
 
 #define EI_MAGIC	0	/* offsets in E_IDENT area */
 #define EI_CLASS	4
 #define EI_DATA		5
 
 #define PH_TYPE		0	/* ELF program header */
-#define PH_OFFSET	4
-#define PH_FILESZ	16
-#define PH_HSIZE	32	/* size of program header */
+#define PH_OFFSET	(e_class == ELFCLASS32 ? 4 : 8)
+#define PH_FILESZ	(e_class == ELFCLASS32 ? 16 : 32)
+#define PH_HSIZE	(e_class == ELFCLASS32 ? 32 : 56)
 
 #define PT_NOTE		4	/* Program header type = note */
 
-#define ELFCLASS32	1
-#define ELFDATA2MSB	2
 
 unsigned char elf_magic[4] = { 0x7f, 'E', 'L', 'F' };
 
@@ -92,8 +119,8 @@ int
 main(int ac, char **av)
 {
 	int fd, n, i;
-	int ph, ps, np;
-	int nnote, nnote2, ns;
+	unsigned long ph, ps, np;
+	long nnote, nnote2, ns;
 
 	if (ac != 2) {
 		fprintf(stderr, "Usage: %s elf-file\n", av[0]);
@@ -114,26 +141,27 @@ main(int ac, char **av)
 		exit(1);
 	}
 
-	if (n < E_HSIZE || memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0)
+	if (memcmp(&buf[E_IDENT+EI_MAGIC], elf_magic, 4) != 0)
+		goto notelf;
+	e_class = buf[E_IDENT+EI_CLASS];
+	if (e_class != ELFCLASS32 && e_class != ELFCLASS64)
+		goto notelf;
+	e_data = buf[E_IDENT+EI_DATA];
+	if (e_data != ELFDATA2MSB && e_data != ELFDATA2LSB)
+		goto notelf;
+	if (n < E_HSIZE)
 		goto notelf;
 
-	if (buf[E_IDENT+EI_CLASS] != ELFCLASS32
-	    || buf[E_IDENT+EI_DATA] != ELFDATA2MSB) {
-		fprintf(stderr, "%s is not a big-endian 32-bit ELF image\n",
-			av[1]);
-		exit(1);
-	}
-
-	ph = GET_32BE(E_PHOFF);
-	ps = GET_16BE(E_PHENTSIZE);
-	np = GET_16BE(E_PHNUM);
+	ph = (e_class == ELFCLASS32 ? GET_32(E_PHOFF) : GET_64(E_PHOFF));
+	ps = GET_16(E_PHENTSIZE);
+	np = GET_16(E_PHNUM);
 	if (ph < E_HSIZE || ps < PH_HSIZE || np < 1)
 		goto notelf;
 	if (ph + (np + 2) * ps + nnote + nnote2 > n)
 		goto nospace;
 
 	for (i = 0; i < np; ++i) {
-		if (GET_32BE(ph + PH_TYPE) == PT_NOTE) {
+		if (GET_32(ph + PH_TYPE) == PT_NOTE) {
 			fprintf(stderr, "%s already has a note entry\n",
 				av[1]);
 			exit(0);
@@ -148,15 +176,22 @@ main(int ac, char **av)
 
 	/* fill in the program header entry */
 	ns = ph + 2 * ps;
-	PUT_32BE(ph + PH_TYPE, PT_NOTE);
-	PUT_32BE(ph + PH_OFFSET, ns);
-	PUT_32BE(ph + PH_FILESZ, nnote);
+	PUT_32(ph + PH_TYPE, PT_NOTE);
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_OFFSET, ns);
+	else
+		PUT_64(ph + PH_OFFSET, ns);
+
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_FILESZ, nnote);
+	else
+		PUT_64(ph + PH_FILESZ, nnote);
 
 	/* fill in the note area we point to */
 	/* XXX we should probably make this a proper section */
-	PUT_32BE(ns, strlen(arch) + 1);
-	PUT_32BE(ns + 4, N_DESCR * 4);
-	PUT_32BE(ns + 8, 0x1275);
+	PUT_32(ns, strlen(arch) + 1);
+	PUT_32(ns + 4, N_DESCR * 4);
+	PUT_32(ns + 8, 0x1275);
 	strcpy((char *) &buf[ns + 12], arch);
 	ns += 12 + strlen(arch) + 1;
 	for (i = 0; i < N_DESCR; ++i, ns += 4)
@@ -164,21 +199,28 @@ main(int ac, char **av)
 
 	/* fill in the second program header entry and the RPA note area */
 	ph += ps;
-	PUT_32BE(ph + PH_TYPE, PT_NOTE);
-	PUT_32BE(ph + PH_OFFSET, ns);
-	PUT_32BE(ph + PH_FILESZ, nnote2);
+	PUT_32(ph + PH_TYPE, PT_NOTE);
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_OFFSET, ns);
+	else
+		PUT_64(ph + PH_OFFSET, ns);
+
+	if (e_class == ELFCLASS32)
+		PUT_32(ph + PH_FILESZ, nnote);
+	else
+		PUT_64(ph + PH_FILESZ, nnote2);
 
 	/* fill in the note area we point to */
-	PUT_32BE(ns, strlen(rpaname) + 1);
-	PUT_32BE(ns + 4, sizeof(rpanote));
-	PUT_32BE(ns + 8, 0x12759999);
+	PUT_32(ns, strlen(rpaname) + 1);
+	PUT_32(ns + 4, sizeof(rpanote));
+	PUT_32(ns + 8, 0x12759999);
 	strcpy((char *) &buf[ns + 12], rpaname);
 	ns += 12 + ROUNDUP(strlen(rpaname) + 1);
 	for (i = 0; i < N_RPA_DESCR; ++i, ns += 4)
 		PUT_32BE(ns, rpanote[i]);
 
 	/* Update the number of program headers */
-	PUT_16BE(E_PHNUM, np + 2);
+	PUT_16(E_PHNUM, np + 2);
 
 	/* write back */
 	lseek(fd, (long) 0, SEEK_SET);
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 03/18] powerpc/boot: use prom_arg_t in oflib
From: Cédric Le Goater @ 2014-03-20 15:09 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

This patch updates the wrapper code to converge with the kernel code in
prom_init.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/oflib.c |   10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
index c3288a3446b3..3b0c9458504f 100644
--- a/arch/powerpc/boot/oflib.c
+++ b/arch/powerpc/boot/oflib.c
@@ -16,6 +16,8 @@
 
 #include "of.h"
 
+typedef u32 prom_arg_t;
+
 /* The following structure is used to communicate with open firmware.
  * All arguments in and out are in big endian format. */
 struct prom_args {
@@ -46,7 +48,7 @@ int of_call_prom(const char *service, int nargs, int nret, ...)
 
 	va_start(list, nret);
 	for (i = 0; i < nargs; i++)
-		args.args[i] = va_arg(list, unsigned int);
+		args.args[i] = va_arg(list, prom_arg_t);
 	va_end(list);
 
 	for (i = 0; i < nret; i++)
@@ -59,7 +61,7 @@ int of_call_prom(const char *service, int nargs, int nret, ...)
 }
 
 static int of_call_prom_ret(const char *service, int nargs, int nret,
-			    unsigned int *rets, ...)
+			    prom_arg_t *rets, ...)
 {
 	int i;
 	struct prom_args args;
@@ -71,7 +73,7 @@ static int of_call_prom_ret(const char *service, int nargs, int nret,
 
 	va_start(list, rets);
 	for (i = 0; i < nargs; i++)
-		args.args[i] = va_arg(list, unsigned int);
+		args.args[i] = va_arg(list, prom_arg_t);
 	va_end(list);
 
 	for (i = 0; i < nret; i++)
@@ -148,7 +150,7 @@ static int check_of_version(void)
 void *of_claim(unsigned long virt, unsigned long size, unsigned long align)
 {
 	int ret;
-	unsigned int result;
+	prom_arg_t result;
 
 	if (need_map < 0)
 		need_map = check_of_version();
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 06/18] powerpc/boot: rework of_claim() to make it 64bit friendly
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

This patch fixes 64bit compile warnings and updates the wrapper code
to converge the kernel code in prom_init.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/of.c    |    4 ++--
 arch/powerpc/boot/of.h    |    3 ++-
 arch/powerpc/boot/oflib.c |   15 ++++++++-------
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c
index 62e2f43ec1df..6836a2b2112a 100644
--- a/arch/powerpc/boot/of.c
+++ b/arch/powerpc/boot/of.c
@@ -40,8 +40,8 @@ static void *of_try_claim(unsigned long size)
 #ifdef DEBUG
 		printf("    trying: 0x%08lx\n\r", claim_base);
 #endif
-		addr = (unsigned long)of_claim(claim_base, size, 0);
-		if ((void *)addr != (void *)-1)
+		addr = (unsigned long) of_claim(claim_base, size, 0);
+		if (addr != -1)
 			break;
 	}
 	if (addr == 0)
diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h
index 40d95bf7402b..3f35cf0ec432 100644
--- a/arch/powerpc/boot/of.h
+++ b/arch/powerpc/boot/of.h
@@ -6,7 +6,8 @@ typedef void *ihandle;
 
 void of_init(void *promptr);
 int of_call_prom(const char *service, int nargs, int nret, ...);
-void *of_claim(unsigned long virt, unsigned long size, unsigned long align);
+unsigned int of_claim(unsigned long virt, unsigned long size,
+	unsigned long align);
 void *of_vmlinux_alloc(unsigned long size);
 void of_exit(void);
 void *of_finddevice(const char *name);
diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
index 96fe4d225abe..cb6a5de0c257 100644
--- a/arch/powerpc/boot/oflib.c
+++ b/arch/powerpc/boot/oflib.c
@@ -149,7 +149,8 @@ static int check_of_version(void)
 	return 1;
 }
 
-void *of_claim(unsigned long virt, unsigned long size, unsigned long align)
+unsigned int of_claim(unsigned long virt, unsigned long size,
+		      unsigned long align)
 {
 	int ret;
 	prom_arg_t result;
@@ -157,32 +158,32 @@ void *of_claim(unsigned long virt, unsigned long size, unsigned long align)
 	if (need_map < 0)
 		need_map = check_of_version();
 	if (align || !need_map)
-		return (void *) of_call_prom("claim", 3, 1, virt, size, align);
+		return of_call_prom("claim", 3, 1, virt, size, align);
 
 	ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", memory,
 			       align, size, virt);
 	if (ret != 0 || result == -1)
-		return (void *) -1;
+		return  -1;
 	ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu,
 			       align, size, virt);
 	/* 0x12 == coherent + read/write */
 	ret = of_call_prom("call-method", 6, 1, "map", chosen_mmu,
 			   0x12, size, virt, virt);
-	return (void *) virt;
+	return virt;
 }
 
 void *of_vmlinux_alloc(unsigned long size)
 {
 	unsigned long start = (unsigned long)_start, end = (unsigned long)_end;
-	void *addr;
+	unsigned long addr;
 	void *p;
 
 	/* With some older POWER4 firmware we need to claim the area the kernel
 	 * will reside in.  Newer firmwares don't need this so we just ignore
 	 * the return value.
 	 */
-	addr = of_claim(start, end - start, 0);
-	printf("Trying to claim from 0x%lx to 0x%lx (0x%lx) got %p\r\n",
+	addr = (unsigned long) of_claim(start, end - start, 0);
+	printf("Trying to claim from 0x%lx to 0x%lx (0x%lx) got %lx\r\n",
 	       start, end, end - start, addr);
 
 	p = malloc(size);
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 12/18] powerpc/boot: define a routine to enter prom
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

This patch defines a 'prom' routine similar to 'enter_prom' in the
kernel.

The difference is in the MSR which is built before entering prom. Big
endian order is enforced as in the kernel but 32bit mode is not. It
prepares ground for the next patches which will introduce Little endian
order.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/crt0.S  |   71 +++++++++++++++++++++++++++++++++++++++++++++
 arch/powerpc/boot/oflib.c |    6 ++++
 2 files changed, 77 insertions(+)

diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index 0f7428a37efb..dbd99d064828 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -126,3 +126,74 @@ RELACOUNT = 0x6ffffff9
 
 	/* Call start */
 	b	start
+
+#ifdef __powerpc64__
+
+#define PROM_FRAME_SIZE 512
+#define SAVE_GPR(n, base)       std     n,8*(n)(base)
+#define REST_GPR(n, base)       ld      n,8*(n)(base)
+#define SAVE_2GPRS(n, base)     SAVE_GPR(n, base); SAVE_GPR(n+1, base)
+#define SAVE_4GPRS(n, base)     SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
+#define SAVE_8GPRS(n, base)     SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
+#define SAVE_10GPRS(n, base)    SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base)
+#define REST_2GPRS(n, base)     REST_GPR(n, base); REST_GPR(n+1, base)
+#define REST_4GPRS(n, base)     REST_2GPRS(n, base); REST_2GPRS(n+2, base)
+#define REST_8GPRS(n, base)     REST_4GPRS(n, base); REST_4GPRS(n+4, base)
+#define REST_10GPRS(n, base)    REST_8GPRS(n, base); REST_2GPRS(n+8, base)
+
+/* prom handles the jump into and return from firmware.  The prom args pointer
+   is loaded in r3. */
+.globl prom
+prom:
+	mflr	r0
+	std	r0,16(r1)
+	stdu	r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
+
+	SAVE_GPR(2, r1)
+	SAVE_GPR(13, r1)
+	SAVE_8GPRS(14, r1)
+	SAVE_10GPRS(22, r1)
+	mfcr    r10
+	std     r10,8*32(r1)
+	mfmsr   r10
+	std     r10,8*33(r1)
+
+	/* remove MSR_LE from msr but keep MSR_SF */
+	mfmsr	r10
+	rldicr	r10,r10,0,62
+	mtsrr1	r10
+
+	/* Load FW address, set LR to label 1, and jump to FW */
+	bl	0f
+0:	mflr	r10
+	addi	r11,r10,(1f-0b)
+	mtlr	r11
+
+	ld	r10,(p_prom-0b)(r10)
+	mtsrr0	r10
+
+	rfid
+
+1:	/* Return from OF */
+
+	/* Restore registers and return. */
+	rldicl  r1,r1,0,32
+
+	/* Restore the MSR (back to 64 bits) */
+	ld      r10,8*(33)(r1)
+	mtmsr	r10
+	isync
+
+	/* Restore other registers */
+	REST_GPR(2, r1)
+	REST_GPR(13, r1)
+	REST_8GPRS(14, r1)
+	REST_10GPRS(22, r1)
+	ld      r10,8*32(r1)
+	mtcr	r10
+
+	addi    r1,r1,PROM_FRAME_SIZE
+	ld      r0,16(r1)
+	mtlr    r0
+	blr
+#endif
diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
index e8697df2df27..488548a607c2 100644
--- a/arch/powerpc/boot/oflib.c
+++ b/arch/powerpc/boot/oflib.c
@@ -29,11 +29,17 @@ struct prom_args {
 
 #define PROM_ERROR (-1u)
 
+#ifdef __powerpc64__
+extern int prom(void *);
+#else
 static int (*prom) (void *);
+#endif
 
 void of_init(void *promptr)
 {
+#ifndef __powerpc64__
 	prom = (int (*)(void *))promptr;
+#endif
 }
 
 #define ADDR(x)		(u32)(unsigned long)(x)
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 16/18] powerpc/boot: add support for 64bit big endian wrapper
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

The boot wrapper is now compiled using -m64 for 64bit kernels.

The linker script is generated using the kernel preprocessor flags
to make use of the CONFIG_PPC64 definitions and the wrapper script is
modified to take into account the new elf64ppc format.

zImage is compiled as a position independent executable (-pie) which
makes it loadable at any address by the firmware. This is subject to
comment as I am not sure what are the requirement for the different
boot loaders.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/Makefile     |    9 ++++++++-
 arch/powerpc/boot/wrapper      |   14 +++++++++++++-
 arch/powerpc/boot/zImage.lds.S |   25 ++++++++++++++++++++++++-
 3 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 4db1c47bfe12..e0744781ba5b 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -24,6 +24,9 @@ BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
 		 -isystem $(shell $(CROSS32CC) -print-file-name=include) \
 		 -mbig-endian
+ifdef CONFIG_PPC64
+BOOTCFLAGS	+= -m64
+endif
 BOOTAFLAGS	:= -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
 
 ifdef CONFIG_DEBUG_INFO
@@ -139,7 +142,11 @@ $(addprefix $(obj)/,$(libfdt) $(libfdtheader)): $(obj)/%: $(srctree)/scripts/dtc
 $(obj)/empty.c:
 	@touch $@
 
-$(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds: $(obj)/%: $(srctree)/$(src)/%.S
+$(obj)/zImage.lds: $(obj)/%: $(srctree)/$(src)/%.S
+	$(CROSS32CC) $(cpp_flags) -E -Wp,-MD,$(depfile) -P -Upowerpc \
+		-D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $<
+
+$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
 	@cp $< $@
 
 clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) \
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 5889c440a66a..baaad96eed7f 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -40,6 +40,7 @@ cacheit=
 binary=
 gzip=.gz
 pie=
+format=
 
 # cross-compilation prefix
 CROSS=
@@ -136,6 +137,13 @@ if [ -z "$kernel" ]; then
     kernel=vmlinux
 fi
 
+elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
+case "$elfformat" in
+    elf64-powerpc)	format=elf64ppc	;;
+    elf32-powerpc)	format=elf32ppc	;;
+esac
+
+
 platformo=$object/"$platform".o
 lds=$object/zImage.lds
 ext=strip
@@ -154,6 +162,10 @@ of)
 pseries)
     platformo="$object/pseries-head.o $object/of.o $object/epapr.o"
     link_address='0x4000000'
+    if [ "$format" != "elf32ppc" ]; then
+	link_address=
+	pie=-pie
+    fi
     make_space=n
     ;;
 maple)
@@ -379,7 +391,7 @@ if [ "$platform" != "miboot" ]; then
     if [ -n "$link_address" ] ; then
         text_start="-Ttext $link_address"
     fi
-    ${CROSS}ld -m elf32ppc -T $lds $text_start $pie -o "$ofile" \
+    ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \
 	$platformo $tmp $object/wrapper.a
     rm $tmp
 fi
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 2bd8731f1365..afecab0aff5c 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -1,4 +1,10 @@
+#include <asm-generic/vmlinux.lds.h>
+
+#ifdef CONFIG_PPC64
+OUTPUT_ARCH(powerpc:common64)
+#else
 OUTPUT_ARCH(powerpc:common)
+#endif
 ENTRY(_zimage_start)
 EXTERN(_zimage_start)
 SECTIONS
@@ -16,7 +22,9 @@ SECTIONS
     *(.rodata*)
     *(.data*)
     *(.sdata*)
+#ifdef CONFIG_PPC32
     *(.got2)
+#endif
   }
   .dynsym : { *(.dynsym) }
   .dynstr : { *(.dynstr) }
@@ -27,7 +35,13 @@ SECTIONS
   }
   .hash : { *(.hash) }
   .interp : { *(.interp) }
-  .rela.dyn : { *(.rela*) }
+  .rela.dyn :
+  {
+#ifdef CONFIG_PPC64
+    __rela_dyn_start = .;
+#endif
+    *(.rela*)
+  }
 
   . = ALIGN(8);
   .kernel:dtb :
@@ -53,6 +67,15 @@ SECTIONS
     _initrd_end =  .;
   }
 
+#ifdef CONFIG_PPC64
+  .got :
+  {
+    __toc_start = .;
+    *(.got)
+    *(.toc)
+  }
+#endif
+
   . = ALIGN(4096);
   .bss       :
   {
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 14/18] powerpc/boot: modify how we enter kernel on 64bit
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/main.c |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index a28f02165e97..46a7464e13c2 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -205,7 +205,11 @@ void start(void)
 	if (console_ops.close)
 		console_ops.close();
 
+#ifdef __powerpc64__
+	kentry = (kernel_entry_t) &vmlinux.addr;
+#else
 	kentry = (kernel_entry_t) vmlinux.addr;
+#endif
 	if (ft_addr)
 		kentry(ft_addr, 0, NULL);
 	else
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 15/18] powerpc/boot: add a global entry point for pseries
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

When entering the boot wrapper in little endian, we will need to fix
the endian order using a fixup trampoline like in the kernel. This
patch overrides the _zimage_start entry point for this purpose.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/Makefile       |    2 ++
 arch/powerpc/boot/pseries-head.S |    5 +++++
 arch/powerpc/boot/wrapper        |    2 +-
 3 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/boot/pseries-head.S

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 90e9d9548660..4db1c47bfe12 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -99,6 +99,8 @@ src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \
 src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c
 src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c
 src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
+src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S
+
 
 src-wlib := $(sort $(src-wlib-y))
 src-plat := $(sort $(src-plat-y))
diff --git a/arch/powerpc/boot/pseries-head.S b/arch/powerpc/boot/pseries-head.S
new file mode 100644
index 000000000000..655c3d2c321b
--- /dev/null
+++ b/arch/powerpc/boot/pseries-head.S
@@ -0,0 +1,5 @@
+	.text
+
+	.globl _zimage_start
+_zimage_start:
+	b _zimage_start_lib
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index d27a25518b01..5889c440a66a 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -152,7 +152,7 @@ of)
     make_space=n
     ;;
 pseries)
-    platformo="$object/of.o $object/epapr.o"
+    platformo="$object/pseries-head.o $object/of.o $object/epapr.o"
     link_address='0x4000000'
     make_space=n
     ;;
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 07/18] powerpc/boot: define typedef ihandle as u32
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

This makes ihandle 64bit friendly.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/of.h    |    2 +-
 arch/powerpc/boot/oflib.c |   10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h
index 3f35cf0ec432..bf228fea3517 100644
--- a/arch/powerpc/boot/of.h
+++ b/arch/powerpc/boot/of.h
@@ -2,7 +2,7 @@
 #define _PPC_BOOT_OF_H_
 
 typedef void *phandle;
-typedef void *ihandle;
+typedef u32 ihandle;
 
 void of_init(void *promptr);
 int of_call_prom(const char *service, int nargs, int nret, ...);
diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
index cb6a5de0c257..5ab9cb57cd31 100644
--- a/arch/powerpc/boot/oflib.c
+++ b/arch/powerpc/boot/oflib.c
@@ -108,7 +108,7 @@ static int string_match(const char *s1, const char *s2)
  */
 static int need_map = -1;
 static ihandle chosen_mmu;
-static phandle memory;
+static ihandle memory;
 
 static int check_of_version(void)
 {
@@ -137,10 +137,10 @@ static int check_of_version(void)
 		printf("no mmu\n");
 		return 0;
 	}
-	memory = (ihandle) of_call_prom("open", 1, 1, "/memory");
-	if (memory == (ihandle) -1) {
-		memory = (ihandle) of_call_prom("open", 1, 1, "/memory@0");
-		if (memory == (ihandle) -1) {
+	memory = of_call_prom("open", 1, 1, "/memory");
+	if (memory == PROM_ERROR) {
+		memory = of_call_prom("open", 1, 1, "/memory@0");
+		if (memory == PROM_ERROR) {
 			printf("no memory node\n");
 			return 0;
 		}
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 09/18] powerpc/boot: define byteswapping routines for little endian
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

These are not the most efficient versions of swab but the wrapper does
not do much byte swapping. On a big endian cpu, these routines are
a no-op.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/of.h   |    7 +++++++
 arch/powerpc/boot/swab.h |   29 +++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 arch/powerpc/boot/swab.h

diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h
index bf228fea3517..fc71848f6962 100644
--- a/arch/powerpc/boot/of.h
+++ b/arch/powerpc/boot/of.h
@@ -1,6 +1,8 @@
 #ifndef _PPC_BOOT_OF_H_
 #define _PPC_BOOT_OF_H_
 
+#include "swab.h"
+
 typedef void *phandle;
 typedef u32 ihandle;
 
@@ -21,7 +23,12 @@ void of_console_init(void);
 
 typedef u32			__be32;
 
+#ifdef __LITTLE_ENDIAN__
+#define cpu_to_be32(x) swab32(x)
+#define be32_to_cpu(x) swab32(x)
+#else
 #define cpu_to_be32(x) (x)
 #define be32_to_cpu(x) (x)
+#endif
 
 #endif /* _PPC_BOOT_OF_H_ */
diff --git a/arch/powerpc/boot/swab.h b/arch/powerpc/boot/swab.h
new file mode 100644
index 000000000000..d0e1431084ca
--- /dev/null
+++ b/arch/powerpc/boot/swab.h
@@ -0,0 +1,29 @@
+#ifndef _PPC_BOOT_SWAB_H_
+#define _PPC_BOOT_SWAB_H_
+
+static inline u16 swab16(u16 x)
+{
+	return  ((x & (u16)0x00ffU) << 8) |
+		((x & (u16)0xff00U) >> 8);
+}
+
+static inline u32 swab32(u32 x)
+{
+	return  ((x & (u32)0x000000ffUL) << 24) |
+		((x & (u32)0x0000ff00UL) <<  8) |
+		((x & (u32)0x00ff0000UL) >>  8) |
+		((x & (u32)0xff000000UL) >> 24);
+}
+
+static inline u64 swab64(u64 x)
+{
+	return  (u64)((x & (u64)0x00000000000000ffULL) << 56) |
+		(u64)((x & (u64)0x000000000000ff00ULL) << 40) |
+		(u64)((x & (u64)0x0000000000ff0000ULL) << 24) |
+		(u64)((x & (u64)0x00000000ff000000ULL) <<  8) |
+		(u64)((x & (u64)0x000000ff00000000ULL) >>  8) |
+		(u64)((x & (u64)0x0000ff0000000000ULL) >> 24) |
+		(u64)((x & (u64)0x00ff000000000000ULL) >> 40) |
+		(u64)((x & (u64)0xff00000000000000ULL) >> 56);
+}
+#endif /* _PPC_BOOT_SWAB_H_ */
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 01/18] powerpc/boot: fix do_div for 64bit wrapper
From: Cédric Le Goater @ 2014-03-20 15:09 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

When the boot wrapper is compiled in 64bit, there is no need to
use __div64_32.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/stdio.c |   14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/boot/stdio.c b/arch/powerpc/boot/stdio.c
index 5b57800bbc67..a701261b1781 100644
--- a/arch/powerpc/boot/stdio.c
+++ b/arch/powerpc/boot/stdio.c
@@ -21,6 +21,18 @@ size_t strnlen(const char * s, size_t count)
 	return sc - s;
 }
 
+#ifdef __powerpc64__
+
+# define do_div(n, base) ({						\
+	unsigned int __base = (base);					\
+	unsigned int __rem;						\
+	__rem = ((unsigned long long)(n)) % __base;			\
+	(n) = ((unsigned long long)(n)) / __base;			\
+	__rem;								\
+})
+
+#else
+
 extern unsigned int __div64_32(unsigned long long *dividend,
 			       unsigned int divisor);
 
@@ -39,6 +51,8 @@ extern unsigned int __div64_32(unsigned long long *dividend,
 	__rem;								\
  })
 
+#endif /* __powerpc64__ */
+
 static int skip_atoi(const char **s)
 {
 	int i, c;
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 04/18] powerpc/boot: add byteswapping routines in oflib
From: Cédric Le Goater @ 2014-03-20 15:09 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

Values will need to be byte-swapped when calling prom (big endian) from
a little endian boot wrapper.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/of.h        |    3 +++
 arch/powerpc/boot/ofconsole.c |    6 ++++--
 arch/powerpc/boot/oflib.c     |   22 +++++++++++-----------
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h
index 5da03d9b9463..40d95bf7402b 100644
--- a/arch/powerpc/boot/of.h
+++ b/arch/powerpc/boot/of.h
@@ -20,4 +20,7 @@ void of_console_init(void);
 
 typedef u32			__be32;
 
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+
 #endif /* _PPC_BOOT_OF_H_ */
diff --git a/arch/powerpc/boot/ofconsole.c b/arch/powerpc/boot/ofconsole.c
index ce0e02424453..8b754702460a 100644
--- a/arch/powerpc/boot/ofconsole.c
+++ b/arch/powerpc/boot/ofconsole.c
@@ -18,7 +18,7 @@
 
 #include "of.h"
 
-static void *of_stdout_handle;
+static unsigned int of_stdout_handle;
 
 static int of_console_open(void)
 {
@@ -27,8 +27,10 @@ static int of_console_open(void)
 	if (((devp = of_finddevice("/chosen")) != NULL)
 	    && (of_getprop(devp, "stdout", &of_stdout_handle,
 			   sizeof(of_stdout_handle))
-		== sizeof(of_stdout_handle)))
+		== sizeof(of_stdout_handle))) {
+		of_stdout_handle = be32_to_cpu(of_stdout_handle);
 		return 0;
+	}
 
 	return -1;
 }
diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
index 3b0c9458504f..0f72b1a42133 100644
--- a/arch/powerpc/boot/oflib.c
+++ b/arch/powerpc/boot/oflib.c
@@ -42,13 +42,13 @@ int of_call_prom(const char *service, int nargs, int nret, ...)
 	struct prom_args args;
 	va_list list;
 
-	args.service = ADDR(service);
-	args.nargs = nargs;
-	args.nret = nret;
+	args.service = cpu_to_be32(ADDR(service));
+	args.nargs = cpu_to_be32(nargs);
+	args.nret = cpu_to_be32(nret);
 
 	va_start(list, nret);
 	for (i = 0; i < nargs; i++)
-		args.args[i] = va_arg(list, prom_arg_t);
+		args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
 	va_end(list);
 
 	for (i = 0; i < nret; i++)
@@ -57,7 +57,7 @@ int of_call_prom(const char *service, int nargs, int nret, ...)
 	if (prom(&args) < 0)
 		return -1;
 
-	return (nret > 0)? args.args[nargs]: 0;
+	return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;
 }
 
 static int of_call_prom_ret(const char *service, int nargs, int nret,
@@ -67,13 +67,13 @@ static int of_call_prom_ret(const char *service, int nargs, int nret,
 	struct prom_args args;
 	va_list list;
 
-	args.service = ADDR(service);
-	args.nargs = nargs;
-	args.nret = nret;
+	args.service = cpu_to_be32(ADDR(service));
+	args.nargs = cpu_to_be32(nargs);
+	args.nret = cpu_to_be32(nret);
 
 	va_start(list, rets);
 	for (i = 0; i < nargs; i++)
-		args.args[i] = va_arg(list, prom_arg_t);
+		args.args[i] = cpu_to_be32(va_arg(list, prom_arg_t));
 	va_end(list);
 
 	for (i = 0; i < nret; i++)
@@ -84,9 +84,9 @@ static int of_call_prom_ret(const char *service, int nargs, int nret,
 
 	if (rets != (void *) 0)
 		for (i = 1; i < nret; ++i)
-			rets[i-1] = args.args[nargs+i];
+			rets[i-1] = be32_to_cpu(args.args[nargs+i]);
 
-	return (nret > 0)? args.args[nargs]: 0;
+	return (nret > 0) ? be32_to_cpu(args.args[nargs]) : 0;
 }
 
 /* returns true if s2 is a prefix of s1 */
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 11/18] powerpc/boot: add little endian support to elf utils
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/elf_util.c |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/boot/elf_util.c b/arch/powerpc/boot/elf_util.c
index 1567a0c0f05c..316552dea4d8 100644
--- a/arch/powerpc/boot/elf_util.c
+++ b/arch/powerpc/boot/elf_util.c
@@ -26,7 +26,11 @@ int parse_elf64(void *hdr, struct elf_info *info)
 	      elf64->e_ident[EI_MAG2]  == ELFMAG2	&&
 	      elf64->e_ident[EI_MAG3]  == ELFMAG3	&&
 	      elf64->e_ident[EI_CLASS] == ELFCLASS64	&&
+#ifdef __LITTLE_ENDIAN__
+	      elf64->e_ident[EI_DATA]  == ELFDATA2LSB	&&
+#else
 	      elf64->e_ident[EI_DATA]  == ELFDATA2MSB	&&
+#endif
 	      (elf64->e_type            == ET_EXEC ||
 	       elf64->e_type            == ET_DYN)	&&
 	      elf64->e_machine         == EM_PPC64))
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 18/18] powerpc/boot: add PPC64_BOOT_WRAPPER config option
From: Cédric Le Goater @ 2014-03-20 15:10 UTC (permalink / raw)
  To: benh; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1391788771-16405-1-git-send-email-clg@fr.ibm.com>

The previous patch broke compatibility for 64bit big endian kernel.

This patch adds a config option to compile the boot wrapper in 64bit
only when CPU_LITTLE_ENDIAN is selected. It restores 32bit compilation
and linking for the big endian kernel.

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
 arch/powerpc/boot/Makefile             |    2 +-
 arch/powerpc/boot/wrapper              |    2 +-
 arch/powerpc/boot/zImage.lds.S         |    8 ++++----
 arch/powerpc/platforms/Kconfig.cputype |    5 +++++
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index b7e640028711..35189540b699 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -23,7 +23,7 @@ BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		 -fno-strict-aliasing -Os -msoft-float -pipe \
 		 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
 		 -isystem $(shell $(CROSS32CC) -print-file-name=include)
-ifdef CONFIG_PPC64
+ifdef CONFIG_PPC64_BOOT_WRAPPER
 BOOTCFLAGS	+= -m64
 endif
 ifdef CONFIG_CPU_BIG_ENDIAN
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 3270e2a5c901..1948cf8b8a40 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -140,7 +140,7 @@ fi
 elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
 case "$elfformat" in
     elf64-powerpcle)	format=elf64lppc	;;
-    elf64-powerpc)	format=elf64ppc	;;
+    elf64-powerpc)	format=elf32ppc	;;
     elf32-powerpc)	format=elf32ppc	;;
 esac
 
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index afecab0aff5c..861e72109df2 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -1,6 +1,6 @@
 #include <asm-generic/vmlinux.lds.h>
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
 OUTPUT_ARCH(powerpc:common64)
 #else
 OUTPUT_ARCH(powerpc:common)
@@ -22,7 +22,7 @@ SECTIONS
     *(.rodata*)
     *(.data*)
     *(.sdata*)
-#ifdef CONFIG_PPC32
+#ifndef CONFIG_PPC64_BOOT_WRAPPER
     *(.got2)
 #endif
   }
@@ -37,7 +37,7 @@ SECTIONS
   .interp : { *(.interp) }
   .rela.dyn :
   {
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
     __rela_dyn_start = .;
 #endif
     *(.rela*)
@@ -67,7 +67,7 @@ SECTIONS
     _initrd_end =  .;
   }
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
   .got :
   {
     __toc_start = .;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 434fda39bf8b..a4aadb091a1e 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -421,6 +421,7 @@ config CPU_BIG_ENDIAN
 
 config CPU_LITTLE_ENDIAN
 	bool "Build little endian kernel"
+	select PPC64_BOOT_WRAPPER
 	help
 	  Build a little endian kernel.
 
@@ -429,3 +430,7 @@ config CPU_LITTLE_ENDIAN
 	  little endian powerpc.
 
 endchoice
+
+config PPC64_BOOT_WRAPPER
+	def_bool n
+	depends on CPU_LITTLE_ENDIAN
-- 
1.7.10.4

^ permalink raw reply related

* Re: Tasks stuck in futex code (in 3.14-rc6)
From: Davidlohr Bueso @ 2014-03-20 16:31 UTC (permalink / raw)
  To: Srikar Dronamraju
  Cc: Peter Zijlstra, torvalds, LKML, paulus, tglx, Paul McKenney,
	linuxppc-dev, mingo
In-Reply-To: <1395295019.2612.11.camel@buesod1.americas.hpqcorp.net>

On Wed, 2014-03-19 at 22:56 -0700, Davidlohr Bueso wrote:
> On Thu, 2014-03-20 at 11:03 +0530, Srikar Dronamraju wrote:
> > > > Joy,.. let me look at that with ppc in mind.
> > > 
> > > OK; so while pretty much all the comments from that patch are utter
> > > nonsense (what was I thinking), I cannot actually find a real bug.
> > > 
> > > But could you try the below which replaces a control dependency with a
> > > full barrier. The control flow is plenty convoluted that I think the
> > > control barrier isn't actually valid anymore and that might indeed
> > > explain the fail.
> > > 
> > 
> > Unfortunately the patch didnt help. Still seeing tasks stuck
> > 
> > # ps -Ao pid,tt,user,fname,tmout,f,wchan | grep futex
> > 14680 pts/0    root     java         - 0 futex_wait_queue_me
> > 14797 pts/0    root     java         - 0 futex_wait_queue_me
> > # :> /var/log/messages
> > # echo t > /proc/sysrq-trigger 
> > # grep futex_wait_queue_me /var/log/messages | wc -l 
> > 334
> > #
> > 
> > [ 6904.211478] Call Trace:
> > [ 6904.211481] [c000000fa1f1b4d0] [0000000000000020] 0x20 (unreliable)
> > [ 6904.211486] [c000000fa1f1b6a0] [c000000000015208] .__switch_to+0x1e8/0x330
> > [ 6904.211491] [c000000fa1f1b750] [c000000000702f00] .__schedule+0x360/0x8b0
> > [ 6904.211495] [c000000fa1f1b9d0] [c000000000147348] .futex_wait_queue_me+0xf8/0x1a0
> > [ 6904.211500] [c000000fa1f1ba60] [c0000000001486dc] .futex_wait+0x17c/0x2a0
> > [ 6904.211505] [c000000fa1f1bc10] [c00000000014a614] .do_futex+0x254/0xd80
> > [ 6904.211510] [c000000fa1f1bd60] [c00000000014b25c] .SyS_futex+0x11c/0x1d0
> > [ 6904.238874] [c000000fa1f1be30] [c00000000000a0fc] syscall_exit+0x0/0x7c
> > [ 6904.238879] java            S 00003fff825f6044     0 14682  14076 0x00000080
> > 
> > Is there any other information that I provide that can help?
> 
> This problem suggests that we missed a wakeup for a task that was adding
> itself to the queue in a wait path. And the only place that can happen
> is with the hb spinlock check for any pending waiters. Just in case we
> missed some assumption about checking the hash bucket spinlock as a way
> of detecting any waiters (powerpc?), could you revert this commit and
> try the original atomic operations variant:
> 
> https://lkml.org/lkml/2013/12/19/630

hmmm looking at ppc spinlock code, it seems that it doesn't have ticket
spinlocks -- in fact Torsten Duwe has been trying to get them upstream
very recently. Since we rely on the counter for detecting waiters, this
might explain the issue. Could someone confirm this spinlock
implementation difference? 

^ permalink raw reply

* Re: Tasks stuck in futex code (in 3.14-rc6)
From: Linus Torvalds @ 2014-03-20 16:41 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: Srikar Dronamraju, Peter Zijlstra, LKML, Paul Mackerras,
	Thomas Gleixner, Paul McKenney, ppc-dev, Ingo Molnar
In-Reply-To: <1395295019.2612.11.camel@buesod1.americas.hpqcorp.net>

[-- Attachment #1: Type: text/plain, Size: 1678 bytes --]

On Wed, Mar 19, 2014 at 10:56 PM, Davidlohr Bueso <davidlohr@hp.com> wrote:
>
> This problem suggests that we missed a wakeup for a task that was adding
> itself to the queue in a wait path. And the only place that can happen
> is with the hb spinlock check for any pending waiters.

Ok, so thinking about hb_waiters_pending():

 - spin_is_locked() test
 - read barrier
 - plist_head_empty() test

seems to be broken after all.

The race is against futex_wait() that does

 - futex_wait_setup():
   - queue_lock()
   - get_futex_value_locked()
 - futex_wait_queue_me()
   - queue_me()
     - plist_add()

right?

It strikes me that the "spin_is_locked()" test has no barriers wrt the
writing of the new futex value on the wake path. And the read barrier
obviously does nothing wrt the write either. Or am I missing
something? So the write that actually released the futex might be
almost arbitrarily delayed on the waking side. So the waiting side may
not see the new value, even though the waker assumes it does due to
the ordering of it doing the write first.

So maybe we need a memory barrier in hb_waiters_pending() just to make
sure that the new value is written.

But at that point, I suspect that Davidlohrs original patch that just
had explicit waiting counts is just as well. The whole point with the
head empty test was to emulate that "do we have waiters" without
having to actually add the atomics, but a memory barrier is really no
worse.

The attached is a TOTALLY UNTESTED interdiff that adds back Davidlohrs
atomic count. It may be terminally broken, I literally didn't test it
at all.

Davidlohr, mind checking and correcting this?

                     Linus

[-- Attachment #2: patch.diff --]
[-- Type: text/plain, Size: 3576 bytes --]

 kernel/futex.c | 53 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 10 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 44a1261cb9ff..08ec814ad9d2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -234,6 +234,7 @@ static const struct futex_q futex_q_init = {
  * waiting on a futex.
  */
 struct futex_hash_bucket {
+	atomic_t waiters;
 	spinlock_t lock;
 	struct plist_head chain;
 } ____cacheline_aligned_in_smp;
@@ -253,22 +254,37 @@ static inline void futex_get_mm(union futex_key *key)
 	smp_mb__after_atomic_inc();
 }
 
-static inline bool hb_waiters_pending(struct futex_hash_bucket *hb)
+/*
+ * Reflects a new waiter being added to the waitqueue.
+ */
+static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
 {
 #ifdef CONFIG_SMP
+	atomic_inc(&hb->waiters);
 	/*
-	 * Tasks trying to enter the critical region are most likely
-	 * potential waiters that will be added to the plist. Ensure
-	 * that wakers won't miss to-be-slept tasks in the window between
-	 * the wait call and the actual plist_add.
+	 * Full barrier (A), see the ordering comment above.
 	 */
-	if (spin_is_locked(&hb->lock))
-		return true;
-	smp_rmb(); /* Make sure we check the lock state first */
+	smp_mb__after_atomic_inc();
+#endif
+}
+
+/*
+ * Reflects a waiter being removed from the waitqueue by wakeup
+ * paths.
+ */
+static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
+{
+#ifdef CONFIG_SMP
+	atomic_dec(&hb->waiters);
+#endif
+}
 
-	return !plist_head_empty(&hb->chain);
+static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
+{
+#ifdef CONFIG_SMP
+	return atomic_read(&hb->waiters);
 #else
-	return true;
+	return 1;
 #endif
 }
 
@@ -954,6 +970,7 @@ static void __unqueue_futex(struct futex_q *q)
 
 	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
 	plist_del(&q->list, &hb->chain);
+	hb_waiters_dec(hb);
 }
 
 /*
@@ -1257,7 +1274,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 	 */
 	if (likely(&hb1->chain != &hb2->chain)) {
 		plist_del(&q->list, &hb1->chain);
+		hb_waiters_dec(hb1);
 		plist_add(&q->list, &hb2->chain);
+		hb_waiters_inc(hb2);
 		q->lock_ptr = &hb2->lock;
 	}
 	get_futex_key_refs(key2);
@@ -1600,6 +1619,17 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 	struct futex_hash_bucket *hb;
 
 	hb = hash_futex(&q->key);
+
+	/*
+	 * Increment the counter before taking the lock so that
+	 * a potential waker won't miss a to-be-slept task that is
+	 * waiting for the spinlock. This is safe as all queue_lock()
+	 * users end up calling queue_me(). Similarly, for housekeeping,
+	 * decrement the counter at queue_unlock() when some error has
+	 * occurred and we don't end up adding the task to the list.
+	 */
+	hb_waiters_inc(hb);
+
 	q->lock_ptr = &hb->lock;
 
 	spin_lock(&hb->lock); /* implies MB (A) */
@@ -1611,6 +1641,7 @@ queue_unlock(struct futex_hash_bucket *hb)
 	__releases(&hb->lock)
 {
 	spin_unlock(&hb->lock);
+	hb_waiters_dec(hb);
 }
 
 /**
@@ -2342,6 +2373,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 		 * Unqueue the futex_q and determine which it was.
 		 */
 		plist_del(&q->list, &hb->chain);
+		hb_waiters_dec(hb);
 
 		/* Handle spurious wakeups gracefully */
 		ret = -EWOULDBLOCK;
@@ -2875,6 +2907,7 @@ static int __init futex_init(void)
 		futex_cmpxchg_enabled = 1;
 
 	for (i = 0; i < futex_hashsize; i++) {
+		atomic_set(&futex_queues[i].waiters, 0);
 		plist_head_init(&futex_queues[i].chain);
 		spin_lock_init(&futex_queues[i].lock);
 	}

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox