LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4/7] DMA: Freescale: move functions to avoid forward declarations
From: hongbo.zhang @ 2014-01-16  5:47 UTC (permalink / raw)
  To: vinod.koul, dan.j.williams, dmaengine
  Cc: scottwood, Hongbo Zhang, linuxppc-dev, linux-kernel
In-Reply-To: <1389851246-8564-1-git-send-email-hongbo.zhang@freescale.com>

From: Hongbo Zhang <hongbo.zhang@freescale.com>

These functions will be modified in the next patch in the series. By moving the
function in a patch separate from the changes, it will make review easier.

Signed-off-by: Hongbo Zhang <hongbo.zhang@freescale.com>
Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
---
 drivers/dma/fsldma.c |  192 +++++++++++++++++++++++++-------------------------
 1 file changed, 96 insertions(+), 96 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index ad73538..7b6fd3c 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -463,6 +463,102 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan)
 }
 
 /**
+ * fsl_chan_xfer_ld_queue - transfer any pending transactions
+ * @chan : Freescale DMA channel
+ *
+ * HARDWARE STATE: idle
+ * LOCKING: must hold chan->desc_lock
+ */
+static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc;
+
+	/*
+	 * If the list of pending descriptors is empty, then we
+	 * don't need to do any work at all
+	 */
+	if (list_empty(&chan->ld_pending)) {
+		chan_dbg(chan, "no pending LDs\n");
+		return;
+	}
+
+	/*
+	 * The DMA controller is not idle, which means that the interrupt
+	 * handler will start any queued transactions when it runs after
+	 * this transaction finishes
+	 */
+	if (!chan->idle) {
+		chan_dbg(chan, "DMA controller still busy\n");
+		return;
+	}
+
+	/*
+	 * If there are some link descriptors which have not been
+	 * transferred, we need to start the controller
+	 */
+
+	/*
+	 * Move all elements from the queue of pending transactions
+	 * onto the list of running transactions
+	 */
+	chan_dbg(chan, "idle, starting controller\n");
+	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
+	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
+
+	/*
+	 * The 85xx DMA controller doesn't clear the channel start bit
+	 * automatically at the end of a transfer. Therefore we must clear
+	 * it in software before starting the transfer.
+	 */
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		u32 mode;
+
+		mode = get_mr(chan);
+		mode &= ~FSL_DMA_MR_CS;
+		set_mr(chan, mode);
+	}
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	set_cdar(chan, desc->async_tx.phys);
+	get_cdar(chan);
+
+	dma_start(chan);
+	chan->idle = false;
+}
+
+/**
+ * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
+ * @chan: Freescale DMA channel
+ * @desc: descriptor to cleanup and free
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, and then
+ * free the descriptor.
+ */
+static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
+				      struct fsl_desc_sw *desc)
+{
+	struct dma_async_tx_descriptor *txd = &desc->async_tx;
+
+	/* Run the link descriptor callback function */
+	if (txd->callback) {
+#ifdef FSL_DMA_LD_DEBUG
+		chan_dbg(chan, "LD %p callback\n", desc);
+#endif
+		txd->callback(txd->callback_param);
+	}
+
+	/* Run any dependencies */
+	dma_run_dependencies(txd);
+
+	dma_descriptor_unmap(txd);
+	fsl_dma_free_descriptor(chan, desc);
+}
+
+/**
  * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel.
  * @chan : Freescale DMA channel
  *
@@ -807,102 +903,6 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 }
 
 /**
- * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
- * @chan: Freescale DMA channel
- * @desc: descriptor to cleanup and free
- *
- * This function is used on a descriptor which has been executed by the DMA
- * controller. It will run any callbacks, submit any dependencies, and then
- * free the descriptor.
- */
-static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
-				      struct fsl_desc_sw *desc)
-{
-	struct dma_async_tx_descriptor *txd = &desc->async_tx;
-
-	/* Run the link descriptor callback function */
-	if (txd->callback) {
-#ifdef FSL_DMA_LD_DEBUG
-		chan_dbg(chan, "LD %p callback\n", desc);
-#endif
-		txd->callback(txd->callback_param);
-	}
-
-	/* Run any dependencies */
-	dma_run_dependencies(txd);
-
-	dma_descriptor_unmap(txd);
-	fsl_dma_free_descriptor(chan, desc);
-}
-
-/**
- * fsl_chan_xfer_ld_queue - transfer any pending transactions
- * @chan : Freescale DMA channel
- *
- * HARDWARE STATE: idle
- * LOCKING: must hold chan->desc_lock
- */
-static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
-{
-	struct fsl_desc_sw *desc;
-
-	/*
-	 * If the list of pending descriptors is empty, then we
-	 * don't need to do any work at all
-	 */
-	if (list_empty(&chan->ld_pending)) {
-		chan_dbg(chan, "no pending LDs\n");
-		return;
-	}
-
-	/*
-	 * The DMA controller is not idle, which means that the interrupt
-	 * handler will start any queued transactions when it runs after
-	 * this transaction finishes
-	 */
-	if (!chan->idle) {
-		chan_dbg(chan, "DMA controller still busy\n");
-		return;
-	}
-
-	/*
-	 * If there are some link descriptors which have not been
-	 * transferred, we need to start the controller
-	 */
-
-	/*
-	 * Move all elements from the queue of pending transactions
-	 * onto the list of running transactions
-	 */
-	chan_dbg(chan, "idle, starting controller\n");
-	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
-	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
-
-	/*
-	 * The 85xx DMA controller doesn't clear the channel start bit
-	 * automatically at the end of a transfer. Therefore we must clear
-	 * it in software before starting the transfer.
-	 */
-	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
-		u32 mode;
-
-		mode = get_mr(chan);
-		mode &= ~FSL_DMA_MR_CS;
-		set_mr(chan, mode);
-	}
-
-	/*
-	 * Program the descriptor's address into the DMA controller,
-	 * then start the DMA transaction
-	 */
-	set_cdar(chan, desc->async_tx.phys);
-	get_cdar(chan);
-
-	dma_start(chan);
-	chan->idle = false;
-}
-
-/**
  * fsl_dma_memcpy_issue_pending - Issue the DMA start command
  * @chan : Freescale DMA channel
  */
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 5/7] DMA: Freescale: change descriptor release process for supporting async_tx
From: hongbo.zhang @ 2014-01-16  5:47 UTC (permalink / raw)
  To: vinod.koul, dan.j.williams, dmaengine
  Cc: scottwood, Hongbo Zhang, linuxppc-dev, linux-kernel
In-Reply-To: <1389851246-8564-1-git-send-email-hongbo.zhang@freescale.com>

From: Hongbo Zhang <hongbo.zhang@freescale.com>

Fix the potential risk when enable config NET_DMA and ASYNC_TX. Async_tx is
lack of support in current release process of dma descriptor, all descriptors
will be released whatever is acked or no-acked by async_tx, so there is a
potential race condition when dma engine is uesd by others clients (e.g. when
enable NET_DMA to offload TCP).

In our case, a race condition which is raised when use both of talitos and
dmaengine to offload xor is because napi scheduler will sync all pending
requests in dma channels, it affects the process of raid operations due to
ack_tx is not checked in fsl dma. The no-acked descriptor is freed which is
submitted just now, as a dependent tx, this freed descriptor trigger
BUG_ON(async_tx_test_ack(depend_tx)) in async_tx_submit().

TASK = ee1a94a0[1390] 'md0_raid5' THREAD: ecf40000 CPU: 0
GPR00: 00000001 ecf41ca0 ee44/921a94a0 0000003f 00000001 c00593e4 00000000 00000001
GPR08: 00000000 a7a7a7a7 00000001 045/920000002 42028042 100a38d4 ed576d98 00000000
GPR16: ed5a11b0 00000000 2b162000 00000200 046/920000000 2d555000 ed3015e8 c15a7aa0
GPR24: 00000000 c155fc40 00000000 ecb63220 ecf41d28 e47/92f640bb0 ef640c30 ecf41ca0
NIP [c02b048c] async_tx_submit+0x6c/0x2b4
LR [c02b068c] async_tx_submit+0x26c/0x2b4
Call Trace:
[ecf41ca0] [c02b068c] async_tx_submit+0x26c/0x2b448/92 (unreliable)
[ecf41cd0] [c02b0a4c] async_memcpy+0x240/0x25c
[ecf41d20] [c0421064] async_copy_data+0xa0/0x17c
[ecf41d70] [c0421cf4] __raid_run_ops+0x874/0xe10
[ecf41df0] [c0426ee4] handle_stripe+0x820/0x25e8
[ecf41e90] [c0429080] raid5d+0x3d4/0x5b4
[ecf41f40] [c04329b8] md_thread+0x138/0x16c
[ecf41f90] [c008277c] kthread+0x8c/0x90
[ecf41ff0] [c0011630] kernel_thread+0x4c/0x68

Another modification in this patch is the change of completed descriptors,
there is a potential risk which caused by exception interrupt, all descriptors
in ld_running list are seemed completed when an interrupt raised, it works fine
under normal condition, but if there is an exception occured, it cannot work as
our excepted. Hardware should not be depend on s/w list, the right way is to
read current descriptor address register to find the last completed descriptor.
If an interrupt is raised by an error, all descriptors in ld_running should not
be seemed finished, or these unfinished descriptors in ld_running will be
released wrongly.

A simple way to reproduce:
Enable dmatest first, then insert some bad descriptors which can trigger
Programming Error interrupts before the good descriptors. Last, the good
descriptors will be freed before they are processsed because of the exception
intrerrupt.

Note: the bad descriptors are only for simulating an exception interrupt.  This
case can illustrate the potential risk in current fsl-dma very well.

Signed-off-by: Hongbo Zhang <hongbo.zhang@freescale.com>
Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
---
 drivers/dma/fsldma.c |  199 ++++++++++++++++++++++++++++++++++++--------------
 drivers/dma/fsldma.h |   17 ++++-
 2 files changed, 160 insertions(+), 56 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 7b6fd3c..bbace54 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -463,6 +463,89 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan)
 }
 
 /**
+ * fsldma_clean_completed_descriptor - free all descriptors which
+ * has been completed and acked
+ * @chan: Freescale DMA channel
+ *
+ * This function is used on all completed and acked descriptors.
+ * All descriptors should only be freed in this function.
+ */
+static void fsldma_clean_completed_descriptor(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc, *_desc;
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node)
+		if (async_tx_test_ack(&desc->async_tx))
+			fsl_dma_free_descriptor(chan, desc);
+}
+
+/**
+ * fsldma_run_tx_complete_actions - cleanup a single link descriptor
+ * @chan: Freescale DMA channel
+ * @desc: descriptor to cleanup and free
+ * @cookie: Freescale DMA transaction identifier
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies.
+ */
+static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan *chan,
+		struct fsl_desc_sw *desc, dma_cookie_t cookie)
+{
+	struct dma_async_tx_descriptor *txd = &desc->async_tx;
+
+	BUG_ON(txd->cookie < 0);
+
+	if (txd->cookie > 0) {
+		cookie = txd->cookie;
+
+		/* Run the link descriptor callback function */
+		if (txd->callback) {
+#ifdef FSL_DMA_LD_DEBUG
+			chan_dbg(chan, "LD %p callback\n", desc);
+#endif
+			txd->callback(txd->callback_param);
+		}
+	}
+
+	/* Run any dependencies */
+	dma_run_dependencies(txd);
+
+	return cookie;
+}
+
+/**
+ * fsldma_clean_running_descriptor - move the completed descriptor from
+ * ld_running to ld_completed
+ * @chan: Freescale DMA channel
+ * @desc: the descriptor which is completed
+ *
+ * Free the descriptor directly if acked by async_tx api, or move it to
+ * queue ld_completed.
+ */
+static void fsldma_clean_running_descriptor(struct fsldma_chan *chan,
+		struct fsl_desc_sw *desc)
+{
+	/* Remove from the list of transactions */
+	list_del(&desc->node);
+
+	/*
+	 * the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx)) {
+		/*
+		 * Move this descriptor to the list of descriptors which is
+		 * completed, but still awaiting the 'ack' bit to be set.
+		 */
+		list_add_tail(&desc->node, &chan->ld_completed);
+		return;
+	}
+
+	dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+}
+
+/**
  * fsl_chan_xfer_ld_queue - transfer any pending transactions
  * @chan : Freescale DMA channel
  *
@@ -530,32 +613,58 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
 }
 
 /**
- * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
+ * fsldma_cleanup_descriptors - cleanup link descriptors which are completed
+ * and move them to ld_completed to free until flag 'ack' is set
  * @chan: Freescale DMA channel
- * @desc: descriptor to cleanup and free
  *
- * This function is used on a descriptor which has been executed by the DMA
- * controller. It will run any callbacks, submit any dependencies, and then
- * free the descriptor.
+ * This function is used on descriptors which have been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, then
+ * free these descriptors if flag 'ack' is set.
  */
-static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
-				      struct fsl_desc_sw *desc)
+static void fsldma_cleanup_descriptors(struct fsldma_chan *chan)
 {
-	struct dma_async_tx_descriptor *txd = &desc->async_tx;
+	struct fsl_desc_sw *desc, *_desc;
+	dma_cookie_t cookie = 0;
+	dma_addr_t curr_phys = get_cdar(chan);
+	int seen_current = 0;
 
-	/* Run the link descriptor callback function */
-	if (txd->callback) {
-#ifdef FSL_DMA_LD_DEBUG
-		chan_dbg(chan, "LD %p callback\n", desc);
-#endif
-		txd->callback(txd->callback_param);
+	fsldma_clean_completed_descriptor(chan);
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
+		/*
+		 * do not advance past the current descriptor loaded into the
+		 * hardware channel, subsequent descriptors are either in
+		 * process or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/*
+		 * stop the search if we reach the current descriptor and the
+		 * channel is busy
+		 */
+		if (desc->async_tx.phys == curr_phys) {
+			seen_current = 1;
+			if (!dma_is_idle(chan))
+				break;
+		}
+
+		cookie = fsldma_run_tx_complete_actions(chan, desc, cookie);
+
+		fsldma_clean_running_descriptor(chan, desc);
 	}
 
-	/* Run any dependencies */
-	dma_run_dependencies(txd);
+	/*
+	 * Start any pending transactions automatically
+	 *
+	 * In the ideal case, we keep the DMA controller busy while we go
+	 * ahead and free the descriptors below.
+	 */
+	fsl_chan_xfer_ld_queue(chan);
 
-	dma_descriptor_unmap(txd);
-	fsl_dma_free_descriptor(chan, desc);
+	if (cookie > 0)
+		chan->common.completed_cookie = cookie;
 }
 
 /**
@@ -626,8 +735,10 @@ static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
 
 	chan_dbg(chan, "free all channel resources\n");
 	spin_lock_irqsave(&chan->desc_lock, flags);
+	fsldma_cleanup_descriptors(chan);
 	fsldma_free_desc_list(chan, &chan->ld_pending);
 	fsldma_free_desc_list(chan, &chan->ld_running);
+	fsldma_free_desc_list(chan, &chan->ld_completed);
 	spin_unlock_irqrestore(&chan->desc_lock, flags);
 
 	dma_pool_destroy(chan->desc_pool);
@@ -865,6 +976,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 		/* Remove and free all of the descriptors in the LD queue */
 		fsldma_free_desc_list(chan, &chan->ld_pending);
 		fsldma_free_desc_list(chan, &chan->ld_running);
+		fsldma_free_desc_list(chan, &chan->ld_completed);
 		chan->idle = true;
 
 		spin_unlock_irqrestore(&chan->desc_lock, flags);
@@ -924,6 +1036,17 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
 					dma_cookie_t cookie,
 					struct dma_tx_state *txstate)
 {
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	spin_lock_bh(&chan->desc_lock);
+	fsldma_cleanup_descriptors(chan);
+	spin_unlock_bh(&chan->desc_lock);
+
 	return dma_cookie_status(dchan, cookie, txstate);
 }
 
@@ -1001,52 +1124,19 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data)
 static void dma_do_tasklet(unsigned long data)
 {
 	struct fsldma_chan *chan = (struct fsldma_chan *)data;
-	struct fsl_desc_sw *desc, *_desc;
-	LIST_HEAD(ld_cleanup);
 	unsigned long flags;
 
 	chan_dbg(chan, "tasklet entry\n");
 
 	spin_lock_irqsave(&chan->desc_lock, flags);
 
-	/* update the cookie if we have some descriptors to cleanup */
-	if (!list_empty(&chan->ld_running)) {
-		dma_cookie_t cookie;
-
-		desc = to_fsl_desc(chan->ld_running.prev);
-		cookie = desc->async_tx.cookie;
-		dma_cookie_complete(&desc->async_tx);
-
-		chan_dbg(chan, "completed_cookie=%d\n", cookie);
-	}
-
-	/*
-	 * move the descriptors to a temporary list so we can drop the lock
-	 * during the entire cleanup operation
-	 */
-	list_splice_tail_init(&chan->ld_running, &ld_cleanup);
-
 	/* the hardware is now idle and ready for more */
 	chan->idle = true;
 
-	/*
-	 * Start any pending transactions automatically
-	 *
-	 * In the ideal case, we keep the DMA controller busy while we go
-	 * ahead and free the descriptors below.
-	 */
-	fsl_chan_xfer_ld_queue(chan);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
-
-	/* Run the callback for each descriptor, in order */
-	list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) {
-
-		/* Remove from the list of transactions */
-		list_del(&desc->node);
+	/* Run all cleanup for descriptors which have been completed */
+	fsldma_cleanup_descriptors(chan);
 
-		/* Run all cleanup for this descriptor */
-		fsldma_cleanup_descriptor(chan, desc);
-	}
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
 
 	chan_dbg(chan, "tasklet exit\n");
 }
@@ -1230,6 +1320,7 @@ static int fsl_dma_chan_probe(struct fsldma_device *fdev,
 	spin_lock_init(&chan->desc_lock);
 	INIT_LIST_HEAD(&chan->ld_pending);
 	INIT_LIST_HEAD(&chan->ld_running);
+	INIT_LIST_HEAD(&chan->ld_completed);
 	chan->idle = true;
 
 	chan->common.device = &fdev->common;
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index d56e835..ec19517 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -138,8 +138,21 @@ struct fsldma_chan {
 	char name[8];			/* Channel name */
 	struct fsldma_chan_regs __iomem *regs;
 	spinlock_t desc_lock;		/* Descriptor operation lock */
-	struct list_head ld_pending;	/* Link descriptors queue */
-	struct list_head ld_running;	/* Link descriptors queue */
+	/*
+	 * Descriptors which are queued to run, but have not yet been
+	 * submitted to the hardware for execution
+	 */
+	struct list_head ld_pending;
+	/*
+	 * Descriptors which are currently being executed by the hardware
+	 */
+	struct list_head ld_running;
+	/*
+	 * Descriptors which have finished execution by the hardware. These
+	 * descriptors have already had their cleanup actions run. They are
+	 * waiting for the ACK bit to be set by the async_tx API.
+	 */
+	struct list_head ld_completed;	/* Link descriptors queue */
 	struct dma_chan common;		/* DMA common channel */
 	struct dma_pool *desc_pool;	/* Descriptors pool */
 	struct device *dev;		/* Channel device */
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 7/7] DMA: Freescale: add suspend resume functions for DMA driver
From: hongbo.zhang @ 2014-01-16  5:47 UTC (permalink / raw)
  To: vinod.koul, dan.j.williams, dmaengine
  Cc: scottwood, Hongbo Zhang, linuxppc-dev, linux-kernel
In-Reply-To: <1389851246-8564-1-git-send-email-hongbo.zhang@freescale.com>

From: Hongbo Zhang <hongbo.zhang@freescale.com>

This patch adds suspend resume functions for Freescale DMA driver.
.prepare callback is used to stop further descriptors from being added into the
pending queue, and also issue pending queues into execution if there is any.
.suspend callback makes sure all the pending jobs are cleaned up and all the
channels are idle, and save the mode registers.
.resume callback re-initializes the channels by restore the mode registers.

Signed-off-by: Hongbo Zhang <hongbo.zhang@freescale.com>
---
 drivers/dma/fsldma.c |   99 ++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/dma/fsldma.h |   16 ++++++++
 2 files changed, 115 insertions(+)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 437794e..fb94eb3 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -400,6 +400,14 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 
 	spin_lock_bh(&chan->desc_lock);
 
+#ifdef CONFIG_PM
+	if (unlikely(chan->pm_state != RUNNING)) {
+		chan_dbg(chan, "cannot submit due to suspend\n");
+		spin_unlock_bh(&chan->desc_lock);
+		return -1;
+	}
+#endif
+
 	/*
 	 * assign cookies to all of the software descriptors
 	 * that make up this transaction
@@ -1317,6 +1325,9 @@ static int fsl_dma_chan_probe(struct fsldma_device *fdev,
 	INIT_LIST_HEAD(&chan->ld_running);
 	INIT_LIST_HEAD(&chan->ld_completed);
 	chan->idle = true;
+#ifdef CONFIG_PM
+	chan->pm_state = RUNNING;
+#endif
 
 	chan->common.device = &fdev->common;
 	dma_cookie_init(&chan->common);
@@ -1456,6 +1467,91 @@ static int fsldma_of_remove(struct platform_device *op)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int fsldma_prepare(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct fsldma_device *fdev = platform_get_drvdata(pdev);
+	struct fsldma_chan *chan;
+	int i;
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		spin_lock_bh(&chan->desc_lock);
+		chan->pm_state = SUSPENDING;
+		if (!list_empty(&chan->ld_pending))
+			fsl_chan_xfer_ld_queue(chan);
+		spin_unlock_bh(&chan->desc_lock);
+	}
+
+	return 0;
+}
+
+static int fsldma_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct fsldma_device *fdev = platform_get_drvdata(pdev);
+	struct fsldma_chan *chan;
+	int i;
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		spin_lock_bh(&chan->desc_lock);
+		if (!chan->idle)
+			goto out;
+		chan->regs_save.mr = DMA_IN(chan, &chan->regs->mr, 32);
+		chan->pm_state = SUSPENDED;
+		spin_unlock_bh(&chan->desc_lock);
+	}
+	return 0;
+
+out:
+	for (; i >= 0; i--) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+		spin_unlock_bh(&chan->desc_lock);
+	}
+	return -EBUSY;
+}
+
+static int fsldma_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct fsldma_device *fdev = platform_get_drvdata(pdev);
+	struct fsldma_chan *chan;
+	u32 mode;
+	int i;
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		spin_lock_bh(&chan->desc_lock);
+		mode = chan->regs_save.mr
+			& ~FSL_DMA_MR_CS & ~FSL_DMA_MR_CC & ~FSL_DMA_MR_CA;
+		DMA_OUT(chan, &chan->regs->mr, mode, 32);
+		chan->pm_state = RUNNING;
+		spin_unlock_bh(&chan->desc_lock);
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops fsldma_pm_ops = {
+	.prepare	= fsldma_prepare,
+	.suspend	= fsldma_suspend,
+	.resume		= fsldma_resume,
+};
+#endif
+
 static const struct of_device_id fsldma_of_ids[] = {
 	{ .compatible = "fsl,elo3-dma", },
 	{ .compatible = "fsl,eloplus-dma", },
@@ -1468,6 +1564,9 @@ static struct platform_driver fsldma_of_driver = {
 		.name = "fsl-elo-dma",
 		.owner = THIS_MODULE,
 		.of_match_table = fsldma_of_ids,
+#ifdef CONFIG_PM
+		.pm = &fsldma_pm_ops,
+#endif
 	},
 	.probe = fsldma_of_probe,
 	.remove = fsldma_of_remove,
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index ec19517..eecaf9e 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -134,6 +134,18 @@ struct fsldma_device {
 #define FSL_DMA_CHAN_PAUSE_EXT	0x00001000
 #define FSL_DMA_CHAN_START_EXT	0x00002000
 
+#ifdef CONFIG_PM
+struct fsldma_chan_regs_save {
+	u32 mr;
+};
+
+enum fsldma_pm_state {
+	RUNNING = 0,
+	SUSPENDING,
+	SUSPENDED,
+};
+#endif
+
 struct fsldma_chan {
 	char name[8];			/* Channel name */
 	struct fsldma_chan_regs __iomem *regs;
@@ -161,6 +173,10 @@ struct fsldma_chan {
 	struct tasklet_struct tasklet;
 	u32 feature;
 	bool idle;			/* DMA controller is idle */
+#ifdef CONFIG_PM
+	struct fsldma_chan_regs_save regs_save;
+	enum fsldma_pm_state pm_state;
+#endif
 
 	void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable);
 	void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable);
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH] DMA: Freescale: change BWC from 256 bytes to 1024 bytes
From: hongbo.zhang @ 2014-01-16  6:10 UTC (permalink / raw)
  To: vinod.koul, dan.j.williams, dmaengine
  Cc: scottwood, Hongbo Zhang, linuxppc-dev, linux-kernel

From: Hongbo Zhang <hongbo.zhang@freescale.com>

Freescale DMA has a feature of BandWidth Control (ab. BWC), which is currently
256 bytes and should be changed to 1024 bytes for best DMA throughput.
Changing BWC from 256 to 1024 will improve DMA performance much, in cases
whatever one channel is running or multi channels are running simultanously,
large or small buffers are copied.  And this change doesn't impact memory
access performance remarkably, lmbench tests show that for some cases the
memory performance are decreased very slightly, while the others are even
better.
Tested on T4240.

Signed-off-by: Hongbo Zhang <hongbo.zhang@freescale.com>
---
 drivers/dma/fsldma.h |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index 1ffc244..d56e835 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -41,7 +41,7 @@
  * channel is allowed to transfer before the DMA engine pauses
  * the current channel and switches to the next channel
  */
-#define FSL_DMA_MR_BWC         0x08000000
+#define FSL_DMA_MR_BWC         0x0A000000
 
 /* Special MR definition for MPC8349 */
 #define FSL_DMA_MR_EOTIE	0x00000080
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH] powerpc/pci: Fix IMMRBAR address
From: Minghuan Lian @ 2014-01-16  8:14 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Scott Wood, Minghuan Lian, Zang Roy-R61911

For PEXCSRBAR, bit 3-0 indicate prefetchable and address type.
So when getting base address, these bits should be masked,
otherwise we may get incorrect base address.

Signed-off-by: Minghuan Lian <Minghuan.Lian@freescale.com>
---
 arch/powerpc/sysdev/fsl_pci.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 4dfd61d..44eae39 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -868,6 +868,14 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose)
 
 		pci_bus_read_config_dword(hose->bus,
 			PCI_DEVFN(0, 0), PCI_BASE_ADDRESS_0, &base);
+
+		/*
+		 * For PEXCSRBAR, bit 3-0 indicate prefetchable and
+		 * address type. So when getting base address, these
+		 * bits should be masked
+		 */
+		base &= 0xfffffff0;
+
 		return base;
 	}
 #endif
-- 
1.8.1.2

^ permalink raw reply related

* Re: [PATCH v2] drivers/tty/hvc: don't use module_init in non-modular hyp. console code
From: David Vrabel @ 2014-01-16 10:47 UTC (permalink / raw)
  To: Paul Gortmaker
  Cc: Konrad Rzeszutek Wilk, Richard Weinberger, linux-kernel,
	Greg Kroah-Hartman, xen-devel, Boris Ostrovsky, linuxppc-dev
In-Reply-To: <1389821743-25081-1-git-send-email-paul.gortmaker@windriver.com>

On 15/01/14 21:35, Paul Gortmaker wrote:
> The HVC_OPAL/RTAS/UDBG/XEN options are all bool, and hence their support
> is either present or absent.  It will never be modular, so using
> module_init as an alias for __initcall is rather misleading.
> 
> Fix this up now, so that we can relocate module_init from
> init.h into module.h in the future.  If we don't do this, we'd
> have to add module.h to obviously non-modular code, and that
> would be a worse thing.
> 
> Note that direct use of __initcall is discouraged, vs. one
> of the priority categorized subgroups.  As __initcall gets
> mapped onto device_initcall, our use of device_initcall
> directly in this change means that the runtime impact is
> zero -- it will remain at level 6 in initcall ordering.
> 
> Also the __exitcall functions have been outright deleted since
> they are only ever of interest to UML, and UML will never be
> using any of this code.

For the hvc_xen changes

Acked-by: David Vrabel <david.vrabel@citrix.com>

Thanks

David

^ permalink raw reply

* [PATCH 3/4 v2]powerpc/fsl-booke: Add support for T2080/T2081 SoC
From: Shengzhou Liu @ 2014-01-16 10:02 UTC (permalink / raw)
  To: linuxppc-dev, scottwood; +Cc: Shengzhou Liu

Add support for T2080/T2081 SoC without DPAA components.

The T2080 SoC includes the following function and features:
- Four dual-threaded 64-bit Power architecture e6500 cores, up to 1.8GHz
- 2MB L2 cache and 512KB CoreNet platform cache (CPC)
- Hierarchical interconnect fabric
- One 32-/64-bit DDR3/3L SDRAM memory controllers with ECC and interleaving
- Data Path Acceleration Architecture (DPAA) incorporating acceleration
- 16 SerDes lanes up to 10.3125 GHz
- 8 Ethernet interfaces (multiple 1G/2.5G/10G MACs)
- High-speed peripheral interfaces
  - Four PCI Express controllers (two PCIe 2.0 and two PCIe 3.0)
  - Two Serial RapidIO 2.0 controllers/ports running at up to 5 GHz
- Additional peripheral interfaces
  - Two serial ATA (SATA 2.0) controllers
  - Two high-speed USB 2.0 controllers with integrated PHY
  - Enhanced secure digital host controller (SD/SDXC/eMMC)
  - Enhanced serial peripheral interface (eSPI)
  - Four I2C controllers
  - Four 2-pin UARTs or two 4-pin UARTs
  - Integrated Flash Controller supporting NAND and NOR flash
- Three eight-channel DMA engines
- Support for hardware virtualization and partitioning enforcement
- QorIQ Platform's Trust Architecture 2.0

T2081 is a reduced personality of T2080 without SATA, sRIO, RMan,
Aurora, and with less SerDes lanes and ethernet interfaces.

Signed-off-by: Shengzhou Liu <Shengzhou.Liu@freescale.com>
---
v2: remove wildcards in compatible strings.

 arch/powerpc/boot/dts/fsl/t2080si-post.dtsi |  60 +++++
 arch/powerpc/boot/dts/fsl/t2081si-post.dtsi | 383 ++++++++++++++++++++++++++++
 arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi  |  95 +++++++
 arch/powerpc/include/asm/mpc85xx.h          |   2 +
 4 files changed, 540 insertions(+)
 create mode 100644 arch/powerpc/boot/dts/fsl/t2080si-post.dtsi
 create mode 100644 arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
 create mode 100644 arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi

diff --git a/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi
new file mode 100644
index 0000000..1a902fe
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2080si-post.dtsi
@@ -0,0 +1,60 @@
+/*
+ * T2080 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t2081si-post.dtsi"
+
+&soc {
+/include/ "qoriq-sata2-0.dtsi"
+/include/ "qoriq-sata2-1.dtsi"
+};
+
+&rio {
+	compatible = "fsl,srio";
+	interrupts = <16 2 1 11>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	ranges;
+
+	port1 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <1>;
+	};
+
+	port2 {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		cell-index = <2>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
new file mode 100644
index 0000000..d7e036c
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
@@ -0,0 +1,383 @@
+/*
+ * T2081 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&ifc {
+	#address-cells = <2>;
+	#size-cells = <1>;
+	compatible = "fsl,ifc", "simple-bus";
+	interrupts = <25 2 0 0>;
+};
+
+/* controller at 0x240000 */
+&pci0 {
+	compatible = "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <20 2 0 0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <20 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 40 1 0 0
+			0000 0 0 2 &mpic 1 1 0 0
+			0000 0 0 3 &mpic 2 1 0 0
+			0000 0 0 4 &mpic 3 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x250000 */
+&pci1 {
+	compatible = "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 0xff>;
+	interrupts = <21 2 0 0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <21 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 41 1 0 0
+			0000 0 0 2 &mpic 5 1 0 0
+			0000 0 0 3 &mpic 6 1 0 0
+			0000 0 0 4 &mpic 7 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x260000 */
+&pci2 {
+	compatible = "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <22 2 0 0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <22 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 42 1 0 0
+			0000 0 0 2 &mpic 9 1 0 0
+			0000 0 0 3 &mpic 10 1 0 0
+			0000 0 0 4 &mpic 11 1 0 0
+			>;
+	};
+};
+
+/* controller at 0x270000 */
+&pci3 {
+	compatible = "fsl,qoriq-pcie";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0x0 0xff>;
+	interrupts = <23 2 0 0>;
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <23 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0 0 1 &mpic 43 1 0 0
+			0000 0 0 2 &mpic 0 1 0 0
+			0000 0 0 3 &mpic 4 1 0 0
+			0000 0 0 4 &mpic 8 1 0 0
+			>;
+	};
+};
+
+&dcsr {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,dcsr", "simple-bus";
+
+	dcsr-epu@0 {
+		compatible = "fsl,dcsr-epu";
+		interrupts = <52 2 0 0
+			      84 2 0 0
+			      85 2 0 0
+			      94 2 0 0
+			      95 2 0 0>;
+		reg = <0x0 0x1000>;
+	};
+	dcsr-npc {
+		compatible = "fsl,dcsr-cnpc";
+		reg = <0x1000 0x1000 0x1002000 0x10000>;
+	};
+	dcsr-nxc@2000 {
+		compatible = "fsl,dcsr-nxc";
+		reg = <0x2000 0x1000>;
+	};
+	dcsr-corenet {
+		compatible = "fsl,dcsr-corenet";
+		reg = <0x8000 0x1000 0x1A000 0x1000>;
+	};
+	dcsr-ocn@11000 {
+		compatible = "fsl,dcsr-ocn";
+		reg = <0x11000 0x1000>;
+	};
+	dcsr-ddr@12000 {
+		compatible = "fsl,dcsr-ddr";
+		dev-handle = <&ddr1>;
+		reg = <0x12000 0x1000>;
+	};
+	dcsr-nal@18000 {
+		compatible = "fsl,dcsr-nal";
+		reg = <0x18000 0x1000>;
+	};
+	dcsr-rcpm@22000 {
+		compatible = "fsl,dcsr-rcpm";
+		reg = <0x22000 0x1000>;
+	};
+	dcsr-snpc@30000 {
+		compatible = "fsl,dcsr-snpc";
+		reg = <0x30000 0x1000 0x1022000 0x10000>;
+	};
+	dcsr-snpc@31000 {
+		compatible = "fsl,dcsr-snpc";
+		reg = <0x31000 0x1000 0x1042000 0x10000>;
+	};
+	dcsr-snpc@32000 {
+		compatible = "fsl,dcsr-snpc";
+		reg = <0x32000 0x1000 0x1062000 0x10000>;
+	};
+	dcsr-cpu-sb-proxy@100000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu0>;
+		reg = <0x100000 0x1000 0x101000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@108000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu1>;
+		reg = <0x108000 0x1000 0x109000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@110000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu2>;
+		reg = <0x110000 0x1000 0x111000 0x1000>;
+	};
+	dcsr-cpu-sb-proxy@118000 {
+		compatible = "fsl,dcsr-e6500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+		cpu-handle = <&cpu3>;
+		reg = <0x118000 0x1000 0x119000 0x1000>;
+	};
+};
+
+&soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	device_type = "soc";
+	compatible = "simple-bus";
+
+	soc-sram-error {
+		compatible = "fsl,soc-sram-error";
+		interrupts = <16 2 1 29>;
+	};
+
+	corenet-law@0 {
+		compatible = "fsl,corenet-law";
+		reg = <0x0 0x1000>;
+		fsl,num-laws = <32>;
+	};
+
+	ddr1: memory-controller@8000 {
+		compatible = "fsl,qoriq-memory-controller-v4.7",
+				"fsl,qoriq-memory-controller";
+		reg = <0x8000 0x1000>;
+		interrupts = <16 2 1 23>;
+	};
+
+	cpc: l3-cache-controller@10000 {
+		compatible = "cache";
+		reg = <0x10000 0x1000
+		       0x11000 0x1000
+		       0x12000 0x1000>;
+		interrupts = <16 2 1 27
+			      16 2 1 26
+			      16 2 1 25>;
+	};
+
+	corenet-cf@18000 {
+		compatible = "fsl,corenet2-cf";
+		reg = <0x18000 0x1000>;
+		interrupts = <16 2 1 31>;
+		fsl,ccf-num-csdids = <32>;
+		fsl,ccf-num-snoopids = <32>;
+	};
+
+	iommu@20000 {
+		compatible = "fsl,pamu-v1.0", "fsl,pamu";
+		reg = <0x20000 0x6000>;
+		interrupts = <
+			24 2 0 0
+			16 2 1 30>;
+	};
+
+/include/ "qoriq-mpic4.3.dtsi"
+
+	guts: global-utilities@e0000 {
+		compatible = "fsl,qoriq-device-config-2.0";
+		reg = <0xe0000 0xe00>;
+		fsl,has-rstcr;
+		fsl,liodn-bits = <12>;
+	};
+
+	clockgen: global-utilities@e1000 {
+		compatible = "fsl,qoriq-clockgen-2.0", "fixed-clock";
+		reg = <0xe1000 0x1000>;
+		clock-output-names = "sysclk";
+		#clock-cells = <0>;
+
+		#address-cells = <1>;
+		#size-cells = <0>;
+		pll0: pll0@800 {
+			#clock-cells = <1>;
+			reg = <0x800>;
+			compatible = "fsl,core-pll-clock";
+			clocks = <&clockgen>;
+			clock-output-names = "pll0", "pll0-div2", "pll0-div4";
+		};
+		pll1: pll1@820 {
+			#clock-cells = <1>;
+			reg = <0x820>;
+			compatible = "fsl,core-pll-clock";
+			clocks = <&clockgen>;
+			clock-output-names = "pll1", "pll1-div2", "pll1-div4";
+		};
+		mux0: mux0@0 {
+			#clock-cells = <0>;
+			reg = <0x0>;
+			compatible = "fsl,core-mux-clock";
+			clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
+				 <&pll1 0>, <&pll1 1>, <&pll1 2>;
+			clock-names = "pll0_0", "pll0_1", "pll0_2",
+				      "pll1_0", "pll1_1", "pll1_2";
+			clock-output-names = "cmux0";
+		};
+		mux1: mux1@20 {
+			#clock-cells = <0>;
+			reg = <0x20>;
+			compatible = "fsl,core-mux-clock";
+			clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
+				 <&pll1 0>, <&pll1 1>, <&pll1 2>;
+			clock-names = "pll0_0", "pll0_1", "pll0_2",
+				      "pll1_0", "pll1_1", "pll1_2";
+			clock-output-names = "cmux1";
+		};
+	};
+
+	rcpm: global-utilities@e2000 {
+		compatible = "fsl,qoriq-rcpm-2.0";
+		reg = <0xe2000 0x1000>;
+	};
+
+	sfp: sfp@e8000 {
+		compatible = "fsl,t2080-sfp";
+		reg = <0xe8000 0x1000>;
+	};
+
+	serdes: serdes@ea000 {
+		compatible = "fsl,t2080-serdes";
+		reg = <0xea000 0x4000>;
+	};
+
+/include/ "elo3-dma-0.dtsi"
+/include/ "elo3-dma-1.dtsi"
+/include/ "elo3-dma-2.dtsi"
+
+/include/ "qoriq-espi-0.dtsi"
+	spi@110000 {
+		fsl,espi-num-chipselects = <4>;
+	};
+
+/include/ "qoriq-esdhc-0.dtsi"
+	sdhc@114000 {
+		compatible = "fsl,esdhc";
+		sdhci,auto-cmd12;
+	};
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-gpio-1.dtsi"
+/include/ "qoriq-gpio-2.dtsi"
+/include/ "qoriq-gpio-3.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+	usb0: usb@210000 {
+		compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph";
+		phy_type = "utmi";
+		port0;
+	};
+/include/ "qoriq-usb2-dr-0.dtsi"
+	usb1: usb@211000 {
+		compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr";
+		dr_mode = "host";
+		phy_type = "utmi";
+	};
+/include/ "qoriq-sec5.2-0.dtsi"
+
+	L2_1: l2-cache-controller@c20000 {
+		/* Cluster 0 L2 cache */
+		compatible = "fsl,t2080-l2-cache-controller";
+		reg = <0xc20000 0x40000>;
+		next-level-cache = <&cpc>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi
new file mode 100644
index 0000000..1283e26
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi
@@ -0,0 +1,95 @@
+/*
+ * T2080/T2081 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e6500_power_isa.dtsi"
+
+/ {
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	aliases {
+		ccsr = &soc;
+		dcsr = &dcsr;
+
+		serial0 = &serial0;
+		serial1 = &serial1;
+		serial2 = &serial2;
+		serial3 = &serial3;
+
+		crypto = &crypto;
+		pci0 = &pci0;
+		pci1 = &pci1;
+		pci2 = &pci2;
+		pci3 = &pci3;
+		usb0 = &usb0;
+		usb1 = &usb1;
+		dma0 = &dma0;
+		dma1 = &dma1;
+		dma2 = &dma2;
+		sdhc = &sdhc;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: PowerPC,e6500@0 {
+			device_type = "cpu";
+			reg = <0 1>;
+			clocks = <&mux0>;
+			next-level-cache = <&L2_1>;
+		};
+		cpu1: PowerPC,e6500@2 {
+			device_type = "cpu";
+			reg = <2 3>;
+			clocks = <&mux0>;
+			next-level-cache = <&L2_1>;
+		};
+		cpu2: PowerPC,e6500@4 {
+			device_type = "cpu";
+			reg = <4 5>;
+			clocks = <&mux0>;
+			next-level-cache = <&L2_1>;
+		};
+		cpu3: PowerPC,e6500@6 {
+			device_type = "cpu";
+			reg = <6 7>;
+			clocks = <&mux0>;
+			next-level-cache = <&L2_1>;
+		};
+	};
+};
diff --git a/arch/powerpc/include/asm/mpc85xx.h b/arch/powerpc/include/asm/mpc85xx.h
index 736d4ac..3bef74a 100644
--- a/arch/powerpc/include/asm/mpc85xx.h
+++ b/arch/powerpc/include/asm/mpc85xx.h
@@ -77,6 +77,8 @@
 #define SVR_T1020	0x852100
 #define SVR_T1021	0x852101
 #define SVR_T1022	0x852102
+#define SVR_T2080	0x853000
+#define SVR_T2081	0x853100
 
 #define SVR_8610	0x80A000
 #define SVR_8641	0x809000
-- 
1.8.0

^ permalink raw reply related

* [PATCH v2] powerpc/powernv: Platform dump interface
From: Vasant Hegde @ 2014-01-16 12:16 UTC (permalink / raw)
  To: linuxppc-dev

This patch adds interface to retrieve FSP and Platform dump.

Flow:
  - We register for OPAL notification events.
  - OPAL sends new dump available notification.
  - We retrieve the dump via OPAL interface and send it to debugfs.
  - User copies the dump data and end ACKs via debugfs.
  - We send ACK to OPAL.

OPAL APIs:
  - opal_dump_init()
  - opal_dump_info()
  - opal_dump_read()
  - opal_dump_ack()
  - opal_dump_resend_notification()

debugfs files:
  We create below dump related files under "fsp" directory.
    - dump            : Dump data
    - dump_available  : New dump available notification to userspace
    - dump_control    : ACK dump. Also initiate new FSP dump
    - README          : README

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
---
Ben,
  I have rebased this patch on top of your next branch.

-Vasant

 arch/powerpc/include/asm/opal.h                |   12 +
 arch/powerpc/platforms/powernv/Makefile        |    2 
 arch/powerpc/platforms/powernv/opal-dump.c     |  436 ++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal-wrappers.S |    5 
 arch/powerpc/platforms/powernv/opal.c          |    2 
 5 files changed, 456 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/powernv/opal-dump.c

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 9a87b44..0f4c7ff 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -154,8 +154,13 @@ extern int opal_enter_rtas(struct rtas_args *args,
 #define OPAL_FLASH_VALIDATE			76
 #define OPAL_FLASH_MANAGE			77
 #define OPAL_FLASH_UPDATE			78
+#define OPAL_DUMP_INIT				81
+#define OPAL_DUMP_INFO				82
+#define OPAL_DUMP_READ				83
+#define OPAL_DUMP_ACK				84
 #define OPAL_GET_MSG				85
 #define OPAL_CHECK_ASYNC_COMPLETION		86
+#define OPAL_DUMP_RESEND			91
 
 #ifndef __ASSEMBLY__
 
@@ -236,6 +241,7 @@ enum OpalPendingState {
 	OPAL_EVENT_EPOW			= 0x80,
 	OPAL_EVENT_LED_STATUS		= 0x100,
 	OPAL_EVENT_PCI_ERROR		= 0x200,
+	OPAL_EVENT_DUMP_AVAIL		= 0x400,
 	OPAL_EVENT_MSG_PENDING		= 0x800,
 };
 
@@ -825,6 +831,11 @@ int64_t opal_lpc_read(uint32_t chip_id, enum OpalLPCAddressType addr_type,
 int64_t opal_validate_flash(uint64_t buffer, uint32_t *size, uint32_t *result);
 int64_t opal_manage_flash(uint8_t op);
 int64_t opal_update_flash(uint64_t blk_list);
+int64_t opal_dump_init(uint8_t dump_type);
+int64_t opal_dump_info(uint32_t *dump_id, uint32_t *dump_size);
+int64_t opal_dump_read(uint32_t dump_id, uint64_t buffer);
+int64_t opal_dump_ack(uint32_t dump_id);
+int64_t opal_dump_resend_notification(void);
 
 int64_t opal_get_msg(uint64_t buffer, size_t size);
 int64_t opal_check_completion(uint64_t buffer, size_t size, uint64_t token);
@@ -859,6 +870,7 @@ extern void opal_get_rtc_time(struct rtc_time *tm);
 extern unsigned long opal_get_boot_time(void);
 extern void opal_nvram_init(void);
 extern void opal_flash_init(void);
+extern void opal_platform_dump_init(void);
 
 extern int opal_machine_check(struct pt_regs *regs);
 
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 8d767fd..3528c11 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -1,6 +1,6 @@
 obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o
 obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
-obj-y			+= rng.o
+obj-y			+= rng.o opal-dump.o
 
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
new file mode 100644
index 0000000..4447027
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -0,0 +1,436 @@
+/*
+ * PowerNV OPAL Dump Interface
+ *
+ * Copyright 2013 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kobject.h>
+#include <linux/debugfs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/delay.h>
+
+#include <asm/opal.h>
+
+/* Dump type */
+#define DUMP_TYPE_FSP	0x01
+
+/* Extract failed */
+#define DUMP_NACK_ID	0x00
+
+/* Dump record */
+struct dump_record {
+	uint8_t		type;
+	uint32_t	id;
+	uint32_t	size;
+	char		*buffer;
+};
+static struct dump_record dump_record;
+
+/* Dump available status */
+static u32 dump_avail;
+
+/* Binary blobs */
+static struct debugfs_blob_wrapper dump_blob;
+static struct debugfs_blob_wrapper readme_blob;
+
+/* Ignore dump notification, if we fail to create debugfs files */
+static bool dump_disarmed = false;
+
+
+static void free_dump_sg_list(struct opal_sg_list *list)
+{
+	struct opal_sg_list *sg1;
+	while (list) {
+		sg1 = list->next;
+		kfree(list);
+		list = sg1;
+	}
+	list = NULL;
+}
+
+/*
+ * Build dump buffer scatter gather list
+ */
+static struct opal_sg_list *dump_data_to_sglist(void)
+{
+	struct opal_sg_list *sg1, *list = NULL;
+	void *addr;
+	int64_t size;
+
+	addr = dump_record.buffer;
+	size = dump_record.size;
+
+	sg1 = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!sg1)
+		goto nomem;
+
+	list = sg1;
+	sg1->num_entries = 0;
+	while (size > 0) {
+		/* Translate virtual address to physical address */
+		sg1->entry[sg1->num_entries].data =
+			(void *)(vmalloc_to_pfn(addr) << PAGE_SHIFT);
+
+		if (size > PAGE_SIZE)
+			sg1->entry[sg1->num_entries].length = PAGE_SIZE;
+		else
+			sg1->entry[sg1->num_entries].length = size;
+
+		sg1->num_entries++;
+		if (sg1->num_entries >= SG_ENTRIES_PER_NODE) {
+			sg1->next = kzalloc(PAGE_SIZE, GFP_KERNEL);
+			if (!sg1->next)
+				goto nomem;
+
+			sg1 = sg1->next;
+			sg1->num_entries = 0;
+		}
+		addr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+	return list;
+
+nomem:
+	pr_err("%s : Failed to allocate memory\n", __func__);
+	free_dump_sg_list(list);
+	return NULL;
+}
+
+/*
+ * Translate sg list address to absolute
+ */
+static void sglist_to_phy_addr(struct opal_sg_list *list)
+{
+	struct opal_sg_list *sg, *next;
+
+	for (sg = list; sg; sg = next) {
+		next = sg->next;
+		/* Don't translate NULL pointer for last entry */
+		if (sg->next)
+			sg->next = (struct opal_sg_list *)__pa(sg->next);
+		else
+			sg->next = NULL;
+
+		/* Convert num_entries to length */
+		sg->num_entries =
+			sg->num_entries * sizeof(struct opal_sg_entry) + 16;
+	}
+}
+
+static void free_dump_data_buf(void)
+{
+	vfree(dump_record.buffer);
+	dump_record.size = 0;
+}
+
+/*
+ * Allocate dump data buffer.
+ */
+static int alloc_dump_data_buf(void)
+{
+	dump_record.buffer = vzalloc(PAGE_ALIGN(dump_record.size));
+	if (!dump_record.buffer) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Initiate FipS dump
+ */
+static int64_t dump_fips_init(uint8_t type)
+{
+	int rc;
+
+	rc = opal_dump_init(type);
+	if (rc)
+		pr_warn("%s: Failed to initiate FipS dump (%d)\n",
+			__func__, rc);
+	return rc;
+}
+
+/*
+ * Get dump ID and size.
+ */
+static int64_t dump_read_info(void)
+{
+	int rc;
+
+	rc = opal_dump_info(&dump_record.id, &dump_record.size);
+	if (rc)
+		pr_warn("%s: Failed to get dump info (%d)\n",
+			__func__, rc);
+	return rc;
+}
+
+/*
+ * Send acknowledgement to OPAL
+ */
+static int64_t dump_send_ack(uint32_t dump_id)
+{
+	int rc;
+
+	rc = opal_dump_ack(dump_id);
+	if (rc)
+		pr_warn("%s: Failed to send ack to Dump ID 0x%x (%d)\n",
+			__func__, dump_id, rc);
+	return rc;
+}
+
+/*
+ * Retrieve dump data
+ */
+static int64_t dump_read_data(void)
+{
+	struct opal_sg_list *list;
+	uint64_t addr;
+	int64_t rc;
+
+	/* Allocate memory */
+	rc = alloc_dump_data_buf();
+	if (rc)
+		goto out;
+
+	/* Generate SG list */
+	list = dump_data_to_sglist();
+	if (!list) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Translate sg list addr to real address */
+	sglist_to_phy_addr(list);
+
+	/* First entry address */
+	addr = __pa(list);
+
+	/* Fetch data */
+	rc = OPAL_BUSY_EVENT;
+	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+		rc = opal_dump_read(dump_record.id, addr);
+		if (rc == OPAL_BUSY_EVENT) {
+			opal_poll_events(NULL);
+			msleep(10);
+		}
+	}
+
+	if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL)
+		pr_warn("%s: Extract dump failed for ID 0x%x\n",
+			__func__, dump_record.id);
+
+	/* Free SG list */
+	free_dump_sg_list(list);
+
+out:
+	return rc;
+}
+
+static int extract_dump(void)
+{
+	int rc;
+
+	/* Get dump ID, size */
+	rc = dump_read_info();
+	if (rc != OPAL_SUCCESS)
+		return rc;
+
+	/* Read dump data */
+	rc = dump_read_data();
+	if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) {
+		/*
+		 * Failed to allocate memory to retrieve dump. Lets send
+		 * negative ack so that we get notification again.
+		 */
+		dump_send_ack(DUMP_NACK_ID);
+
+		/* Free dump buffer */
+		free_dump_data_buf();
+
+		return rc;
+	}
+	if (rc == OPAL_PARTIAL)
+		pr_info("%s: Platform dump partially read. ID = 0x%x\n",
+			__func__, dump_record.id);
+	else
+		pr_info("%s: New platform dump available. ID = 0x%x\n",
+			__func__, dump_record.id);
+
+	/* Update dump blob */
+	dump_blob.data = (void *)dump_record.buffer;
+	dump_blob.size = dump_record.size;
+
+	/* Update dump available status */
+	dump_avail = 1;
+
+	return rc;
+}
+
+static void dump_extract_fn(struct work_struct *work)
+{
+	extract_dump();
+}
+
+static DECLARE_WORK(dump_work, dump_extract_fn);
+
+/* Workqueue to extract dump */
+static void schedule_extract_dump(void)
+{
+	schedule_work(&dump_work);
+}
+
+/*
+ * New dump available notification
+ *
+ * Once we get notification, we extract dump via OPAL call
+ * and then pass dump to userspace via debugfs interface.
+ */
+static int dump_event(struct notifier_block *nb,
+		      unsigned long events, void *change)
+{
+	/*
+	 * Don't retrieve dump, if we don't have debugfs
+	 * interface to pass data to userspace.
+	 */
+	if (dump_disarmed)
+		return 0;
+
+	/* Check for dump available notification */
+	if (events & OPAL_EVENT_DUMP_AVAIL)
+		schedule_extract_dump();
+
+	return 0;
+}
+
+static struct notifier_block dump_nb = {
+	.notifier_call  = dump_event,
+	.next           = NULL,
+	.priority       = 0
+};
+
+
+/* debugfs README message */
+static const char readme_msg[] =
+	"Platform dump HOWTO:\n\n"
+	"files:\n"
+	"  dump                  - Binary file, contains actual dump data\n"
+	"  dump_available (r--)  - New dump available notification\n"
+	"                          0 : No dump available\n"
+	"                          1 : New dump available\n"
+	"  dump_control(-w-)     - Dump control file\n"
+	"                          1 : Send acknowledgement (dump copied)\n"
+	"                          2 : Initiate FipS dump\n";
+
+/* debugfs dump_control file operations */
+static ssize_t dump_control_write(struct file *file,
+				  const char __user *user_buf,
+				  size_t count, loff_t *ppos)
+{
+	char buf[4];
+	size_t buf_size;
+
+	buf_size = min(count, (sizeof(buf) - 1));
+	if (copy_from_user(buf, user_buf, buf_size))
+		return -EFAULT;
+
+	switch (buf[0]) {
+	case '1':	/* Dump send ack */
+		if (dump_avail) {
+			dump_avail = 0;
+			free_dump_data_buf();
+			dump_send_ack(dump_record.id);
+		}
+		break;
+	case '2':	/* Initiate FipS dump */
+		dump_fips_init(DUMP_TYPE_FSP);
+		break;
+	default:
+		break;
+	}
+	return count;
+}
+
+static const struct file_operations dump_control_fops = {
+	.open	= simple_open,
+	.write	= dump_control_write,
+	.llseek	= default_llseek,
+};
+
+/*
+ * Create dump debugfs file
+ */
+static int debugfs_dump_init(void)
+{
+	struct dentry *dir, *file;
+
+	/* FSP dump directory */
+	dir = debugfs_create_dir("fsp", NULL);
+	if (!dir)
+		goto out;
+
+	/* README */
+	readme_blob.data = (void *)readme_msg;
+	readme_blob.size = strlen(readme_msg);
+	file = debugfs_create_blob("README", 0400, dir, &readme_blob);
+	if (!file)
+		goto remove_dir;
+
+	/* Dump available notification */
+	file = debugfs_create_u32("dump_avail", 0400, dir, &dump_avail);
+	if (!file)
+		goto remove_dir;
+
+	/* data file */
+	dump_blob.data = (void *)dump_record.buffer;
+	dump_blob.size = dump_record.size;
+	file = debugfs_create_blob("dump", 0400, dir, &dump_blob);
+	if (!file)
+		goto remove_dir;
+
+	/* Control file */
+	file = debugfs_create_file("dump_control", 0200, dir,
+				   NULL, &dump_control_fops);
+	if (!file)
+		goto remove_dir;
+
+	return 0;
+
+remove_dir:
+	debugfs_remove_recursive(dir);
+
+out:
+	dump_disarmed = true;
+	return -1;
+}
+
+void __init opal_platform_dump_init(void)
+{
+	int rc;
+
+	/* debugfs interface */
+	rc = debugfs_dump_init();
+	if (rc) {
+		pr_warn("%s: Failed to create debugfs interface (%d)\n",
+			__func__, rc);
+		return;
+	}
+
+	/* Register for opal notifier */
+	rc = opal_notifier_register(&dump_nb);
+	if (rc) {
+		pr_warn("%s: Can't register OPAL event notifier (%d)\n",
+			__func__, rc);
+		return;
+	}
+
+	/* Request to resend dump available notification */
+	opal_dump_resend_notification();
+}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 719aa5c..c1e979b 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -126,5 +126,10 @@ OPAL_CALL(opal_return_cpu,			OPAL_RETURN_CPU);
 OPAL_CALL(opal_validate_flash,			OPAL_FLASH_VALIDATE);
 OPAL_CALL(opal_manage_flash,			OPAL_FLASH_MANAGE);
 OPAL_CALL(opal_update_flash,			OPAL_FLASH_UPDATE);
+OPAL_CALL(opal_dump_init,			OPAL_DUMP_INIT);
+OPAL_CALL(opal_dump_info,			OPAL_DUMP_INFO);
+OPAL_CALL(opal_dump_read,			OPAL_DUMP_READ);
+OPAL_CALL(opal_dump_ack,			OPAL_DUMP_ACK);
 OPAL_CALL(opal_get_msg,				OPAL_GET_MSG);
 OPAL_CALL(opal_check_completion,		OPAL_CHECK_ASYNC_COMPLETION);
+OPAL_CALL(opal_dump_resend_notification,	OPAL_DUMP_RESEND);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 7a184a0..1b00e99 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -473,6 +473,8 @@ static int __init opal_init(void)
 	if (rc == 0) {
 		/* Setup code update interface */
 		opal_flash_init();
+		/* Setup platform dump extract interface */
+		opal_platform_dump_init();
 	}
 
 	return 0;

^ permalink raw reply related

* [PATCH RFC] powerpc/mpc85xx: add support for the kmp204x reference board
From: Valentin Longchamp @ 2014-01-16 13:38 UTC (permalink / raw)
  To: scottwood, galak; +Cc: Valentin Longchamp, linuxppc-dev

This patch introduces the support for Keymile's kmp204x reference
design. This design is based on Freescale's P2040/P2041 SoC.

The peripherals used by this design are:
- SPI NOR Flash as bootloader medium
- NAND Flash with a ubi partition
- 2 PCIe busses (hosts 1 and 3)
- 3 FMAN Ethernet devices (FMAN1 DTSEC1/2/5)
- 4 Local Bus windows, with one dedicated to the QRIO reset/power mgmt
  FPGA
- 2 HW I2C busses
- last but not least, the mandatory serial port

The patch also adds a defconfig file for this reference design and a DTS
file for the kmcoge4 board which is the first one based on this
reference design.

To try to avoid code duplication, the support was added directly to the
corenet_generic.c file.

Signed-off-by: Valentin Longchamp <valentin.longchamp@keymile.com>
---
 arch/powerpc/boot/dts/kmcoge4.dts             | 165 ++++++++++++++++++
 arch/powerpc/configs/85xx/kmp204x_defconfig   | 231 ++++++++++++++++++++++++++
 arch/powerpc/platforms/85xx/Kconfig           |  14 ++
 arch/powerpc/platforms/85xx/Makefile          |   1 +
 arch/powerpc/platforms/85xx/corenet_generic.c |  52 ++++++
 5 files changed, 463 insertions(+)
 create mode 100644 arch/powerpc/boot/dts/kmcoge4.dts
 create mode 100644 arch/powerpc/configs/85xx/kmp204x_defconfig

diff --git a/arch/powerpc/boot/dts/kmcoge4.dts b/arch/powerpc/boot/dts/kmcoge4.dts
new file mode 100644
index 0000000..c10df6d
--- /dev/null
+++ b/arch/powerpc/boot/dts/kmcoge4.dts
@@ -0,0 +1,165 @@
+/*
+ * Keymile kmcoge4 Device Tree Source, based on the P2041RDB DTS
+ *
+ * (C) Copyright 2014
+ * Valentin Longchamp, Keymile AG, valentin.longchamp@keymile.com
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+/include/ "fsl/p2041si-pre.dtsi"
+
+/ {
+	model = "keymile,kmcoge4";
+	compatible = "keymile,kmp204x";
+	#address-cells = <2>;
+	#size-cells = <2>;
+	interrupt-parent = <&mpic>;
+
+	memory {
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01008000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "spansion,s25fl256s1";
+				reg = <0>;
+				spi-max-frequency = <20000000>; /* input clock */
+				partition@u-boot {
+					label = "u-boot";
+					reg = <0x00000000 0x00100000>;
+					read-only;
+				};
+				partition@env {
+					label = "env";
+					reg = <0x00100000 0x00010000>;
+				};
+				partition@envred {
+					label = "envred";
+					reg = <0x00110000 0x00010000>;
+				};
+				partition@fman {
+					label = "fman-ucode";
+					reg = <0x00120000 0x00010000>;
+					read-only;
+				};
+			};
+
+			zl30343@1 {
+				compatible = "gen,spidev";
+				reg = <1>;
+				spi-max-frequency = <8000000>;
+			};
+
+			flash@2 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,m25p32";
+				reg = <2>;
+				spi-max-frequency = <15000000>;
+				partition@fpga-config {
+					label = "fpga-config";
+					reg = <0x00000000 0x00400000>;
+				};
+			};
+		};
+
+		i2c@119000 {
+			status = "disabled";
+		};
+
+		i2c@119100 {
+			status = "disabled";
+		};
+
+		usb0: usb@210000 {
+			status = "disabled";
+		};
+
+		usb1: usb@211000 {
+			status = "disabled";
+		};
+
+		sata@220000 {
+			status = "disabled";
+		};
+
+		sata@221000 {
+			status = "disabled";
+		};
+	};
+
+	rio: rapidio@ffe0c0000 {
+		status = "disabled";
+	};
+
+	lbc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x1000>;
+		ranges = <0 0 0xf 0xffa00000 0x00040000		/* LB 0 */
+			  1 0 0xf 0xfb000000 0x00010000		/* LB 1 */
+			  2 0 0xf 0xd0000000 0x10000000		/* LB 2 */
+			  3 0 0xf 0xe0000000 0x10000000>;	/* LB 3 */
+
+		nand@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,elbc-fcm-nand";
+			reg = <0 0 0x40000>;
+
+			partition@0 {
+				label = "ubi0";
+				reg = <0x0 0x8000000>;
+			};
+		};
+	};
+
+	pci0: pcie@ffe200000 {
+		reg = <0xf 0xfe200000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe201000 {
+		status = "disabled";
+	};
+
+	pci2: pcie@ffe202000 {
+		reg = <0xf 0xfe202000 0 0x1000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x20000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};
+
+/include/ "fsl/p2041si-post.dtsi"
diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig
new file mode 100644
index 0000000..3bbf4fa
--- /dev/null
+++ b/arch/powerpc/configs/85xx/kmp204x_defconfig
@@ -0,0 +1,231 @@
+CONFIG_PPC_85xx=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MAC_PARTITION=y
+CONFIG_KMP204X=y
+CONFIG_MPIC_MSGR=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_HIGHMEM=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_KEXEC=y
+CONFIG_FORCE_MAX_ZONEORDER=13
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCI_MSI=y
+CONFIG_ADVANCED_OPTIONS=y
+CONFIG_LOWMEM_SIZE_BOOL=y
+CONFIG_LOWMEM_SIZE=0x20000000
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_KEY=y
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_INET_AH=y
+CONFIG_INET_ESP=y
+CONFIG_INET_IPCOMP=y
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6=y
+CONFIG_IP_SCTP=m
+CONFIG_TIPC=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=y
+CONFIG_NET_SCH_HTB=y
+CONFIG_NET_SCH_HFSC=y
+CONFIG_NET_SCH_PRIO=y
+CONFIG_NET_SCH_MULTIQ=y
+CONFIG_NET_SCH_RED=y
+CONFIG_NET_SCH_SFQ=y
+CONFIG_NET_SCH_TEQL=y
+CONFIG_NET_SCH_TBF=y
+CONFIG_NET_SCH_GRED=y
+CONFIG_NET_CLS_BASIC=y
+CONFIG_NET_CLS_TCINDEX=y
+CONFIG_NET_CLS_U32=y
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_FLOW=y
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/mdev"
+CONFIG_DEVTMPFS=y
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_PHRAM=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_ECC_BCH=y
+CONFIG_MTD_NAND_FSL_ELBC=y
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_GLUEBI=y
+CONFIG_PROC_DEVICETREE=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=2
+CONFIG_BLK_DEV_RAM_SIZE=2048
+CONFIG_EEPROM_AT24=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_ALTEON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_EXAR is not set
+CONFIG_FSL_PQ_MDIO=y
+CONFIG_FSL_XGMAC_MDIO=y
+# CONFIG_NET_VENDOR_HP is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MELLANOX is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROCHIP is not set
+# CONFIG_NET_VENDOR_MYRI is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_PACKET_ENGINE is not set
+# CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+CONFIG_MARVELL_PHY=y
+CONFIG_VITESSE_PHY=y
+CONFIG_FIXED_PHY=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_PPC_EPAPR_HV_BYTECHAN=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_NVRAM=y
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MUX=y
+CONFIG_I2C_MUX_PCA954x=y
+CONFIG_I2C_MPC=y
+CONFIG_SPI=y
+CONFIG_SPI_GPIO=y
+CONFIG_SPI_FSL_SPI=y
+CONFIG_SPI_FSL_ESPI=y
+CONFIG_SPI_SPIDEV=m
+CONFIG_PTP_1588_CLOCK=y
+# CONFIG_HWMON is not set
+CONFIG_VIDEO_OUTPUT_CONTROL=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_EDAC=y
+CONFIG_EDAC_MM_EDAC=y
+CONFIG_EDAC_MPC85XX=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_DS3232=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_UIO=y
+CONFIG_STAGING=y
+# CONFIG_NET_VENDOR_SILICOM is not set
+CONFIG_FSL_PAMU=y
+CONFIG_EXT2_FS=y
+CONFIG_NTFS_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_UBIFS_FS=y
+CONFIG_CRAMFS=y
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=m
+CONFIG_CRC_ITU_T=m
+CONFIG_DEBUG_INFO=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_SCHEDSTATS=y
+CONFIG_RCU_TRACE=y
+CONFIG_UPROBE_EVENT=y
+CONFIG_CRYPTO_NULL=y
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index 4d46349..78b849b 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -263,6 +263,20 @@ config CORENET_GENERIC
 	  The following boards are supported for both 32bit and 64bit kernel:
 	    P5020 DS and P5040 DS
 
+config KMP204X
+	bool "Keymile kmp204x generic platform"
+	select DEFAULT_UIMAGE
+	select E500
+	select PPC_E500MC
+	select PHYS_64BIT
+	select SWIOTLB
+	select ARCH_REQUIRE_GPIOLIB
+	select GPIO_MPC8XXX
+	select PPC_EPAPR_HV_PIC
+	help
+	  This option enables support for the Keymile boards based on the
+	  kmp204x platform.
+
 endif # FSL_SOC_BOOKE
 
 config TQM85xx
diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index dd4c0b5..76042e1 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_XES_MPC85xx) += xes_mpc85xx.o
 obj-$(CONFIG_GE_IMP3A)	  += ge_imp3a.o
 obj-$(CONFIG_PPC_QEMU_E500) += qemu_e500.o
 obj-$(CONFIG_SGY_CTS1000) += sgy_cts1000.o
+obj-$(CONFIG_KMP204X)     += corenet_generic.o
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index fbd871e..8e84e1c 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -122,6 +122,7 @@ static const char * const hv_boards[] __initconst = {
 	NULL
 };
 
+#ifdef CONFIG_CORENET_GENERIC
 /*
  * Called very early, device-tree isn't unflattened
  */
@@ -180,3 +181,54 @@ machine_arch_initcall(corenet_generic, corenet_gen_publish_devices);
 #ifdef CONFIG_SWIOTLB
 machine_arch_initcall(corenet_generic, swiotlb_setup_bus_notifier);
 #endif
+#endif
+
+#ifdef CONFIG_KMP204X
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+static int __init kmp204x_generic_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	return of_flat_dt_is_compatible(root, "keymile,kmp204x");
+}
+
+
+/*
+ * Setup the architecture
+ */
+void __init kmp204x_gen_setup_arch(void)
+{
+	mpc85xx_smp_init();
+
+	swiotlb_detect_4g();
+
+	pr_info("%s platform from Keymile\n", ppc_md.name);
+}
+
+define_machine(kmp204x) {
+	.name			= "kmp204x",
+	.probe			= kmp204x_generic_probe,
+	.setup_arch		= kmp204x_gen_setup_arch,
+	.init_IRQ		= corenet_gen_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
+	.get_irq		= mpic_get_coreint_irq,
+	.restart		= fsl_rstcr_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+#ifdef CONFIG_PPC64
+	.power_save		= book3e_idle,
+#else
+	.power_save		= e500_idle,
+#endif
+};
+
+machine_arch_initcall(kmp204x, corenet_gen_publish_devices);
+
+#ifdef CONFIG_SWIOTLB
+machine_arch_initcall(kmp204x, swiotlb_setup_bus_notifier);
+#endif
+#endif
-- 
1.8.0.1

^ permalink raw reply related

* Re: [PATCH RFC v6 4/5] dma: mpc512x: register for device tree channel lookup
From: Gerhard Sittig @ 2014-01-16 14:26 UTC (permalink / raw)
  To: Alexander Popov
  Cc: Lars-Peter Clausen, Arnd Bergmann, Vinod Koul, dmaengine,
	Dan Williams, Anatolij Gustschin, linuxppc-dev
In-Reply-To: <CAF0T0X67RVNoJAM+9DyDM=HKw_5P4MExgXWaEj5_asqVd5hvDQ@mail.gmail.com>

On Mon, Jan 13, 2014 at 12:17 +0400, Alexander Popov wrote:
> 
> Thanks for your replies, Gerhard and Vinod.
> 
> 2014/1/9 Vinod Koul <vinod.koul@intel.com>:
> > On Wed, Jan 08, 2014 at 05:47:19PM +0100, Gerhard Sittig wrote:
> >> [ what is the semantics of DMA_PRIVATE capability flag?
> >>   is documentation available beyond the initial commit message?
> >>   need individual channels be handled instead of controllers? ]
> >
> > The DMA_PRIVATE means that your channels are not to be used for global memcpy,
> > as one can do in async cases (this is hwere DMAengine came into existence)
> >
> > If the device has the capablity of doing genric memcpy then it should not set
> > this. For slave dma usage the dam channel can transfer data to a specfic
> > slave device(s), hence we should use this is geric fashion so setting
> > DMA_PRIVATE makes sense in those cases.
> 
> Each DMA channel of MPC512x DMA controller can do _both_
> mem-to-mem transfers and transfers between mem and some slave peripheral
> (only one DMA channel is fully dedicated to DDR).
> All DMA channels of MPC512x DMA controller belong to one dma_device.
> So we _don't_ need setting DMA_PRIVATE flag for this dma_device at all, do we?

I'd phrase it a little stronger.  It's not that we don't _need_
the DMA_PRIVATE flag, it's actually that we _must_not_ use it
(unless I'm being dense, and keep missing something).  With the
DMA_PRIVATE flag set, the generic allocator will refuse to use
any channel of the only DMA controller, which totally eliminates
general use, and only leaves us with explicitly configured uses
(that would be MMC only in mainline, and nothing else).

> >> Still I see a difference in the lookup approaches:  Yours applies
> >> DMA_PRIVATE globally and in advance, preventing _any_ use of DMA
> >> for memory transfers.  While the __dma_request_channel() routine
> >> only applies it _temporarily_ around a dma_chan_get() operation.
> >> Allowing for use of DMA channels by both individual peripherals
> >> as well as memory transfers.
> >>
> > No it doesnt prevent. You can still use it for memcpy once you have the channel.

Vinod, what am I missing here?  Before probe() there is no DMA
controller.  After probe() the DMA_PRIVATE flag is set and thus
general allocation won't happen.  How exactly does one get to
"have the channel" for memory transfers?  Aren't the channel
references acquired upon demand, as the need arises?  While the
DMA controller has no means to know whether "all memory transfer
channel aquisition was done" or whether "all slave peripherals
have their channel" (if at all such a situation exists, given we
have dynamically loadable modules), such that the DMA_PRIVATE
toggle could get thrown one way or another?

This brings me back to a question I raised earlier:  Am I
overestimating the benefit or importance of DMA supported memory
transfers?  Am I wrong assuming that there are users of this
feature which need not get configured explicitly (i.e. they
operate in transparent ways, using whatever they find to be
available), and that the set of these users and their consumption
of DMA resources is something that is dynamic (i.e. driven by
demand, instead of pre-allocated and then probably inappropriate
for the workload they see)?

> Excuse me, I don't completely understand why dma_request_channel()
> needs to set DMA_PRIVATE flag.
> If dma_request_channel() for some dma_device without DMA_PRIVATE
> is called before the first dmaengine_get()
> then no DMA channels of this dma_device will become available for memcpy
> by slab allocator.
> Could you give me a clue?
> 
> >> > > Consider the fact that this driver
> >> > > handles both MPC5121 as well as MPC8308 hardware.
> >> >
> >> > Ah, yes, sorry. I should certainly fix this, if setting of DMA_PRIVATE flag
> >> > is needed at all.
> >>
> >> What I meant here is that implications for all affected platforms
> >> should be considered.  There is one driver source, but the driver
> >> applies to more than one platform (another issue of the driver is
> >> that this is not apparent from the doc nor the compat strings).
> 
> I'll add a comment with information about the supported platforms to
> mpc512x_dma.c
> in RFC PATCH 1/5. Ok?
> 
> >> So blocking memory transfers in mpc512x_dma.c is a total breakage
> >> for MPC8308 (removes the only previous feature and adds nothing),
> >> and is a regression for MPC512x (removes the previously supported
> >> memory transfers, while it may add peripheral supports with very
> >> few users).
> 
> Yes, I see. MPC512x and MPC8308 should be treated differently.

Alexander, are you suggesting to treat 512x and 8308 differently,
and did you decide how to do that?  Previous review feedback
raised the question whether this is needed or appropriate, while
there has not been an answer yet AFAICT.  I would not jump to
conclusions here, especially when you cannot test what you
change.


virtually yours
Gerhard Sittig
-- 
DENX Software Engineering GmbH,     MD: Wolfgang Denk & Detlev Zundel
HRB 165235 Munich, Office: Kirchenstr. 5, D-82194 Groebenzell, Germany
Phone: +49-8142-66989-0 Fax: +49-8142-66989-80  Email: office@denx.de

^ permalink raw reply

* Kernel stack overflows due to  "powerpc: Remove ksp_limit on ppc64" with v3.13-rc8 on ppc32 (P2020)
From: Guenter Roeck @ 2014-01-16 18:05 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev; +Cc: Linus Torvalds

Hi all,

I am getting kernel stack overflows with v3.13-rc8 on a system with P2020 CPU.
The kernel is patched for the target, but I don't think that is related.
Stack overflows are in different areas, but always in calls from __do_softirq.

Crashes happen reliably either during boot or if I put any kind of load
onto the system.

Example:

Kernel stack overflow in process eb3e5a00, r1=eb79df90
CPU: 0 PID: 2838 Comm: ssh Not tainted 3.13.0-rc8-juniper-00146-g19eca00 #4
task: eb3e5a00 ti: c0616000 task.ti: ef440000
NIP: c003a420 LR: c003a410 CTR: c0017518
REGS: eb79dee0 TRAP: 0901   Not tainted (3.13.0-rc8-juniper-00146-g19eca00)
MSR: 00029000 <CE,EE,ME>  CR: 24008444  XER: 00000000
GPR00: c003a410 eb79df90 eb3e5a00 00000000 eb05d900 00000001 65d87646 00000000
GPR08: 00000000 020b8000 00000000 00000000 44008442
NIP [c003a420] __do_softirq+0x94/0x1ec
LR [c003a410] __do_softirq+0x84/0x1ec
Call Trace:
[eb79df90] [c003a410] __do_softirq+0x84/0x1ec (unreliable)
[eb79dfe0] [c003a970] irq_exit+0xbc/0xc8
[eb79dff0] [c000cc1c] call_do_irq+0x24/0x3c
[ef441f20] [c00046a8] do_IRQ+0x8c/0xf8
[ef441f40] [c000e7f4] ret_from_except+0x0/0x18
--- Exception: 501 at 0xfcda524
    LR = 0x10024900
Instruction dump:
7c781b78 3b40000a 3a73b040 543c0024 3a800000 3b3913a0 7ef5bb78 48201bf9
5463103a 7d3b182e 7e89b92e 7c008146 <3ba00000> 7e7e9b78 48000014 57fff87f
Kernel panic - not syncing: kernel stack overflow
CPU: 0 PID: 2838 Comm: ssh Not tainted 3.13.0-rc8-juniper-00146-g19eca00 #4
Call Trace:
Rebooting in 180 seconds..

Reverting the following commit fixes the problem.

cbc9565ee8 "powerpc: Remove ksp_limit on ppc64"

Should I submit a patch reverting this commit, or is there a better way to fix
the problem on short notice (given that 3.13 is close) ?

Thanks,
Guenter

^ permalink raw reply

* Re: [PATCH RFC] powerpc/mpc85xx: add support for the kmp204x reference board
From: Scott Wood @ 2014-01-16 23:35 UTC (permalink / raw)
  To: Valentin Longchamp; +Cc: linuxppc-dev
In-Reply-To: <1389879525-27130-1-git-send-email-valentin.longchamp@keymile.com>

On Thu, 2014-01-16 at 14:38 +0100, Valentin Longchamp wrote:
> This patch introduces the support for Keymile's kmp204x reference
> design. This design is based on Freescale's P2040/P2041 SoC.
> 
> The peripherals used by this design are:
> - SPI NOR Flash as bootloader medium
> - NAND Flash with a ubi partition
> - 2 PCIe busses (hosts 1 and 3)
> - 3 FMAN Ethernet devices (FMAN1 DTSEC1/2/5)
> - 4 Local Bus windows, with one dedicated to the QRIO reset/power mgmt
>   FPGA
> - 2 HW I2C busses
> - last but not least, the mandatory serial port
> 
> The patch also adds a defconfig file for this reference design and a DTS
> file for the kmcoge4 board which is the first one based on this
> reference design.
> 
> To try to avoid code duplication, the support was added directly to the
> corenet_generic.c file.
> 
> Signed-off-by: Valentin Longchamp <valentin.longchamp@keymile.com>
> ---
>  arch/powerpc/boot/dts/kmcoge4.dts             | 165 ++++++++++++++++++
>  arch/powerpc/configs/85xx/kmp204x_defconfig   | 231 ++++++++++++++++++++++++++
>  arch/powerpc/platforms/85xx/Kconfig           |  14 ++
>  arch/powerpc/platforms/85xx/Makefile          |   1 +
>  arch/powerpc/platforms/85xx/corenet_generic.c |  52 ++++++
>  5 files changed, 463 insertions(+)
>  create mode 100644 arch/powerpc/boot/dts/kmcoge4.dts
>  create mode 100644 arch/powerpc/configs/85xx/kmp204x_defconfig
> 
> diff --git a/arch/powerpc/boot/dts/kmcoge4.dts b/arch/powerpc/boot/dts/kmcoge4.dts
> new file mode 100644
> index 0000000..c10df6d
> --- /dev/null
> +++ b/arch/powerpc/boot/dts/kmcoge4.dts
> @@ -0,0 +1,165 @@
> +/*
> + * Keymile kmcoge4 Device Tree Source, based on the P2041RDB DTS
> + *
> + * (C) Copyright 2014
> + * Valentin Longchamp, Keymile AG, valentin.longchamp@keymile.com
> + *
> + * Copyright 2011 Freescale Semiconductor Inc.
> + *
> + * This program is free software; you can redistribute  it and/or modify it
> + * under  the terms of  the GNU General  Public License as published by the
> + * Free Software Foundation;  either version 2 of the  License, or (at your
> + * option) any later version.
> + */
> +
> +/include/ "fsl/p2041si-pre.dtsi"
> +
> +/ {
> +	model = "keymile,kmcoge4";
> +	compatible = "keymile,kmp204x";

Don't put wildcards in compatible.

> +	soc: soc@ffe000000 {
> +		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
> +		reg = <0xf 0xfe000000 0 0x00001000>;
> +		spi@110000 {
> +			flash@0 {
> +				#address-cells = <1>;
> +				#size-cells = <1>;
> +				compatible = "spansion,s25fl256s1";
> +				reg = <0>;
> +				spi-max-frequency = <20000000>; /* input clock */
> +				partition@u-boot {
> +					label = "u-boot";
> +					reg = <0x00000000 0x00100000>;
> +					read-only;
> +				};
> +				partition@env {
> +					label = "env";
> +					reg = <0x00100000 0x00010000>;
> +				};
> +				partition@envred {
> +					label = "envred";
> +					reg = <0x00110000 0x00010000>;
> +				};
> +				partition@fman {
> +					label = "fman-ucode";
> +					reg = <0x00120000 0x00010000>;
> +					read-only;
> +				};
> +			};

I realize it's common practice, but it would be good to get away from
putting partition layouts in the dts file.  Alternatives include using
mtdparts on the command line, or having U-Boot put the partition info
into the dtb based on the mtdparts environment variable (there is
existing code for this).

> +			zl30343@1 {
> +				compatible = "gen,spidev";

Node names are supposed to be generic.  Compatibles are supposed to be
specific.

> +	lbc: localbus@ffe124000 {
> +		reg = <0xf 0xfe124000 0 0x1000>;
> +		ranges = <0 0 0xf 0xffa00000 0x00040000		/* LB 0 */
> +			  1 0 0xf 0xfb000000 0x00010000		/* LB 1 */
> +			  2 0 0xf 0xd0000000 0x10000000		/* LB 2 */
> +			  3 0 0xf 0xe0000000 0x10000000>;	/* LB 3 */
> +
> +		nand@0,0 {
> +			#address-cells = <1>;
> +			#size-cells = <1>;
> +			compatible = "fsl,elbc-fcm-nand";
> +			reg = <0 0 0x40000>;
> +
> +			partition@0 {
> +				label = "ubi0";
> +				reg = <0x0 0x8000000>;
> +			};
> +		};
> +	};

No nodes for those other chipselects?

> diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig
> new file mode 100644
> index 0000000..3bbf4fa
> --- /dev/null
> +++ b/arch/powerpc/configs/85xx/kmp204x_defconfig

Why does this board need its own defconfig?

> diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
> index fbd871e..8e84e1c 100644
> --- a/arch/powerpc/platforms/85xx/corenet_generic.c
> +++ b/arch/powerpc/platforms/85xx/corenet_generic.c
> @@ -122,6 +122,7 @@ static const char * const hv_boards[] __initconst = {
>  	NULL
>  };
>  
> +#ifdef CONFIG_CORENET_GENERIC

corenet_generic.c without CONFIG_CORENET_GENERIC?

>  /*
>   * Called very early, device-tree isn't unflattened
>   */
> @@ -180,3 +181,54 @@ machine_arch_initcall(corenet_generic, corenet_gen_publish_devices);
>  #ifdef CONFIG_SWIOTLB
>  machine_arch_initcall(corenet_generic, swiotlb_setup_bus_notifier);
>  #endif
> +#endif
> +
> +#ifdef CONFIG_KMP204X
> +/*
> + * Called very early, device-tree isn't unflattened
> + */
> +static int __init kmp204x_generic_probe(void)
> +{
> +	unsigned long root = of_get_flat_dt_root();
> +
> +	return of_flat_dt_is_compatible(root, "keymile,kmp204x");
> +}
> +
> +
> +/*
> + * Setup the architecture
> + */
> +void __init kmp204x_gen_setup_arch(void)
> +{
> +	mpc85xx_smp_init();
> +
> +	swiotlb_detect_4g();
> +
> +	pr_info("%s platform from Keymile\n", ppc_md.name);
> +}
> +
> +define_machine(kmp204x) {
> +	.name			= "kmp204x",
> +	.probe			= kmp204x_generic_probe,
> +	.setup_arch		= kmp204x_gen_setup_arch,
> +	.init_IRQ		= corenet_gen_pic_init,
> +#ifdef CONFIG_PCI
> +	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
> +#endif
> +	.get_irq		= mpic_get_coreint_irq,
> +	.restart		= fsl_rstcr_restart,
> +	.calibrate_decr		= generic_calibrate_decr,
> +	.progress		= udbg_progress,
> +#ifdef CONFIG_PPC64
> +	.power_save		= book3e_idle,
> +#else
> +	.power_save		= e500_idle,
> +#endif
> +};
> +
> +machine_arch_initcall(kmp204x, corenet_gen_publish_devices);
> +
> +#ifdef CONFIG_SWIOTLB
> +machine_arch_initcall(kmp204x, swiotlb_setup_bus_notifier);
> +#endif
> +#endif

The whole point of corenet_generic.c is to avoid duplicating all of this
stuff.

Can't you just use corenet_generic as-is other than adding the
compatible to boards[]?  If not, explain why and put it in a different
file.

-Scott

^ permalink raw reply

* [PATCH 0/8] Add support for PowerPC Hypervisor supplied performance counters
From: Cody P Schafer @ 2014-01-16 23:53 UTC (permalink / raw)
  To: Linux PPC
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo, Cody P Schafer

These patches add basic pmus for 2 powerpc hypervisor interfaces to obtain
performance counters: gpci ("get performance counter info") and 24x7.

The counters supplied by these interfaces are continually counting and never
need to be (and cannot be) disabled or enabled. They additionally do not
generate any interrupts. This makes them in some regards similar to software
counters, and as a result their implimentation shares some common code (which
an initial patch exposes) with the sw counters.

There is ongoing work to support transactions for each of these pmus.

Cody P Schafer (8):
  perf: add PMU_RANGE_ATTR() helper for use by sw-like pmus
  perf core: export swevent hrtimer helpers
  powerpc: add hvcalls for 24x7 and gpci (get performance counter info)
  powerpc: add hv_gpci interface header
  powerpc: add 24x7 interface header
  powerpc/perf: add support for the hv gpci (get performance counter
    info) interface
  powerpc/perf: add support for the hv 24x7 interface
  powerpc/perf: add kconfig option for hypervisor provided counters

 arch/powerpc/include/asm/hv_24x7.h     | 239 ++++++++++++++++
 arch/powerpc/include/asm/hv_gpci.h     | 490 +++++++++++++++++++++++++++++++++
 arch/powerpc/include/asm/hvcall.h      |   6 +-
 arch/powerpc/perf/Makefile             |   2 +
 arch/powerpc/perf/hv-24x7.c            | 354 ++++++++++++++++++++++++
 arch/powerpc/perf/hv-gpci.c            | 235 ++++++++++++++++
 arch/powerpc/platforms/Kconfig.cputype |   6 +
 include/linux/perf_event.h             |  22 +-
 kernel/events/core.c                   |   8 +-
 9 files changed, 1356 insertions(+), 6 deletions(-)
 create mode 100644 arch/powerpc/include/asm/hv_24x7.h
 create mode 100644 arch/powerpc/include/asm/hv_gpci.h
 create mode 100644 arch/powerpc/perf/hv-24x7.c
 create mode 100644 arch/powerpc/perf/hv-gpci.c

-- 
1.8.5.2

^ permalink raw reply

* [PATCH 1/8] perf: add PMU_RANGE_ATTR() helper for use by sw-like pmus
From: Cody P Schafer @ 2014-01-16 23:53 UTC (permalink / raw)
  To: Linux PPC
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo, Cody P Schafer
In-Reply-To: <1389916434-2288-1-git-send-email-cody@linux.vnet.ibm.com>

Add PMU_RANGE_ATTR() and PMU_RANGE_RESV() (for reserved areas) which
generate functions to extract the relevent bits from
event->attr.config{,1,2} for use by sw-like pmus where the
'config{,1,2}' values don't map directly to hardware registers.

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
---
 include/linux/perf_event.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2e069d1..8646e33 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -870,4 +870,21 @@ _name##_show(struct device *dev,					\
 									\
 static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
 
+#define PMU_RANGE_ATTR(name, attr_var, bit_start, bit_end)		\
+PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end);		\
+PMU_RANGE_RESV(name, attr_var, bit_start, bit_end)
+
+#define PMU_RANGE_RESV(name, attr_var, bit_start, bit_end)		\
+static u64 event_get_##name##_max(void)					\
+{									\
+	int bits = (bit_end) - (bit_start) + 1;				\
+	return ((0x1ULL << (bits - 1ULL)) - 1ULL) |			\
+		(0xFULL << (bits - 4ULL));				\
+}									\
+static u64 event_get_##name(struct perf_event *event)			\
+{									\
+	return (event->attr.attr_var >> (bit_start)) &			\
+		event_get_##name##_max();				\
+}
+
 #endif /* _LINUX_PERF_EVENT_H */
-- 
1.8.5.2

^ permalink raw reply related

* [PATCH 3/8] powerpc: add hvcalls for 24x7 and gpci (get performance counter info)
From: Cody P Schafer @ 2014-01-16 23:53 UTC (permalink / raw)
  To: Linux PPC
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo, Cody P Schafer
In-Reply-To: <1389916434-2288-1-git-send-email-cody@linux.vnet.ibm.com>

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/hvcall.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index d8b600b..48d6efa 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -269,11 +269,15 @@
 #define H_COP			0x304
 #define H_GET_MPP_X		0x314
 #define H_SET_MODE		0x31C
-#define MAX_HCALL_OPCODE	H_SET_MODE
+#define H_GET_24X7_CATALOG_PAGE 0xF078
+#define H_GET_24X7_DATA		0xF07C
+#define H_GET_PERF_COUNTER_INFO 0xF080
+#define MAX_HCALL_OPCODE	H_GET_PERF_COUNTER_INFO
 
 /* Platform specific hcalls, used by KVM */
 #define H_RTAS			0xf000
 
+
 #ifndef __ASSEMBLY__
 
 /**
-- 
1.8.5.2

^ permalink raw reply related

* [PATCH 2/8] perf core: export swevent hrtimer helpers
From: Cody P Schafer @ 2014-01-16 23:53 UTC (permalink / raw)
  To: Linux PPC
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo, Cody P Schafer
In-Reply-To: <1389916434-2288-1-git-send-email-cody@linux.vnet.ibm.com>

Export the swevent hrtimer helpers currently only used in events/core.c
to allow the addition of architecture specific sw-like pmus.

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
---
 include/linux/perf_event.h | 5 ++++-
 kernel/events/core.c       | 8 ++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8646e33..c5bc71a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -558,7 +558,10 @@ extern void perf_pmu_migrate_context(struct pmu *pmu,
 				int src_cpu, int dst_cpu);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
-
+extern void perf_swevent_init_hrtimer(struct perf_event *event);
+extern void perf_swevent_start_hrtimer(struct perf_event *event);
+extern void perf_swevent_cancel_hrtimer(struct perf_event *event);
+extern int perf_swevent_event_idx(struct perf_event *event);
 
 struct perf_sample_data {
 	u64				type;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f574401..d881d1e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5801,7 +5801,7 @@ static int perf_swevent_init(struct perf_event *event)
 	return 0;
 }
 
-static int perf_swevent_event_idx(struct perf_event *event)
+int perf_swevent_event_idx(struct perf_event *event)
 {
 	return 0;
 }
@@ -6030,7 +6030,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 	return ret;
 }
 
-static void perf_swevent_start_hrtimer(struct perf_event *event)
+void perf_swevent_start_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	s64 period;
@@ -6052,7 +6052,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
 				HRTIMER_MODE_REL_PINNED, 0);
 }
 
-static void perf_swevent_cancel_hrtimer(struct perf_event *event)
+void perf_swevent_cancel_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
@@ -6064,7 +6064,7 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 	}
 }
 
-static void perf_swevent_init_hrtimer(struct perf_event *event)
+void perf_swevent_init_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-- 
1.8.5.2

^ permalink raw reply related

* [PATCH 4/8] powerpc: add hv_gpci interface header
From: Cody P Schafer @ 2014-01-16 23:53 UTC (permalink / raw)
  To: Linux PPC
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo, Cody P Schafer
In-Reply-To: <1389916434-2288-1-git-send-email-cody@linux.vnet.ibm.com>

"H_GetPerformanceCounterInfo" (refered to as hv_gpci or just gpci from
here on) is an interface to retrieve specific performance counters and
other data from the hypervisor. All outputs have a fixed format (and
are represented as structs in this patch).

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/hv_gpci.h | 490 +++++++++++++++++++++++++++++++++++++
 1 file changed, 490 insertions(+)
 create mode 100644 arch/powerpc/include/asm/hv_gpci.h

diff --git a/arch/powerpc/include/asm/hv_gpci.h b/arch/powerpc/include/asm/hv_gpci.h
new file mode 100644
index 0000000..237de26
--- /dev/null
+++ b/arch/powerpc/include/asm/hv_gpci.h
@@ -0,0 +1,490 @@
+#ifndef LINUX_POWERPC_UAPI_HV_GPCI_H_
+#define LINUX_POWERPC_UAPI_HV_GPCI_H_
+
+#include <linux/types.h>
+
+/* From the document "H_GetPerformanceCounterInfo Interface" v1.06, paritialy
+ * updated with v1.07 */
+
+/* H_GET_PERF_COUNTER_INFO argument */
+struct hv_get_perf_counter_info_params {
+	__be32 counter_request; /* I */
+	__be32 starting_index;  /* IO */
+	__be16 secondary_index; /* IO */
+	__be16 returned_values; /* O */
+	__be32 detail_rc; /* O, "only for 32bit clients" */
+
+	/*
+	 * O, size each of counter_value element in bytes, only set for version
+	 * >= 0x3
+	 */
+	__be16 cv_element_size;
+
+	/* I, funny if version < 0x3 */
+	__u8 counter_info_version_in;
+
+	/* O, funny if version < 0x3 */
+	__u8 counter_info_version_out;
+	__u8 reserved[0xC];
+	__u8 counter_value[];
+} __packed;
+
+/* 8 => power8 (1.07)
+ * 6 => TLBIE  (1.07)
+ * 5 => (1.05)
+ * 4 => ?
+ * 3 => ?
+ * 2 => v7r7m0.phyp (?)
+ * 1 => v7r6m0.phyp (?)
+ * 0 => v7r{2,3,4}m0.phyp (?)
+ */
+#define COUNTER_INFO_VERSION_CURRENT 0x8
+
+/* these determine the counter_value[] layout and the meaning of starting_index
+ * and secondary_index */
+enum counter_info_requests {
+
+	/* GENERAL */
+
+	/* @starting_index: "starting" physical processor index or -1 for
+	 *                  current phyical processor. Data is only collected
+	 *                  for the processors' "primary" thread.
+	 * @secondary_index: unused
+	 */
+	CIR_dispatch_timebase_by_processor = 0x10,
+
+	/* @starting_index: starting partition id or -1 for the current logical
+	 *                  partition (virtual machine).
+	 * @secondary_index: unused
+	 */
+	CIR_entitled_capped_uncapped_donated_idle_timebase_by_partition = 0x20,
+
+	/* @starting_index: starting partition id or -1 for the current logical
+	 *                  partition (virtual machine).
+	 * @secondary_index: unused
+	 */
+	CIR_run_instructions_run_cycles_by_partition = 0x30,
+
+	/* @starting_index: must be -1 (to refer to the current partition)
+	 * @secondary_index: unused
+	 */
+	CIR_system_performance_capabilities = 0x40,
+
+
+	/* Data from this should only be considered valid if
+	 * counter_info_version >= 0x3
+	 * @starting_index: starting hardware chip id or -1 for the current hw
+	 *		    chip id
+	 * @secondary_index: unused
+	 */
+	CIR_processor_bus_utilization_abc_links = 0x50,
+
+	/* Data from this should only be considered valid if
+	 * counter_info_version >= 0x3
+	 * @starting_index: starting hardware chip id or -1 for the current hw
+	 *		    chip id
+	 * @secondary_index: unused
+	 */
+	CIR_processor_bus_utilization_wxyz_links = 0x60,
+
+
+	/* EXPANDED */
+
+	/* Avaliable if counter_info_version >= 0x3
+	 * @starting_index: starting hardware chip id or -1 for the current hw
+	 *		    chip id
+	 * @secondary_index: unused
+	 */
+	CIR_processor_bus_utilization_gx_links = 0x70,
+
+	/* Avaliable if counter_info_version >= 0x3
+	 * @starting_index: starting hardware chip id or -1 for the current hw
+	 *		    chip id
+	 * @secondary_index: unused
+	 */
+	CIR_processor_bus_utilization_mc_links = 0x80,
+
+	/* Avaliable if counter_info_version >= 0x3
+	 * @starting_index: starting physical processor or -1 for the current
+	 *                  physical processor
+	 * @secondary_index: unused
+	 */
+	CIR_processor_config = 0x90,
+
+	/* Avaliable if counter_info_version >= 0x3
+	 * @starting_index: starting physical processor or -1 for the current
+	 *                  physical processor
+	 * @secondary_index: unused
+	 */
+	CIR_current_processor_frequency = 0x91,
+
+	CIR_processor_core_utilization = 0x94,
+
+	CIR_processor_core_power_mode = 0x95,
+
+	CIR_affinity_domain_information_by_virutal_processor = 0xA0,
+
+	CIR_affinity_domain_info_by_domain = 0xB0,
+
+	CIR_affinity_domain_info_by_partition = 0xB1,
+
+	/* @starting_index: unused
+	 * @secondary_index: unused
+	 */
+	CIR_physical_memory_info = 0xC0,
+
+	CIR_processor_bus_topology = 0xD0,
+
+	CIR_partition_hypervisor_queuing_times = 0xE0,
+
+	CIR_system_hypervisor_times = 0xF0,
+
+	/* LAB */
+
+	CIR_set_mmcrh = 0x80001000,
+	CIR_get_hpmcx = 0x80002000,
+};
+
+/* counter value layout */
+struct cv_dispatch_timebase_by_processor {
+	__be64 processor_time_in_timebase_cycles;
+	__be32 hw_processor_id;
+	__be16 owning_part_id; /* 0xffff if shared or unowned */
+	__u8 processor_state;
+	__u8 version; /* unused unless counter_info_version == 0 */
+	__be32 hw_chip_id; /* -1 for "Not Installed" processors */
+	__be32 phys_module_id; /* -1 for "Not Installed" processors */
+	__be32 primary_affinity_domain_idx;
+	__be32 secondary_affinity_domain_idx;
+	__be32 processor_version;
+	__be16 logical_processor_idx;
+	__u8 reserved[0x2];
+
+	/* counter_info_version >= 0x3 || version >= 0x1 */
+	__be32 processor_id_register;
+	__be32 physical_processor_idx; /* counter_info_version >= 0x3 */
+} __packed;
+
+struct cv_timebase_by_partition {
+	__be64 partition_id;
+	__be64 entitled_cycles;
+	__be64 consumed_capped_cycles;
+	__be64 consumed_uncapped_cycles;
+	__be64 cycles_donated;
+	__be64 purr_idle_cycles;
+} __packed;
+
+struct cv_cycles_per_partition {
+	__be64 partition_id;
+	__be64 instructions_completed; /* 0 if collection is unsupported */
+	__be64 cycles; /* 0 if collection is unsupported */
+} __packed;
+
+struct cv_system_performance_capabilities {
+	/* If != 0, allowed to collect data from other partitions */
+	__u8 perf_collect_privlidged;
+
+	/* These are only valid if counter_info_version >= 0x3 */
+#define CV_CM_GA       0x1
+#define CV_CM_EXPANDED 0x2
+#define CV_CM_LAB      0x3
+	/* remainig bits are reserved */
+	__u8 capability_mask;
+	__u8 reserved[0xE];
+} __packed;
+
+struct cv_processor_bus_utilization_abc {
+	__be32 hw_chip_id;
+	__u8 reserved1[0xC];
+	__be64 total_link_cycles;
+	__be64 idle_cycles_a;
+	__be64 idle_cycles_b;
+	__be64 idle_cycles_c;
+	__u8 reserved2[0x20];
+} __packed;
+
+struct cv_processor_bus_utilization_wxyz {
+	__be32 hw_chip_id;
+	__u8 reserved1[0xC];
+	__be64 total_link_cycles;
+
+	/* Inactive links (all cycles idle) give -1 */
+	__be64 idle_cycles_w;
+	__be64 idle_cycles_x;
+	__be64 idle_cycles_y;
+	__be64 idle_cycles_z;
+	__u8 reserved2[0x28];
+} __packed;
+
+/* EXPANDED */
+
+struct cv_gx_cycles {
+	__be64 address_cycles;
+	__be64 data_cycles;
+	__be64 retries;
+	__be64 bus_cycles;
+	__be64 total_cycles;
+} __packed;
+
+struct cv_gx_cycles_io {
+	struct cv_gx_cycles in, out;
+} __packed;
+
+struct cv_processor_bus_utilization_gx {
+	__be32 hw_chip_id;
+	__u8 reserved1[0xC];
+	struct cv_gx_cycles_io gx[2];
+} __packed;
+
+struct cv_mc_counts {
+	__be64 frames;
+	__be64 reads;
+	__be64 writes;
+	__be64 total_cycles;
+} __packed;
+
+/* inactive links return 0 for all utilization data */
+struct cv_processor_bus_utilization_mc {
+	__be32 hw_chip_id;
+	__u8 reserved1[0xC];
+	struct cv_mc_counts mc[2];
+} __packed;
+
+struct cv_processor_config {
+	__be32 physical_processor_idx;
+	__be32 hw_node_id;
+	__be32 hw_card_id;
+	__be32 phys_module_id;
+	__be32 hw_chip_id;
+	__be32 hw_processor_id;
+	__be32 processor_id_register;
+
+#define CV_PS_NOT_INSTALLED 0x1
+#define CV_PS_GAURDED_OFF   0x2
+#define CV_PS_UNLICENCED    0x3
+#define CV_PS_SHARED        0x4
+#define CV_PS_BORROWED      0x5
+#define CV_PS_DEDICATED     0x6
+	__u8 processor_state;
+
+	__u8 reserved1[0x1];
+	__be16 owning_part_id;
+	__be32 processor_version;
+	__u8 reserved2[0x4];
+} __packed;
+
+struct cv_processor_frequency {
+	__be32 physical_processor_idx;
+	__be32 hw_processor_id;
+	__u8 reserved1[0x8];
+	__be32 nominal_freq_mhz;
+	__be32 current_freq_mhz;
+} __packed;
+
+struct cv_processor_core_utilization {
+	__be32 physical_processor_idx;
+	__be32 hw_processor_id;
+	__be64 cycles;
+	__be64 timebase_at_collection;
+	__be64 purr_cycles;
+	__be64 sum_of_cycles_across_threads;
+	__be64 instructions_completed;
+} __packed;
+
+struct cv_processor_core_power_mode {
+	__be16 partition_id;
+	__u8 reserved1[0x6];
+
+#define CV_PM_NONE		 0x0
+#define CV_PM_NOMINAL		 0x1
+#define CV_PM_DYNAMIC_MAX_PERF   0x2
+#define CV_PM_DYNAMIC_POWER_SAVE 0x3
+#define CV_PM_UNKNOWN		 0xF
+	__be16 power_mode;
+
+	__u8 reserved2[0x6];
+} __packed;
+
+struct cv_affinity_domain_information_by_virutal_processor {
+	__be16 partition_id;
+	__be16 virtual_processor_idx;
+	__u8 reserved1[0xC];
+	__be16 physical_processor_idx;
+	__be16 primary_affinity_domain_idx;
+	__be16 secondary_affinity_domain_idx;
+	__u8 reserved2[0x2];
+	__u8 reserved3[0x8];
+} __packed;
+
+struct cv_affinity_domain_info_by_domain {
+	__be16 primary_affinity_domain_idx;
+	__be16 secondary_affinity_domain_idx;
+	__be32 total_processor_units;
+	__be32 free_dedicated_processor_units;
+	__be32 free_shared_processor_units;
+	__be32 total_memory_lmbs;
+	__be32 free_memory_lmbs;
+	__be32 num_partitions_in_domain;
+	__u8 reserved1[0x14];
+} __packed;
+
+struct cv_affinity_domain_info_by_partition {
+	__be16 partition_id;
+	__u8 reserved1[0x6];
+	__be16 assignment_order;
+
+#define CV_PPS_UNKNOWN			      0x00
+#define CV_PPS_CONTAIN_IN_PRIMARY_DOMAIN      0x01
+#define CV_PPS_CONTAIN_IN_SECONDARY_DOMAIN    0x02
+#define CV_PPS_SPREAD_ACROSS_SECONDAY_DOMAINS 0x03
+#define CV_PPS_WHEREEVER		      0x04
+#define CV_PPS_SCRAMBLE			      0x05
+	__u8 partition_placement_spread;
+
+	__u8 parition_affinity_score;
+	__be16 num_affinity_domain_elements;
+	__be16 affinity_domain_element_size;
+	__u8 domain_elements[];
+} __packed;
+
+struct cv_affinity_domain_elem {
+	__be16 primary_affinity_domain_idx;
+	__be16 secondary_affinity_domain_idx;
+	__be32 dedicated_processor_units_allocated;
+	__be32 dedicated_memory_allocated_reserved_1;
+	__be32 dedicated_memory_allocated_reserved_2;
+	__be32 dedicated_memory_allocated_16Gb_pages;
+	__u8 reserved[0x8];
+} __packed;
+
+/* Also avaliable via `of_get_flat_dt_prop(node, "ibm,lmb-size", &l)` */
+struct cv_physical_memory_info {
+	__be64 lmb_size_in_bytes;
+	__u8 reserved1[0x18];
+} __packed;
+
+struct cv_processor_bus_topology {
+	__be32 hw_chip_id;
+	__be32 hw_node_id;
+	__be32 fabric_chip_id;
+	__u8 reserved1[0x4];
+
+#define CV_IM_A_LINK_ACTIVE (1 << 0)
+#define CV_IM_B_LINK_ACTIVE (1 << 1)
+#define CV_IM_C_LINK_ACTIVE (1 << 2)
+/* Bits 3-5 are reserved */
+#define CV_IM_ABC_LINK_WIDTH_MASK ((1 << 6) | (1 << 7))
+#define CV_IM_ABC_LINK_WIDTH_SHIFT 6
+#define CV_IM_ABC_LINK_WIDTH_8B 0x0
+#define CV_IM_ABC_LINK_WIDTH_4B 0x1
+
+#define CV_IM_W_LINK_ACTIVE (1 << 8)
+#define CV_IM_X_LINK_ACTIVE (1 << 9)
+#define CV_IM_Y_LINK_ACTIVE (1 << 10)
+#define CV_IM_Z_LINK_ACTIVE (1 << 11)
+/* Bits 12-13 are reserved */
+
+#define CV_IM_WXYZ_LINK_WIDTH_MASK ((1 << 14) | (1 << 15))
+#define CV_IM_WXYZ_LINK_WIDTH_SHIFT 14
+#define CV_IM_WXYZ_LINK_WIDTH_8B 0x0
+#define CV_IM_WXYZ_LINK_WIDTH_4B 0x1
+
+#define CV_IM_GX0_CONFIGURED (1 << 16)
+#define CV_IM_GX1_CONFIGURED (1 << 17)
+/* Bits 18-23 are reserved */
+#define CV_IM_MC0_CONFIGURED (1 << 24)
+#define CV_IM_MC1_CONFIGURED (1 << 25)
+/* Bits 26-31 are reserved */
+
+	__be32 info_mask;
+
+	__u8 hw_node_id_connected_to_a_link;
+	__u8 hw_node_id_connected_to_b_link;
+
+	__u8 reserved2[0x2];
+
+	__u8 fabric_chip_id_connected_to_w_link;
+	__u8 fabric_chip_id_connected_to_x_link;
+	__u8 fabric_chip_id_connected_to_y_link;
+	__u8 fabric_chip_id_connected_to_z_link;
+
+	__u8 reserved3[0x4];
+} __packed;
+
+struct cv_partition_hypervisor_queuing_times {
+	__be16 partition_id;
+	__u8 reserved1[0x6];
+	__be64 time_waiting_for_entitlement; /* in timebase cycles */
+	__be64 times_waited_for_entitlement;
+	__be64 time_waiting_for_physical_processor; /* in timebase cycles */
+	__be64 times_waited_for_physical_processor;
+	__be64 dispatches_on_home_processor_core;
+	__be64 dispatches_on_home_primary_affinity_domain;
+	__be64 dispatches_on_home_secondary_affinity_domain;
+	__be64 dispatches_off_home_secondary_affinity_domain;
+	__be64 dispatches_on_dedicated_processor_donating_cycles;
+} __packed;
+
+struct cv_system_hypervisor_times {
+	__be64 phyp_time_spent_to_dispatch_virtual_processors;
+	__be64 phyp_time_spent_processing_virtual_processor_timers;
+	__be64 phyp_time_spent_managing_partitions_over_entitlement;
+	__be64 time_spent_on_system_managment;
+} __packed;
+
+/* LAB */
+
+struct cv_set_mmcrh {
+	/* Only HPMC bits (40:46, 48:54) used, all others ignored
+	 * -1 = default (0x00000000_003C1200)
+	 */
+	__be64 mmcrh_value_to_set;
+};
+
+struct cv_get_hpmcx {
+	__be32 hw_processor_id;
+	__u8 reserved1[0x4];
+	__be64 mmcrh_current;
+	__be64 time_since_mmcrh_was_set;
+	__be64 hpmc1_since_current_mmcrh;
+	__be64 hpmc2_since_current_mmcrh;
+	__be64 hpmc3_since_current_mmcrh;
+	__be64 hpmc3_current;
+	__be64 hpmc4_since_current_mmcrh;
+	__be64 hpmc4_current;
+};
+
+union h_gpci_cvs {
+	/* GA */
+	struct cv_dispatch_timebase_by_processor dispatch_timebase_by_processor;
+	struct cv_timebase_by_partition timebase_by_partition;
+	struct cv_cycles_per_partition cycles_per_partition;
+	struct cv_system_performance_capabilities system_performance_capabilities;
+	struct cv_processor_bus_utilization_abc processor_bus_utilization_abc;
+	struct cv_processor_bus_utilization_wxyz processor_bus_utilization_wxyz;
+
+	/* EXPANDED */
+	struct cv_gx_cycles gx_cycles;
+	struct cv_gx_cycles_io gx_cycles_io;
+	struct cv_processor_bus_utilization_gx processor_bus_utilization_gx;
+	struct cv_mc_counts mc_counts;
+	struct cv_processor_bus_utilization_mc processor_bus_utilization_mc;
+	struct cv_processor_config processor_config;
+	struct cv_processor_frequency processor_frequency;
+	struct cv_processor_core_utilization processor_core_utilization;
+	struct cv_processor_core_power_mode processor_core_power_mode;
+	struct cv_affinity_domain_information_by_virutal_processor affinity_domain_information_by_virutal_processor;
+	struct cv_affinity_domain_info_by_domain affinity_domain_info_by_domain;
+	struct cv_affinity_domain_info_by_partition affinity_domain_info_by_partition;
+	struct cv_affinity_domain_elem affinity_domain_elem;
+	struct cv_physical_memory_info physical_memory_info;
+	struct cv_processor_bus_topology processor_bus_topology;
+	struct cv_partition_hypervisor_queuing_times partition_hypervisor_queuing_times;
+	struct cv_system_hypervisor_times system_hypervisor_times;
+
+	/* LAB */
+	struct cv_set_mmcrh set_mmcrh;
+	struct cv_get_hpmcx get_hpmcx;
+};
+
+#endif
-- 
1.8.5.2

^ permalink raw reply related

* [PATCH 5/8] powerpc: add 24x7 interface header
From: Cody P Schafer @ 2014-01-16 23:53 UTC (permalink / raw)
  To: Linux PPC
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo, Cody P Schafer
In-Reply-To: <1389916434-2288-1-git-send-email-cody@linux.vnet.ibm.com>

24x7 (also called hv_24x7 or H_24X7) is an interface to obtain
performance counters from the hypervisor. These counters do not have a
fixed format/possition and are instead documented in a "24x7 Catalog",
which is provided by the hypervisor (that interface is also documented
in this header).

This method of obtaining performance counters from the hypervisor is
intended to paritialy replace the gpci interface.

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/hv_24x7.h | 239 +++++++++++++++++++++++++++++++++++++
 1 file changed, 239 insertions(+)
 create mode 100644 arch/powerpc/include/asm/hv_24x7.h

diff --git a/arch/powerpc/include/asm/hv_24x7.h b/arch/powerpc/include/asm/hv_24x7.h
new file mode 100644
index 0000000..f77b3cc
--- /dev/null
+++ b/arch/powerpc/include/asm/hv_24x7.h
@@ -0,0 +1,239 @@
+#ifndef ARCH_POWERPC_24X7_H_
+#define ARCH_POWERPC_24X7_H_
+
+#include <linux/types.h>
+
+struct hv_24x7_request {
+	/* PHYSICAL domains require enabling via phyp/hmc. */
+#define HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP 0x01
+#define HV_24X7_PERF_DOMAIN_PHYSICAL_CORE 0x02
+#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CORE   0x03
+#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CHIP   0x04
+#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_NODE   0x05
+#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_REMOTE_NODE 0x06
+	__u8 performance_domain;
+	__u8 reserved[0x1];
+
+	/* bytes to read starting at @data_offset. must be a multiple of 8 */
+	__be16 data_size;
+
+	/*
+	 * byte offset within the perf domain to read from. must be 8 byte
+	 * aligned
+	 */
+	__be32 data_offset;
+
+	/*
+	 * only valid for VIRTUAL_PROCESSOR domains, ignored for others.
+	 * -1 means "current partition only"
+	 *  Enabling via phyp/hmc required for non-"-1" values. 0 forbidden
+	 *  unless requestor is 0.
+	 */
+	__be16 starting_lpar_ix;
+
+	/*
+	 * Ignored when @starting_lpar_ix == -1
+	 * Ignored when @performance_domain is not VIRTUAL_PROCESSOR_*
+	 * -1 means "infinite" or all
+	 */
+	__be16 max_num_lpars;
+
+	/* chip, core, or virtual processor based on @performance_domain */
+	__be16 starting_ix;
+	__be16 max_ix;
+} __packed;
+
+struct hv_24x7_request_buffer {
+	/* 0 - ? */
+	/* 1 - ? */
+#define HV_24X7_IF_VERSION_CURRENT 0x01
+	__u8 interface_version;
+	__u8 num_requests;
+	__u8 reserved[0xE];
+	struct hv_24x7_request requests[];
+} __packed;
+
+struct hv_24x7_result_element {
+	__be16 lpar_ix;
+
+	/*
+	 * represents the core, chip, or virtual processor based on the
+	 * request's @performance_domain
+	 */
+	__be16 domain_ix;
+
+	/* -1 if @performance_domain does not refer to a virtual processor */
+	__be32 lpar_cfg_instance_id;
+
+	/* size = @result_element_data_size of cointaining result. */
+	__u8 element_data[];
+} __packed;
+
+struct hv_24x7_result {
+	__u8 result_ix;
+
+	/*
+	 * 0 = not all result elements fit into the buffer, additional requests
+	 *     required
+	 * 1 = all result elements were returned
+	 */
+	__u8 results_complete;
+	__be16 num_elements_returned;
+
+	/* This is a copy of @data_size from the coresponding hv_24x7_request */
+	__be16 result_element_data_size;
+	__u8 reserved[0x2];
+
+	/* WARNING: only valid for first result element due to variable sizes
+	 *          of result elements */
+	/* struct hv_24x7_result_element[@num_elements_returned] */
+	struct hv_24x7_result_element elements[];
+} __packed;
+
+struct hv_24x7_data_result_buffer {
+	/* See versioning for request buffer */
+	__u8 interface_version;
+
+	__u8 num_results;
+	__u8 reserved[0x1];
+	__u8 failing_request_ix;
+	__be32 detailed_rc;
+	__be64 cec_cfg_instance_id;
+	__be64 catalog_version_num;
+	__u8 reserved2[0x8];
+	/* WARNING: only valid for the first result due to variable sizes of
+	 *	    results */
+	struct hv_24x7_result results[]; /* [@num_results] */
+} __packed;
+
+/* From document "24x7 Event and Group Catalog Formats Proposal" v0.14 */
+struct hv_24x7_catalog_page_0 {
+#define HV_24X7_CATALOG_MAGIC 0x32347837 /* "24x7" in ASCII */
+	__be32 magic;
+	__be32 length; /* In 4096 byte pages */
+	__u8 reserved1[4];
+	__be32 version;
+	__u8 build_time_stamp[16]; /* "YYYYMMDDHHMMSS\0\0" */
+	__u8 reserved2[32];
+	__be16 schema_data_offs; /* in 4096 byte pages */
+	__be16 schema_data_len;  /* in 4096 byte pages */
+	__be16 schema_entry_count;
+	__u8 reserved3[2];
+	__be16 group_data_offs; /* in 4096 byte pages */
+	__be16 group_data_len;  /* in 4096 byte pages */
+	__be16 group_entry_count;
+	__u8 reserved4[2];
+	__be16 formula_data_offs; /* in 4096 byte pages */
+	__be16 formula_data_len;  /* in 4096 byte pages */
+	__be16 formula_entry_count;
+	__u8 reserved5[2];
+} __packed;
+
+struct hv_24x7_event_data {
+	__be16 length; /* in bytes, must be a multiple of 16 */
+	__u8 reserved1[2];
+	__u8 domain; /* Chip = 1, Core = 2 */
+	__u8 reserved2[1];
+	__be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */
+	__be16 event_group_record_len; /* in bytes */
+
+	/* in bytes, offset from event_group_record */
+	__be16 event_counter_offs;
+
+	/* verified_state, unverified_state, caveat_state, broken_state, ... */
+	__be32 flags;
+
+	__be16 primary_group_ix;
+	__be16 group_count;
+	__be16 event_name_len;
+	__u8 remainder[];
+	/* __u8 event_name[event_name_len - 2]; */
+	/* __be16 event_description_len; */
+	/* __u8 event_desc[event_description_len - 2]; */
+	/* __be16 detailed_desc_len; */
+	/* __u8 detailed_desc[detailed_desc_len - 2]; */
+} __packed;
+
+struct hv_24x7_group_data {
+	__be16 length; /* in bytes, must be multiple of 16 */
+	__u8 reserved1[2];
+	__be32 flags; /* undefined contents */
+	__u8 domain; /* Chip = 1, Core = 2 */
+	__u8 reserved2[1];
+	__be16 event_group_record_offs;
+	__be16 event_group_record_len;
+	__u8 group_schema_ix;
+	__u8 event_count; /* 1 to 16 */
+	__be16 event_ixs;
+	__be16 group_name_len;
+	__u8 remainder[];
+	/* __u8 group_name[group_name_len]; */
+	/* __be16 group_desc_len; */
+	/* __u8 group_desc[group_desc_len]; */
+} __packed;
+
+/* TODO: Schema Data */
+/* TODO: Event Counter Group Record (see the PORE/SLW workbook) */
+
+/* "Get Event Counter Group Record Schema hypervisor interface" */
+
+enum hv_24x7_grs_field_enums {
+	/* GRS_COUNTER_1 = 1
+	 * GRS_COUNTER_2 = 2
+	 * ...
+	 * GRS_COUNTER_31 = 32 // FIXME: Doc issue.
+	 */
+	GRS_COUNTER_BASE = 1,
+	GRS_COUNTER_LAST = 32,
+	GRS_TIMEBASE_UPDATE = 48,
+	GRS_TIMEBASE_FENCE = 49,
+	GRS_UPDATE_COUNT = 50,
+	GRS_MEASUREMENT_PERIOD = 51,
+	GRS_ACCUMULATED_MEASUREMENT_PERIOD = 52,
+	GRS_LAST_UPDATE_PERIOD = 53,
+	GRS_STATUS_FLAGS = 54,
+};
+
+enum hv_24x7_grs_enums {
+	GRS_CORE_SCHEMA_INDEX = 0,
+};
+
+struct hv_24x7_grs_field {
+	__be16 field_enum;
+	__be16 offs; /* in bytes, within Event Counter group record */
+	__be16 length; /* in bytes */
+	__be16 flags; /* presently unused */
+} __packed;
+
+struct hv_24x7_grs {
+	__be16 length;
+	__u8 reserved1[2];
+	__be16 descriptor;
+	__be16 version_id;
+	__u8 reserved2[6];
+	__be16 field_entry_count;
+	__u8 field_entrys[];
+} __packed;
+
+struct hv_24x7_formula_data {
+	__be32 length; /* in bytes, must be multiple of 16 */
+	__u8 reserved1[2];
+	__be32 flags; /* not yet defined */
+	__be16 group;
+	__u8 reserved2[6];
+	__be16 name_len;
+	__u8 remainder[];
+	/* __u8 name[name_len]; */
+	/* __be16 desc_len; */
+	/* __u8 desc[name_len]; */
+	/* __be16 formula_len */
+	/* __u8 formula[formula_len]; */
+} __packed;
+
+/* Formula Syntax: ie, impliment a forth interpereter. */
+/* need fast lookup of the formula names, event names, "delta-timebase",
+ * "delta-cycles", "delta-instructions", "delta-seconds" */
+/* operators: '+', '-', '*', '/', 'mod', 'rem', 'sqr', 'x^y' (XXX: pow? xor?),
+ *            'rot', 'dup' */
+
+#endif
-- 
1.8.5.2

^ permalink raw reply related

* [PATCH 6/8] powerpc/perf: add support for the hv gpci (get performance counter info) interface
From: Cody P Schafer @ 2014-01-16 23:53 UTC (permalink / raw)
  To: Linux PPC
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo, Cody P Schafer
In-Reply-To: <1389916434-2288-1-git-send-email-cody@linux.vnet.ibm.com>

This provides a basic link between perf and hv_gpci. Notably, it does
not yet support transactions and does not list any events (they can
still be manually composed).

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
---
 arch/powerpc/perf/hv-gpci.c | 235 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 235 insertions(+)
 create mode 100644 arch/powerpc/perf/hv-gpci.c

diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
new file mode 100644
index 0000000..31d9d59
--- /dev/null
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -0,0 +1,235 @@
+/*
+ * Hypervisor supplied "gpci" ("get performance counter info") performance
+ * counter support
+ *
+ * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
+ * Copyright 2014 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define pr_fmt(fmt) "hv-gpci: " fmt
+
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/hv_gpci.h>
+#include <asm/io.h>
+
+/* See arch/powerpc/include/asm/hv_gpci.h for details on the hcall interface */
+
+PMU_RANGE_ATTR(request, config, 0, 31); /* u32 */
+PMU_RANGE_ATTR(starting_index, config, 32, 63); /* u32 */
+PMU_RANGE_ATTR(secondary_index, config1, 0, 15); /* u16 */
+PMU_RANGE_ATTR(counter_info_version, config1, 16, 23); /* u8 */
+PMU_RANGE_ATTR(length, config1, 24, 31); /* u8, bytes of data (1-8) */
+PMU_RANGE_ATTR(offset, config1, 32, 63); /* u32, byte offset */
+
+static struct attribute *format_attr[] = {
+	&format_attr_request.attr,
+	&format_attr_starting_index.attr,
+	&format_attr_secondary_index.attr,
+	&format_attr_counter_info_version.attr,
+
+	&format_attr_offset.attr,
+	&format_attr_length.attr,
+	NULL,
+};
+
+static struct attribute_group format_group = {
+	.name = "format",
+	.attrs = format_attr,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&format_group,
+	NULL,
+};
+
+static unsigned long single_gpci_request(u32 req, u32 starting_index,
+		u16 secondary_index, u8 version_in, u32 offset, u8 length,
+		u64 *value)
+{
+	unsigned long ret;
+	size_t i;
+	u64 count;
+
+	struct {
+		struct hv_get_perf_counter_info_params params;
+		union {
+			union h_gpci_cvs data;
+			uint8_t bytes[sizeof(union h_gpci_cvs)];
+		};
+	} arg = {
+		.params = {
+			.counter_request = cpu_to_be32(req),
+			.starting_index = cpu_to_be32(starting_index),
+			.secondary_index = cpu_to_be16(secondary_index),
+			.counter_info_version_in = version_in,
+		}
+	};
+
+	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(&arg), sizeof(arg));
+	if (ret) {
+		pr_devel("hcall failed: 0x%lx\n", ret);
+		return ret;
+	}
+
+	/*
+	 * we verify offset and length are within the zeroed buffer at event
+	 * init.
+	 */
+	count = 0;
+	for (i = offset; i < offset + length; i++)
+		count |= arg.bytes[i] << (i - offset);
+
+	*value = count;
+	return ret;
+}
+
+static u64 h_gpci_get_value(struct perf_event *event)
+{
+	u64 count;
+	unsigned long ret = single_gpci_request(event_get_request(event),
+					event_get_starting_index(event),
+					event_get_secondary_index(event),
+					event_get_counter_info_version(event),
+					event_get_offset(event),
+					event_get_length(event),
+					&count);
+	if (ret)
+		return 0;
+	return count;
+}
+
+static void h_gpci_event_update(struct perf_event *event)
+{
+	s64 prev;
+	u64 now = h_gpci_get_value(event);
+	prev = local64_xchg(&event->hw.prev_count, now);
+	local64_add(now - prev, &event->count);
+}
+
+static void h_gpci_event_start(struct perf_event *event, int flags)
+{
+	local64_set(&event->hw.prev_count, h_gpci_get_value(event));
+	perf_swevent_start_hrtimer(event);
+}
+
+static void h_gpci_event_stop(struct perf_event *event, int flags)
+{
+	perf_swevent_cancel_hrtimer(event);
+	h_gpci_event_update(event);
+}
+
+static int h_gpci_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		h_gpci_event_start(event, flags);
+
+	return 0;
+}
+
+static void h_gpci_event_del(struct perf_event *event, int flags)
+{
+	h_gpci_event_stop(event, flags);
+}
+
+static void h_gpci_event_read(struct perf_event *event)
+{
+	h_gpci_event_update(event);
+}
+
+static int h_gpci_event_init(struct perf_event *event)
+{
+	u64 count;
+	u8 length;
+
+	/* Not our event */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* config2 is unused */
+	if (event->attr.config2)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest  ||
+	    is_sampling_event(event)) /* no sampling */
+		return -EINVAL;
+
+	/* no branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	length = event_get_length(event);
+	if (length < 1 || length > 8)
+		return -EINVAL;
+
+	/* last byte within the buffer? */
+	if ((event_get_offset(event) + length) > sizeof(union h_gpci_cvs))
+		return -EINVAL;
+
+	/* check if the request works... */
+	if (single_gpci_request(event_get_request(event),
+				event_get_starting_index(event),
+				event_get_secondary_index(event),
+				event_get_counter_info_version(event),
+				event_get_offset(event),
+				length,
+				&count))
+		return -EINVAL;
+
+	/*
+	 * Some of the events are per-cpu, some per-core, some per-chip, some
+	 * are global, and some access data from other virtual machines on the
+	 * same physical machine. We can't map the cpu value without a lot of
+	 * work. Instead, we pick an arbitrary cpu for all events on this pmu.
+	 */
+	event->cpu = 0;
+
+	perf_swevent_init_hrtimer(event);
+	return 0;
+}
+
+struct pmu h_gpci_pmu = {
+	.task_ctx_nr = perf_invalid_context,
+
+	.name = "hv_gpci",
+	.attr_groups = attr_groups,
+	.event_init = h_gpci_event_init,
+	.add = h_gpci_event_add,
+	.del = h_gpci_event_del,
+	.start = h_gpci_event_start,
+	.stop = h_gpci_event_stop,
+	.read = h_gpci_event_read,
+
+	.event_idx = perf_swevent_event_idx,
+};
+
+static int hv_gpci_init(void)
+{
+	int r;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		pr_info("Not running under phyp, not supported\n");
+		return -ENODEV;
+	}
+
+	r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+module_init(hv_gpci_init);
-- 
1.8.5.2

^ permalink raw reply related

* [PATCH 7/8] powerpc/perf: add support for the hv 24x7 interface
From: Cody P Schafer @ 2014-01-16 23:53 UTC (permalink / raw)
  To: Linux PPC
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo, Cody P Schafer
In-Reply-To: <1389916434-2288-1-git-send-email-cody@linux.vnet.ibm.com>

This provides a basic interface between hv_24x7 and perf. Similar to
the one provided for gpci, it lacks transaction support and does not
list any events.

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
---
 arch/powerpc/perf/hv-24x7.c | 354 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 354 insertions(+)
 create mode 100644 arch/powerpc/perf/hv-24x7.c

diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
new file mode 100644
index 0000000..fb49e66
--- /dev/null
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -0,0 +1,354 @@
+/*
+ * Hypervisor supplied "24x7" performance counter support
+ *
+ * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
+ * Copyright 2014 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "hv-24x7: " fmt
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/hv_24x7.h>
+#include <asm/hv_gpci.h>
+#include <asm/io.h>
+
+/* See arch/powerpc/include/asm/24x7.h for details on the hcall interface */
+
+/* TODO: Merging events:
+ * - Think of the hcall as an interface to a 4d array of counters:
+ *   - x = domains
+ *   - y = indexes in the domain (core, chip, vcpu, node, etc)
+ *   - z = offset into the counter space
+ *   - w = lpars (guest vms, "logical partitions")
+ * - A single request is: x,y,y_last,z,z_last,w,w_last
+ *   - this means we can retrieve a rectangle of counters in y,z for a single x.
+ *
+ * - Things to consider (ignoring w):
+ *   - input  cost_per_request = 16
+ *   - output cost_per_result(ys,zs)  = 8 + 8 * ys + ys * zs
+ *   - limited number of requests per hcall (must fit into 4K bytes)
+ *     - 4k = 16 [buffer header] - 16 [request size] * request_count
+ *     - 255 requests per hcall
+ *   - sometimes it will be more efficient to read extra data and discard
+ */
+
+PMU_RANGE_ATTR(domain, config, 0, 3); /* u3 0-6, one of HV_24X7_PERF_DOMAIN */
+PMU_RANGE_ATTR(starting_index, config, 16, 31); /* u16 */
+PMU_RANGE_ATTR(offset, config, 32, 63); /* u32, see "data_offset" */
+PMU_RANGE_ATTR(lpar, config1, 0, 15); /* u16 */
+
+PMU_RANGE_RESV(reserved1, config,   4, 15);
+PMU_RANGE_RESV(reserved2, config1, 16, 63);
+PMU_RANGE_RESV(reserved3, config2,  0, 63);
+
+static struct attribute *format_attr[] = {
+	&format_attr_domain.attr,
+	&format_attr_offset.attr,
+	&format_attr_starting_index.attr,
+	&format_attr_lpar.attr,
+	NULL,
+};
+
+static struct attribute_group format_group = {
+	.name = "format",
+	.attrs = format_attr,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&format_group,
+	NULL,
+};
+
+struct hv_perf_caps {
+	u16 version;
+	u16 other_allowed:1,
+	    ga:1,
+	    expanded:1,
+	    lab:1,
+	    unused:12;
+};
+
+static unsigned long hv_perf_caps_get(struct hv_perf_caps *caps)
+{
+	unsigned long r;
+	struct p {
+		struct hv_get_perf_counter_info_params params;
+		struct cv_system_performance_capabilities caps;
+	} __packed __aligned(sizeof(uint64_t));
+
+	struct p arg = {
+		.params = {
+			.counter_request = cpu_to_be32(
+					CIR_system_performance_capabilities),
+			.starting_index = cpu_to_be32(-1),
+			.counter_info_version_in = 0,
+		}
+	};
+
+	r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			       virt_to_phys(&arg), sizeof(arg));
+	caps->version = arg.params.counter_info_version_out;
+	caps->other_allowed = arg.caps.perf_collect_privlidged;
+	caps->ga = (arg.caps.capability_mask & CV_CM_GA) >> CV_CM_GA;
+	caps->expanded = (arg.caps.capability_mask & CV_CM_EXPANDED)
+				>> CV_CM_EXPANDED;
+	caps->lab = (arg.caps.capability_mask & CV_CM_LAB) >> CV_CM_LAB;
+
+	return r;
+}
+
+static bool is_physical_domain(int domain)
+{
+	return  domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP ||
+		domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
+}
+
+static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
+					 u16 lpar, u64 *res)
+{
+	unsigned long ret;
+	struct reqb {
+		struct hv_24x7_request_buffer buf;
+		struct hv_24x7_request req;
+	} request_buffer = {
+		.buf = {
+			.interface_version = HV_24X7_IF_VERSION_CURRENT,
+			.num_requests = 1,
+		},
+		.req = {
+			.performance_domain = domain,
+			.data_size = cpu_to_be16(8),
+			.data_offset = cpu_to_be32(offset),
+			.starting_lpar_ix = cpu_to_be16(lpar),
+			.max_num_lpars = cpu_to_be16(1),
+			.starting_ix = cpu_to_be16(ix),
+			.max_ix = cpu_to_be16(1),
+		}
+	};
+
+	struct resb {
+		struct hv_24x7_data_result_buffer buf;
+		struct hv_24x7_result res;
+		struct hv_24x7_result_element elem;
+		__be64 result;
+	} result_buffer = {};
+
+	ret = plpar_hcall_norets(H_GET_24X7_DATA,
+			virt_to_phys(&request_buffer), sizeof(request_buffer),
+			virt_to_phys(&result_buffer),  sizeof(result_buffer));
+
+	if (ret) {
+		/*
+		 * this failure is unexpected since we check if the exact same
+		 * hcall works in event_init
+		 */
+		pr_err_ratelimited("hcall failed: %d %#x %#x %d => 0x%lx (%ld) detail=0x%x failing ix=%x\n",
+				domain, offset, ix, lpar,
+				ret, ret,
+				result_buffer.buf.detailed_rc,
+				result_buffer.buf.failing_request_ix);
+		return ret;
+	}
+
+	*res = be64_to_cpu(result_buffer.result);
+	return ret;
+}
+
+static int h_24x7_event_init(struct perf_event *event)
+{
+	struct hv_perf_caps caps;
+	unsigned domain;
+	u64 ct;
+
+	/* Not our event */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Unused areas must be 0 */
+	if (event_get_reserved1(event) ||
+	    event_get_reserved2(event) ||
+	    event_get_reserved3(event)) {
+		pr_devel("reserved set when forbidden 0x%llx(0x%llx) 0x%llx(0x%llx) 0x%llx(0x%llx)\n",
+				event->attr.config,
+				event_get_reserved1(event),
+				event->attr.config1,
+				event_get_reserved2(event),
+				event->attr.config2,
+				event_get_reserved3(event));
+		return -EINVAL;
+	}
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest  ||
+	    is_sampling_event(event)) /* no sampling */
+		return -EINVAL;
+
+	/* no branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	/* offset must be 8 byte aligned */
+	if (event_get_offset(event) % 8) {
+		pr_devel("bad alignment\n");
+		return -EINVAL;
+	}
+
+	/* Domains above 6 are invalid */
+	domain = event_get_domain(event);
+	if (domain > 6) {
+		pr_devel("invalid domain\n");
+		return -EINVAL;
+	}
+
+	if (hv_perf_caps_get(&caps)) {
+		pr_devel("could not get capabilities\n");
+		return -EIO;
+	}
+
+	/* PHYSICAL domains & other lpars require extra capabilities */
+	if (!caps.other_allowed && (is_physical_domain(domain) ||
+		(event_get_lpar(event) != event_get_lpar_max()))) {
+		pr_devel("hv permisions disallow\n");
+		return -EPERM;
+	}
+
+	/* see if the event complains */
+	if (single_24x7_request(event_get_domain(event),
+				event_get_offset(event),
+				event_get_starting_index(event),
+				event_get_lpar(event),
+				&ct)) {
+		pr_devel("test hcall failed\n");
+		return -EIO;
+	}
+
+	/*
+	 * Some of the events are per-cpu, some per-core, some per-chip, some
+	 * are global, and some access data from other virtual machines on the
+	 * same physical machine. We can't map the cpu value without a lot of
+	 * work. Instead, we pick an arbitrary cpu for all events on this pmu.
+	 */
+	event->cpu = 0;
+
+	perf_swevent_init_hrtimer(event);
+	return 0;
+}
+
+static u64 h_24x7_get_value(struct perf_event *event)
+{
+	unsigned long ret;
+	u64 ct;
+	ret = single_24x7_request(event_get_domain(event),
+				  event_get_offset(event),
+				  event_get_starting_index(event),
+				  event_get_lpar(event),
+				  &ct);
+	if (ret)
+		/* We checked this in event init, shouldn't fail here... */
+		return 0;
+
+	return ct;
+}
+
+static void h_24x7_event_update(struct perf_event *event)
+{
+	s64 prev;
+	u64 now;
+	now = h_24x7_get_value(event);
+	prev = local64_xchg(&event->hw.prev_count, now);
+	local64_add(now - prev, &event->count);
+}
+
+static void h_24x7_event_start(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_RELOAD)
+		local64_set(&event->hw.prev_count, h_24x7_get_value(event));
+	perf_swevent_start_hrtimer(event);
+}
+
+static void h_24x7_event_stop(struct perf_event *event, int flags)
+{
+	perf_swevent_cancel_hrtimer(event);
+	h_24x7_event_update(event);
+}
+
+static int h_24x7_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		h_24x7_event_start(event, flags);
+
+	return 0;
+}
+
+static void h_24x7_event_del(struct perf_event *event, int flags)
+{
+	h_24x7_event_stop(event, flags);
+}
+
+static void h_24x7_event_read(struct perf_event *event)
+{
+	h_24x7_event_update(event);
+}
+
+struct pmu h_24x7_pmu = {
+	.task_ctx_nr = perf_invalid_context,
+
+	.name = "hv_24x7",
+	.attr_groups = attr_groups,
+	.event_init  = h_24x7_event_init,
+	.add         = h_24x7_event_add,
+	.del         = h_24x7_event_del,
+	.start       = h_24x7_event_start,
+	.stop        = h_24x7_event_stop,
+	.read        = h_24x7_event_read,
+
+	.event_idx = perf_swevent_event_idx,
+};
+
+static int hv_24x7_init(void)
+{
+	int r;
+	unsigned long hret;
+	struct hv_perf_caps caps;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		pr_info("not an lpar, disabled\n");
+		return -ENODEV;
+	}
+
+	hret = hv_perf_caps_get(&caps);
+	if (hret) {
+		pr_info("could not obtain capabilities, error 0x%80lx, disabling\n",
+				hret);
+		return -ENODEV;
+	}
+
+	pr_info("gpci interface versions: hv:0x%x, kernel:0x%x\n",
+			caps.version, COUNTER_INFO_VERSION_CURRENT);
+
+	pr_info("gpci interface capabilities: other:%d ga:%d expanded:%d lab:%d\n",
+			caps.other_allowed, caps.ga,
+			caps.expanded,
+			caps.lab);
+
+	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+module_init(hv_24x7_init);
-- 
1.8.5.2

^ permalink raw reply related

* [PATCH 8/8] powerpc/perf: add kconfig option for hypervisor provided counters
From: Cody P Schafer @ 2014-01-16 23:53 UTC (permalink / raw)
  To: Linux PPC
  Cc: Peter Zijlstra, LKML, Ingo Molnar, Paul Mackerras,
	Arnaldo Carvalho de Melo, Cody P Schafer
In-Reply-To: <1389916434-2288-1-git-send-email-cody@linux.vnet.ibm.com>

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
---
 arch/powerpc/perf/Makefile             | 2 ++
 arch/powerpc/platforms/Kconfig.cputype | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 60d71ee..5e5fcd2 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -11,5 +11,7 @@ obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
+obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o
+
 obj-$(CONFIG_PPC64)		+= $(obj64-y)
 obj-$(CONFIG_PPC32)		+= $(obj32-y)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index bca2465..f98ec61 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -363,6 +363,12 @@ config PPC_PERF_CTRS
        help
          This enables the powerpc-specific perf_event back-end.
 
+config HV_PERF_CTRS
+       def_bool y
+       depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
+       help
+         Enable access to perf counters provided by the hypervisor
+
 config SMP
 	depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE || PPC_47x
 	bool "Symmetric multi-processing support"
-- 
1.8.5.2

^ permalink raw reply related

* Re: Kernel stack overflows due to  "powerpc: Remove ksp_limit on ppc64" with v3.13-rc8 on ppc32 (P2020)
From: Kevin Hao @ 2014-01-17  2:20 UTC (permalink / raw)
  To: Guenter Roeck; +Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20140116180532.GA1616@roeck-us.net>

[-- Attachment #1: Type: text/plain, Size: 2620 bytes --]

On Thu, Jan 16, 2014 at 10:05:32AM -0800, Guenter Roeck wrote:
> Hi all,
> 
> I am getting kernel stack overflows with v3.13-rc8 on a system with P2020 CPU.
> The kernel is patched for the target, but I don't think that is related.
> Stack overflows are in different areas, but always in calls from __do_softirq.
> 
> Crashes happen reliably either during boot or if I put any kind of load
> onto the system.

How about the following fix:

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index e47d268727a4..52fffe5616b4 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -61,7 +61,7 @@ _GLOBAL(call_do_irq)
 	mflr	r0
 	stw	r0,4(r1)
 	lwz	r10,THREAD+KSP_LIMIT(r2)
-	addi	r11,r3,THREAD_INFO_GAP
+	addi	r11,r4,THREAD_INFO_GAP
 	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
 	mr	r1,r4
 	stw	r10,8(r1)

Thanks,
Kevin
> 
> Example:
> 
> Kernel stack overflow in process eb3e5a00, r1=eb79df90
> CPU: 0 PID: 2838 Comm: ssh Not tainted 3.13.0-rc8-juniper-00146-g19eca00 #4
> task: eb3e5a00 ti: c0616000 task.ti: ef440000
> NIP: c003a420 LR: c003a410 CTR: c0017518
> REGS: eb79dee0 TRAP: 0901   Not tainted (3.13.0-rc8-juniper-00146-g19eca00)
> MSR: 00029000 <CE,EE,ME>  CR: 24008444  XER: 00000000
> GPR00: c003a410 eb79df90 eb3e5a00 00000000 eb05d900 00000001 65d87646 00000000
> GPR08: 00000000 020b8000 00000000 00000000 44008442
> NIP [c003a420] __do_softirq+0x94/0x1ec
> LR [c003a410] __do_softirq+0x84/0x1ec
> Call Trace:
> [eb79df90] [c003a410] __do_softirq+0x84/0x1ec (unreliable)
> [eb79dfe0] [c003a970] irq_exit+0xbc/0xc8
> [eb79dff0] [c000cc1c] call_do_irq+0x24/0x3c
> [ef441f20] [c00046a8] do_IRQ+0x8c/0xf8
> [ef441f40] [c000e7f4] ret_from_except+0x0/0x18
> --- Exception: 501 at 0xfcda524
>     LR = 0x10024900
> Instruction dump:
> 7c781b78 3b40000a 3a73b040 543c0024 3a800000 3b3913a0 7ef5bb78 48201bf9
> 5463103a 7d3b182e 7e89b92e 7c008146 <3ba00000> 7e7e9b78 48000014 57fff87f
> Kernel panic - not syncing: kernel stack overflow
> CPU: 0 PID: 2838 Comm: ssh Not tainted 3.13.0-rc8-juniper-00146-g19eca00 #4
> Call Trace:
> Rebooting in 180 seconds..
> 
> Reverting the following commit fixes the problem.
> 
> cbc9565ee8 "powerpc: Remove ksp_limit on ppc64"
> 
> Should I submit a patch reverting this commit, or is there a better way to fix
> the problem on short notice (given that 3.13 is close) ?
> 
> Thanks,
> Guenter
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

[-- Attachment #2: Type: application/pgp-signature, Size: 490 bytes --]

^ permalink raw reply related

* Re: Kernel stack overflows due to  "powerpc: Remove ksp_limit on ppc64" with v3.13-rc8 on ppc32 (P2020)
From: Benjamin Herrenschmidt @ 2014-01-17  2:58 UTC (permalink / raw)
  To: Kevin Hao; +Cc: linuxppc-dev, linux-kernel, Guenter Roeck
In-Reply-To: <20140117022005.GA29880@pek-khao-d1.corp.ad.wrs.com>

On Fri, 2014-01-17 at 10:20 +0800, Kevin Hao wrote:
> On Thu, Jan 16, 2014 at 10:05:32AM -0800, Guenter Roeck wrote:
> > Hi all,
> > 
> > I am getting kernel stack overflows with v3.13-rc8 on a system with P2020 CPU.
> > The kernel is patched for the target, but I don't think that is related.
> > Stack overflows are in different areas, but always in calls from __do_softirq.
> > 
> > Crashes happen reliably either during boot or if I put any kind of load
> > onto the system.
> 
> How about the following fix:

Wow. I've been staring at that code for 15mn this morning and didn't
spot it ! Nice catch :-)

Any chance you can send a version of that patch that adds the C
prototype of the function in a comment right before the assembly ?

We should generalize that practice...

Cheers,
Ben.
 
> diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
> index e47d268727a4..52fffe5616b4 100644
> --- a/arch/powerpc/kernel/misc_32.S
> +++ b/arch/powerpc/kernel/misc_32.S
> @@ -61,7 +61,7 @@ _GLOBAL(call_do_irq)
>  	mflr	r0
>  	stw	r0,4(r1)
>  	lwz	r10,THREAD+KSP_LIMIT(r2)
> -	addi	r11,r3,THREAD_INFO_GAP
> +	addi	r11,r4,THREAD_INFO_GAP
>  	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
>  	mr	r1,r4
>  	stw	r10,8(r1)
> 
> Thanks,
> Kevin
> > 
> > Example:
> > 
> > Kernel stack overflow in process eb3e5a00, r1=eb79df90
> > CPU: 0 PID: 2838 Comm: ssh Not tainted 3.13.0-rc8-juniper-00146-g19eca00 #4
> > task: eb3e5a00 ti: c0616000 task.ti: ef440000
> > NIP: c003a420 LR: c003a410 CTR: c0017518
> > REGS: eb79dee0 TRAP: 0901   Not tainted (3.13.0-rc8-juniper-00146-g19eca00)
> > MSR: 00029000 <CE,EE,ME>  CR: 24008444  XER: 00000000
> > GPR00: c003a410 eb79df90 eb3e5a00 00000000 eb05d900 00000001 65d87646 00000000
> > GPR08: 00000000 020b8000 00000000 00000000 44008442
> > NIP [c003a420] __do_softirq+0x94/0x1ec
> > LR [c003a410] __do_softirq+0x84/0x1ec
> > Call Trace:
> > [eb79df90] [c003a410] __do_softirq+0x84/0x1ec (unreliable)
> > [eb79dfe0] [c003a970] irq_exit+0xbc/0xc8
> > [eb79dff0] [c000cc1c] call_do_irq+0x24/0x3c
> > [ef441f20] [c00046a8] do_IRQ+0x8c/0xf8
> > [ef441f40] [c000e7f4] ret_from_except+0x0/0x18
> > --- Exception: 501 at 0xfcda524
> >     LR = 0x10024900
> > Instruction dump:
> > 7c781b78 3b40000a 3a73b040 543c0024 3a800000 3b3913a0 7ef5bb78 48201bf9
> > 5463103a 7d3b182e 7e89b92e 7c008146 <3ba00000> 7e7e9b78 48000014 57fff87f
> > Kernel panic - not syncing: kernel stack overflow
> > CPU: 0 PID: 2838 Comm: ssh Not tainted 3.13.0-rc8-juniper-00146-g19eca00 #4
> > Call Trace:
> > Rebooting in 180 seconds..
> > 
> > Reverting the following commit fixes the problem.
> > 
> > cbc9565ee8 "powerpc: Remove ksp_limit on ppc64"
> > 
> > Should I submit a patch reverting this commit, or is there a better way to fix
> > the problem on short notice (given that 3.13 is close) ?
> > 
> > Thanks,
> > Guenter
> > _______________________________________________
> > Linuxppc-dev mailing list
> > Linuxppc-dev@lists.ozlabs.org
> > https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* Re: Kernel stack overflows due to  "powerpc: Remove ksp_limit on ppc64" with v3.13-rc8 on ppc32 (P2020)
From: Guenter Roeck @ 2014-01-17  3:15 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Kevin Hao; +Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1389927490.7406.10.camel@pasglop>

On 01/16/2014 06:58 PM, Benjamin Herrenschmidt wrote:
> On Fri, 2014-01-17 at 10:20 +0800, Kevin Hao wrote:
>> On Thu, Jan 16, 2014 at 10:05:32AM -0800, Guenter Roeck wrote:
>>> Hi all,
>>>
>>> I am getting kernel stack overflows with v3.13-rc8 on a system with P2020 CPU.
>>> The kernel is patched for the target, but I don't think that is related.
>>> Stack overflows are in different areas, but always in calls from __do_softirq.
>>>
>>> Crashes happen reliably either during boot or if I put any kind of load
>>> onto the system.
>>
>> How about the following fix:
>
> Wow. I've been staring at that code for 15mn this morning and didn't
> spot it ! Nice catch :-)
>
Yes, great catch! That fixes the problem.

Tested-by: Guenter Roeck <linux@roeck-us.net>

I assume you or Kevin will take it from there ?

Thanks,
Guenter

^ permalink raw reply

* Re: Kernel stack overflows due to  "powerpc: Remove ksp_limit on ppc64" with v3.13-rc8 on ppc32 (P2020)
From: Kevin Hao @ 2014-01-17  3:23 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-kernel, Guenter Roeck
In-Reply-To: <1389927490.7406.10.camel@pasglop>

[-- Attachment #1: Type: text/plain, Size: 923 bytes --]

On Fri, Jan 17, 2014 at 01:58:10PM +1100, Benjamin Herrenschmidt wrote:
> On Fri, 2014-01-17 at 10:20 +0800, Kevin Hao wrote:
> > On Thu, Jan 16, 2014 at 10:05:32AM -0800, Guenter Roeck wrote:
> > > Hi all,
> > > 
> > > I am getting kernel stack overflows with v3.13-rc8 on a system with P2020 CPU.
> > > The kernel is patched for the target, but I don't think that is related.
> > > Stack overflows are in different areas, but always in calls from __do_softirq.
> > > 
> > > Crashes happen reliably either during boot or if I put any kind of load
> > > onto the system.
> > 
> > How about the following fix:
> 
> Wow. I've been staring at that code for 15mn this morning and didn't
> spot it ! Nice catch :-)
> 
> Any chance you can send a version of that patch that adds the C
> prototype of the function in a comment right before the assembly ?

Will do. The patch is coming soon.

Thanks,
Kevin

[-- Attachment #2: Type: application/pgp-signature, Size: 490 bytes --]

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox