LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v5 4/6] fsl-dma: move the function ahead of its invoke function
From: qiang.liu @ 2012-08-01  8:49 UTC (permalink / raw)
  To: linux-crypto, linuxppc-dev, linux-kernel, dan.j.williams
  Cc: Vinod Koul, Qiang Liu, herbert, Dan Williams, davem

From: Qiang Liu <qiang.liu@freescale.com>

Move the functions fsldma_cleanup_descriptor() and fsl_chan_xfer_ld_queue()
ahead of their callers so that the redundant forward declarations can be
removed.

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Li Yang <leoli@freescale.com>
Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
---
 drivers/dma/fsldma.c |  252 +++++++++++++++++++++++++-------------------------
 1 files changed, 124 insertions(+), 128 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 87f52c0..bb883c0 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -400,9 +400,6 @@ out_splice:
 	list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
 }

-static void fsldma_cleanup_descriptor(struct fsldma_chan *chan);
-static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan);
-
 /**
  * fsldma_clean_completed_descriptor - free all descriptors which
  * has been completed and acked
@@ -519,6 +516,130 @@ fsldma_clean_running_descriptor(struct fsldma_chan *chan,
 	return 0;
 }

+/**
+ * fsl_chan_xfer_ld_queue - transfer any pending transactions
+ * @chan : Freescale DMA channel
+ *
+ * HARDWARE STATE: idle
+ * LOCKING: must hold chan->desc_lock
+ */
+static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc;
+
+	/*
+	 * If the list of pending descriptors is empty, then we
+	 * don't need to do any work at all
+	 */
+	if (list_empty(&chan->ld_pending)) {
+		chan_dbg(chan, "no pending LDs\n");
+		return;
+	}
+
+	/*
+	 * The DMA controller is not idle, which means that the interrupt
+	 * handler will start any queued transactions when it runs after
+	 * this transaction finishes
+	 */
+	if (!chan->idle) {
+		chan_dbg(chan, "DMA controller still busy\n");
+		return;
+	}
+
+	/*
+	 * If there are some link descriptors which have not been
+	 * transferred, we need to start the controller
+	 */
+
+	/*
+	 * Move all elements from the queue of pending transactions
+	 * onto the list of running transactions
+	 */
+	chan_dbg(chan, "idle, starting controller\n");
+	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
+	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
+
+	/*
+	 * The 85xx DMA controller doesn't clear the channel start bit
+	 * automatically at the end of a transfer. Therefore we must clear
+	 * it in software before starting the transfer.
+	 */
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		u32 mode;
+
+		mode = DMA_IN(chan, &chan->regs->mr, 32);
+		mode &= ~FSL_DMA_MR_CS;
+		DMA_OUT(chan, &chan->regs->mr, mode, 32);
+	}
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	set_cdar(chan, desc->async_tx.phys);
+	get_cdar(chan);
+
+	dma_start(chan);
+	chan->idle = false;
+}
+
+/**
+ * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
+ * @chan: Freescale DMA channel
+ * @desc: descriptor to cleanup and free
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, and then
+ * free the descriptor.
+ */
+static void fsldma_cleanup_descriptor(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc, *_desc;
+	dma_cookie_t cookie = 0;
+	dma_addr_t curr_phys = get_cdar(chan);
+	int idle = dma_is_idle(chan);
+	int seen_current = 0;
+
+	fsldma_clean_completed_descriptor(chan);
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
+		/*
+		 * do not advance past the current descriptor loaded into the
+		 * hardware channel, subsequent descriptors are either in
+		 * process or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/*
+		 * stop the search if we reach the current descriptor and the
+		 * channel is busy
+		 */
+		if (desc->async_tx.phys == curr_phys) {
+			seen_current = 1;
+			if (!idle)
+				break;
+		}
+
+		cookie = fsldma_run_tx_complete_actions(desc, chan, cookie);
+
+		if (fsldma_clean_running_descriptor(chan, desc))
+			break;
+	}
+
+	/*
+	 * Start any pending transactions automatically
+	 *
+	 * In the ideal case, we keep the DMA controller busy while we go
+	 * ahead and free the descriptors below.
+	 */
+	fsl_chan_xfer_ld_queue(chan);
+
+	if (cookie > 0)
+		chan->common.completed_cookie = cookie;
+}
+
 static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
@@ -932,131 +1053,6 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 }

 /**
- * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
- * @chan: Freescale DMA channel
- * @desc: descriptor to cleanup and free
- *
- * This function is used on a descriptor which has been executed by the DMA
- * controller. It will run any callbacks, submit any dependencies, and then
- * free the descriptor.
- */
-static void fsldma_cleanup_descriptor(struct fsldma_chan *chan)
-{
-	struct fsl_desc_sw *desc, *_desc;
-	dma_cookie_t cookie = 0;
-	dma_addr_t curr_phys = get_cdar(chan);
-	int idle = dma_is_idle(chan);
-	int seen_current = 0;
-
-	fsldma_clean_completed_descriptor(chan);
-
-	/* Run the callback for each descriptor, in order */
-	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
-		/*
-		 * do not advance past the current descriptor loaded into the
-		 * hardware channel, subsequent descriptors are either in
-		 * process or have not been submitted
-		 */
-		if (seen_current)
-			break;
-
-		/*
-		 * stop the search if we reach the current descriptor and the
-		 * channel is busy
-		 */
-		if (desc->async_tx.phys == curr_phys) {
-			seen_current = 1;
-			if (!idle)
-				break;
-		}
-
-		cookie = fsldma_run_tx_complete_actions(desc, chan, cookie);
-
-		if (fsldma_clean_running_descriptor(chan, desc))
-			break;
-
-	}
-
-	/*
-	 * Start any pending transactions automatically
-	 *
-	 * In the ideal case, we keep the DMA controller busy while we go
-	 * ahead and free the descriptors below.
-	 */
-	fsl_chan_xfer_ld_queue(chan);
-
-	if (cookie > 0)
-		chan->common.completed_cookie = cookie;
-}
-
-/**
- * fsl_chan_xfer_ld_queue - transfer any pending transactions
- * @chan : Freescale DMA channel
- *
- * HARDWARE STATE: idle
- * LOCKING: must hold chan->desc_lock
- */
-static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
-{
-	struct fsl_desc_sw *desc;
-
-	/*
-	 * If the list of pending descriptors is empty, then we
-	 * don't need to do any work at all
-	 */
-	if (list_empty(&chan->ld_pending)) {
-		chan_dbg(chan, "no pending LDs\n");
-		return;
-	}
-
-	/*
-	 * The DMA controller is not idle, which means that the interrupt
-	 * handler will start any queued transactions when it runs after
-	 * this transaction finishes
-	 */
-	if (!chan->idle) {
-		chan_dbg(chan, "DMA controller still busy\n");
-		return;
-	}
-
-	/*
-	 * If there are some link descriptors which have not been
-	 * transferred, we need to start the controller
-	 */
-
-	/*
-	 * Move all elements from the queue of pending transactions
-	 * onto the list of running transactions
-	 */
-	chan_dbg(chan, "idle, starting controller\n");
-	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
-	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
-
-	/*
-	 * The 85xx DMA controller doesn't clear the channel start bit
-	 * automatically at the end of a transfer. Therefore we must clear
-	 * it in software before starting the transfer.
-	 */
-	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
-		u32 mode;
-
-		mode = DMA_IN(chan, &chan->regs->mr, 32);
-		mode &= ~FSL_DMA_MR_CS;
-		DMA_OUT(chan, &chan->regs->mr, mode, 32);
-	}
-
-	/*
-	 * Program the descriptor's address into the DMA controller,
-	 * then start the DMA transaction
-	 */
-	set_cdar(chan, desc->async_tx.phys);
-	get_cdar(chan);
-
-	dma_start(chan);
-	chan->idle = false;
-}
-
-/**
  * fsl_dma_memcpy_issue_pending - Issue the DMA start command
  * @chan : Freescale DMA channel
  */
--
1.7.5.1

^ permalink raw reply related

* [PATCH v5 5/6] fsl-dma: use spin_lock_bh to instead of spin_lock_irqsave
From: qiang.liu @ 2012-08-01  8:50 UTC (permalink / raw)
  To: linux-crypto, linuxppc-dev, linux-kernel, dan.j.williams
  Cc: Vinod Koul, Timur Tabi, Qiang Liu, herbert, Dan Williams, davem

From: Qiang Liu <qiang.liu@freescale.com>

- Using spin_lock_bh() is the right way to use the async_tx API;
dma_run_dependencies() should not be called under spin_lock_irqsave().
- Use spin_lock_bh() instead of spin_lock_irqsave() to improve performance.
The fsl-dma ISR never accesses the descriptor queues itself; only its
tasklet does, so spin_lock_bh() is more appropriate here. spin_lock_irqsave()
turns interrupts off and saves the interrupt state, which is unnecessary here.

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Li Yang <leoli@freescale.com>
Cc: Timur Tabi <timur@freescale.com>
Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
---
 drivers/dma/fsldma.c |   30 ++++++++++++------------------
 1 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index bb883c0..e3814aa 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -645,10 +645,9 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
 	struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
 	struct fsl_desc_sw *child;
-	unsigned long flags;
 	dma_cookie_t cookie;

-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);

 	/*
 	 * assign cookies to all of the software descriptors
@@ -661,7 +660,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	/* put this transaction onto the tail of the pending queue */
 	append_ld_queue(chan, desc);

-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);

 	return cookie;
 }
@@ -770,15 +769,14 @@ static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan,
 static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
-	unsigned long flags;

 	chan_dbg(chan, "free all channel resources\n");
-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);
 	fsldma_cleanup_descriptor(chan);
 	fsldma_free_desc_list(chan, &chan->ld_pending);
 	fsldma_free_desc_list(chan, &chan->ld_running);
 	fsldma_free_desc_list(chan, &chan->ld_completed);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);

 	dma_pool_destroy(chan->desc_pool);
 	chan->desc_pool = NULL;
@@ -997,7 +995,6 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 {
 	struct dma_slave_config *config;
 	struct fsldma_chan *chan;
-	unsigned long flags;
 	int size;

 	if (!dchan)
@@ -1007,7 +1004,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan,

 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
-		spin_lock_irqsave(&chan->desc_lock, flags);
+		spin_lock_bh(&chan->desc_lock);

 		/* Halt the DMA engine */
 		dma_halt(chan);
@@ -1017,7 +1014,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 		fsldma_free_desc_list(chan, &chan->ld_running);
 		chan->idle = true;

-		spin_unlock_irqrestore(&chan->desc_lock, flags);
+		spin_unlock_bh(&chan->desc_lock);
 		return 0;

 	case DMA_SLAVE_CONFIG:
@@ -1059,11 +1056,10 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan)
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
-	unsigned long flags;

-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);
 	fsl_chan_xfer_ld_queue(chan);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);
 }

 /**
@@ -1076,15 +1072,14 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
 	enum dma_status ret;
-	unsigned long flags;

 	ret = dma_cookie_status(dchan, cookie, txstate);
 	if (ret == DMA_SUCCESS)
 		return ret;

-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);
 	fsldma_cleanup_descriptor(chan);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);

 	return dma_cookie_status(dchan, cookie, txstate);
 }
@@ -1163,11 +1158,10 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data)
 static void dma_do_tasklet(unsigned long data)
 {
 	struct fsldma_chan *chan = (struct fsldma_chan *)data;
-	unsigned long flags;

 	chan_dbg(chan, "tasklet entry\n");

-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);

 	/* the hardware is now idle and ready for more */
 	chan->idle = true;
@@ -1175,7 +1169,7 @@ static void dma_do_tasklet(unsigned long data)
 	/* Run all cleanup for this descriptor */
 	fsldma_cleanup_descriptor(chan);

-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);

 	chan_dbg(chan, "tasklet exit\n");
 }
--
1.7.5.1

^ permalink raw reply related

* [PATCH v5 6/6] fsl-dma: fix a warning of unitialized cookie
From: qiang.liu @ 2012-08-01  8:50 UTC (permalink / raw)
  To: linux-crypto, linuxppc-dev, linux-kernel, dan.j.williams
  Cc: Vinod Koul, Qiang Liu, herbert, Dan Williams, davem

From: Qiang Liu <qiang.liu@freescale.com>

Fix a warning about an uninitialized value when compiling with -Wuninitialized.

Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Li Yang <leoli@freescale.com>
Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
Reported-by: Kim Phillips <kim.phillips@freescale.com>
---
 drivers/dma/fsldma.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index e3814aa..6fc22eb 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -645,7 +645,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
 	struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
 	struct fsl_desc_sw *child;
-	dma_cookie_t cookie;
+	dma_cookie_t cookie = 0;

 	spin_lock_bh(&chan->desc_lock);

--
1.7.5.1

^ permalink raw reply related

* Re: [PATCH 1/8] ppc/pnv: create bus sensitive PEs
From: Gavin Shan @ 2012-08-01  9:18 UTC (permalink / raw)
  To: Richard Yang; +Cc: linuxppc-dev, Gavin Shan
In-Reply-To: <20120801090446.GA3895@richard.(null)>

On Wed, Aug 01, 2012 at 05:04:46AM -0400, Richard Yang wrote:
>On Wed, Aug 01, 2012 at 04:26:54PM +0800, Gavin Shan wrote:
>>On Wed, Aug 01, 2012 at 03:49:41AM -0400, Richard Yang wrote:
>>>On Mon, Jun 25, 2012 at 11:43:14PM +0800, Gavin Shan wrote:
>>>>Basically, there're 2 types of PCI bus sensitive PEs: (A) The PE
>>>>includes single PCI bus. (B) The PE includes the PCI bus and all
>>>>the subordinate PCI buses. At present, we'd like to put PCI bus
>>>>originated by PCI-e link to form PE that contains single PCI bus,
>>>>and the PCIe-to-PCI bridge will form the 2nd type of PE. We don't
>>>>figure out to detect PLX bridge yet. Once we can detect PLX bridge
>>>>some day, we have to put PCI buses originated from the downstream
>>>>port of PLX bridge to the 2nd type of PE.
>>>>
>>>>The patch changes the original implementation for a little bit
>>>>to support 2 types of PCI bus sensitive PEs described as above.
>>>>Also, the function used to retrieve the corresponding PE according
>>>>to the given PCI device has been changed based on that because each
>>>>PCI device should trace the directly associated PE.
>>>>
>>>>Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
>>>>Reviewed-by: Ram Pai <linuxram@us.ibm.com>
>>>>Reviewed-by: Richard Yang <weiyang@linux.vnet.ibm.com>
>>>>---
>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   97 +++++++++++++++++------------
>>>> arch/powerpc/platforms/powernv/pci.h      |   10 +--
>>>> 2 files changed, 63 insertions(+), 44 deletions(-)
>>>>
>>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>index fbdd74d..1504795 100644
>>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>@@ -548,7 +548,7 @@ static void __devinit pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
>>>>  * but in the meantime, we need to protect them to avoid warnings
>>>>  */
>>>> #ifdef CONFIG_PCI_MSI
>>>>-static struct pnv_ioda_pe * __devinit __pnv_ioda_get_one_pe(struct pci_dev *dev)
>>>>+static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev)
>>>> {
>>>> 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
>>>> 	struct pnv_phb *phb = hose->private_data;
>>>>@@ -560,19 +560,6 @@ static struct pnv_ioda_pe * __devinit __pnv_ioda_get_one_pe(struct pci_dev *dev)
>>>> 		return NULL;
>>>> 	return &phb->ioda.pe_array[pdn->pe_number];
>>>> }
>>>>-
>>>>-static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev)
>>>>-{
>>>>-	struct pnv_ioda_pe *pe = __pnv_ioda_get_one_pe(dev);
>>>>-
>>>>-	while (!pe && dev->bus->self) {
>>>>-		dev = dev->bus->self;
>>>>-		pe = __pnv_ioda_get_one_pe(dev);
>>>>-		if (pe)
>>>>-			pe = pe->bus_pe;
>>>>-	}
>>>>-	return pe;
>>>>-}
>>>> #endif /* CONFIG_PCI_MSI */
>>>>
>>>> static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
>>>>@@ -589,7 +576,11 @@ static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb,
>>>> 		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
>>>> 		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
>>>> 		parent = pe->pbus->self;
>>>>-		count = pe->pbus->subordinate - pe->pbus->secondary + 1;
>>>>+		if (pe->flags & PNV_IODA_PE_BUS_ALL)
>>>>+			count = pe->pbus->subordinate - pe->pbus->secondary + 1;
>>>>+		else
>>>>+			count = 1;
>>>>+
>>>> 		switch(count) {
>>>> 		case  1: bcomp = OpalPciBusAll;		break;
>>>> 		case  2: bcomp = OpalPciBus7Bits;	break;
>>>>@@ -699,6 +690,7 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
>>>> 	return 10;
>>>> }
>>>>
>>>>+#if 0
>>>> static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev)
>>>> {
>>>> 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
>>>>@@ -767,6 +759,7 @@ static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev)
>>>>
>>>> 	return pe;
>>>> }
>>>>+#endif /* Useful for SRIOV case */
>>>>
>>>> static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
>>>> {
>>>>@@ -784,43 +777,47 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
>>>> 		pdn->pcidev = dev;
>>>> 		pdn->pe_number = pe->pe_number;
>>>> 		pe->dma_weight += pnv_ioda_dma_weight(dev);
>>>>-		if (dev->subordinate)
>>>>+		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
>>>> 			pnv_ioda_setup_same_PE(dev->subordinate, pe);
>>>> 	}
>>>> }
>>>>
>>>>-static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
>>>>-					    struct pnv_ioda_pe *ppe)
>>>>+/*
>>>>+ * There're 2 types of PCI bus sensitive PEs: One that is compromised of
>>>>+ * single PCI bus. Another one that contains the primary PCI bus and its
>>>>+ * subordinate PCI devices and buses. The second type of PE is normally
>>>>+ * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports.
>>>>+ */
>>>>+static void __devinit pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
>>>> {
>>>>-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
>>>>+	struct pci_controller *hose = pci_bus_to_host(bus);
>>>> 	struct pnv_phb *phb = hose->private_data;
>>>>-	struct pci_bus *bus = dev->subordinate;
>>>> 	struct pnv_ioda_pe *pe;
>>>> 	int pe_num;
>>>>
>>>>-	if (!bus) {
>>>>-		pr_warning("%s: Bridge without a subordinate bus !\n",
>>>>-			   pci_name(dev));
>>>>-		return;
>>>>-	}
>>>> 	pe_num = pnv_ioda_alloc_pe(phb);
>>>> 	if (pe_num == IODA_INVALID_PE) {
>>>>-		pr_warning("%s: Not enough PE# available, disabling bus\n",
>>>>-			   pci_name(dev));
>>>>+		pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
>>>>+			__func__, pci_domain_nr(bus), bus->number);
>>>> 		return;
>>>> 	}
>>>>
>>>> 	pe = &phb->ioda.pe_array[pe_num];
>>>>-	ppe->bus_pe = pe;
>>>>+	pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
>>>> 	pe->pbus = bus;
>>>>+	pe->pe_number = pe_num;
>>>
>>>Gavin, 
>>>
>>>Sorry for the late reply. I am not sure I a replying on the latest code. If
>>>not, please point me out. 
>>>
>>>I think we don't need to add this line. the pe->pe_number is already set in
>>>pnv_ioda_alloc_pe().
>>>
>>
>>Thanks, Richard. I think we probablly need remove the following line in pnv_ioda_alloc_pe()
>>instead of the line you pointed because pnv_ioda_alloc_pe() might return invalid
>>PE number (-1). That will eventually cause data corruption while using "-1" to
>>referring phb->ioda.pe_array[], even the situation shouldn't happen for now :-)
>>
>>	phb->ioda.pe_array[pe].pe_number = pe;
>
>oh, so it is not proper to set pe_number = -1 in the pe_array, right?
>

It seems that I missed something. Anyway, removing either the line in
pnv_ioda_alloc_pe() or the one you pointed out is OK. I will remove the line
you pointed out in the next version. Thanks a lot, Richard.

"-1" means an invalid PE number. In my previous reply, I tried to say that the
following code would cause data corruption, but after looking into the code
again, I see that it can never happen :-)

	phb->ioda.pe_array[-1].pe_number = -1;

Thanks,
Gavin

>>
>>Let me change it accordingly in next version. The series of patches is pending
>>for the patches against PCI core change. The later one is waiting for Bjorn's
>>confirm.
>>
>>Thanks,
>>Gavin
>>
>>>> 	pe->pdev = NULL;
>>>> 	pe->tce32_seg = -1;
>>>> 	pe->mve_number = -1;
>>>> 	pe->rid = bus->secondary << 8;
>>>> 	pe->dma_weight = 0;
>>>>
>>>>-	pe_info(pe, "Secondary busses %d..%d associated with PE\n",
>>>>-		bus->secondary, bus->subordinate);
>>>>+	if (all)
>>>>+		pe_info(pe, "Secondary busses %d..%d associated with PE#%d\n",
>>>>+			bus->secondary, bus->subordinate, pe_num);
>>>>+	else
>>>>+		pe_info(pe, "Secondary busses %d associated with PE#%d\n",
>>>>+			bus->secondary, pe_num);
>>>>
>>>> 	if (pnv_ioda_configure_pe(phb, pe)) {
>>>> 		/* XXX What do we do here ? */
>>>>@@ -848,17 +845,33 @@ static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev,
>>>> static void __devinit pnv_ioda_setup_PEs(struct pci_bus *bus)
>>>> {
>>>> 	struct pci_dev *dev;
>>>>-	struct pnv_ioda_pe *pe;
>>>>+
>>>>+	pnv_ioda_setup_bus_PE(bus, 0);
>>>>
>>>> 	list_for_each_entry(dev, &bus->devices, bus_list) {
>>>>-		pe = pnv_ioda_setup_dev_PE(dev);
>>>>-		if (pe == NULL)
>>>>-			continue;
>>>>-		/* Leaving the PCIe domain ... single PE# */
>>>>-		if (dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
>>>>-			pnv_ioda_setup_bus_PE(dev, pe);
>>>>-		else if (dev->subordinate)
>>>>-			pnv_ioda_setup_PEs(dev->subordinate);
>>>>+		if (dev->subordinate) {
>>>>+			if (dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
>>>>+				pnv_ioda_setup_bus_PE(dev->subordinate, 1);
>>>>+			else
>>>>+				pnv_ioda_setup_PEs(dev->subordinate);
>>>>+		}
>>>>+	}
>>>>+}
>>>>+
>>>>+/*
>>>>+ * Configure PEs so that the downstream PCI buses and devices
>>>>+ * could have their associated PE#. Unfortunately, we didn't
>>>>+ * figure out the way to identify the PLX bridge yet. So we
>>>>+ * simply put the PCI bus and the subordinate behind the root
>>>>+ * port to PE# here. The game rule here is expected to be changed
>>>>+ * as soon as we can detected PLX bridge correctly.
>>>>+ */
>>>>+static void __devinit pnv_pci_ioda_setup_PEs(void)
>>>>+{
>>>>+	struct pci_controller *hose, *tmp;
>>>>+
>>>>+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
>>>>+		pnv_ioda_setup_PEs(hose->bus);
>>>> 	}
>>>> }
>>>>
>>>>@@ -1139,6 +1152,11 @@ static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose)
>>>> 	}
>>>> }
>>>>
>>>>+static void __devinit pnv_pci_ioda_fixup(void)
>>>>+{
>>>>+	pnv_pci_ioda_setup_PEs();
>>>>+}
>>>>+
>>>> /* Prevent enabling devices for which we couldn't properly
>>>>  * assign a PE
>>>>  */
>>>>@@ -1305,6 +1323,7 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
>>>> 	 * ourselves here
>>>> 	 */
>>>> 	ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb;
>>>>+	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
>>>> 	ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook;
>>>> 	pci_add_flags(PCI_PROBE_ONLY | PCI_REASSIGN_ALL_RSRC);
>>>>
>>>>diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
>>>>index 8bc4796..0cb760c 100644
>>>>--- a/arch/powerpc/platforms/powernv/pci.h
>>>>+++ b/arch/powerpc/platforms/powernv/pci.h
>>>>@@ -17,9 +17,14 @@ enum pnv_phb_model {
>>>> };
>>>>
>>>> #define PNV_PCI_DIAG_BUF_SIZE	4096
>>>>+#define PNV_IODA_PE_DEV		(1 << 0)	/* PE has single PCI device	*/
>>>>+#define PNV_IODA_PE_BUS		(1 << 1)	/* PE has primary PCI bus	*/
>>>>+#define PNV_IODA_PE_BUS_ALL	(1 << 2)	/* PE has subordinate buses	*/
>>>>
>>>> /* Data associated with a PE, including IOMMU tracking etc.. */
>>>> struct pnv_ioda_pe {
>>>>+	unsigned long		flags;
>>>>+
>>>> 	/* A PE can be associated with a single device or an
>>>> 	 * entire bus (& children). In the former case, pdev
>>>> 	 * is populated, in the later case, pbus is.
>>>>@@ -40,11 +45,6 @@ struct pnv_ioda_pe {
>>>> 	 */
>>>> 	unsigned int		dma_weight;
>>>>
>>>>-	/* This is a PCI-E -> PCI-X bridge, this points to the
>>>>-	 * corresponding bus PE
>>>>-	 */
>>>>-	struct pnv_ioda_pe	*bus_pe;
>>>>-
>>>> 	/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
>>>> 	int			tce32_seg;
>>>> 	int			tce32_segcount;
>>>>-- 
>>>>1.7.9.5
>>>>
>>>>_______________________________________________
>>>>Linuxppc-dev mailing list
>>>>Linuxppc-dev@lists.ozlabs.org
>>>>https://lists.ozlabs.org/listinfo/linuxppc-dev
>>>
>>>-- 
>>>Richard Yang
>>>Help you, Help me
>>
>>_______________________________________________
>>Linuxppc-dev mailing list
>>Linuxppc-dev@lists.ozlabs.org
>>https://lists.ozlabs.org/listinfo/linuxppc-dev
>
>-- 
>Richard Yang
>Help you, Help me

^ permalink raw reply

* RE: [PATCH v5 3/6] fsl-dma: change release process of dma descriptor for supporting async_tx
From: Liu Qiang-B32616 @ 2012-08-01  9:35 UTC (permalink / raw)
  To: Liu Qiang-B32616, linux-crypto@vger.kernel.org,
	linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org,
	dan.j.williams@gmail.com
  Cc: Li Yang-R58472, Ira W. Snyder, Vinod Koul, Phillips Kim-R1AAHA,
	Dan Williams, davem@davemloft.net, herbert@gondor.apana.org.au
In-Reply-To: <1343810957-25378-1-git-send-email-qiang.liu@freescale.com>

Hi Ira,

I hope we can discuss fsl-dma in this thread. In this patch I give a simple
case to illustrate why I must correct the release process of finished
descriptors.
There is a potential risk in the current fsl-dma: the finished cookie value and
the finished async_tx descriptor should be determined by the hardware, and not
depend only on the s/w queue ld_running. I know the h/w is very fast, but the
driver should stay aligned with the h/w.

Thanks.

> -----Original Message-----
> From: Liu Qiang-B32616
> Sent: Wednesday, August 01, 2012 4:49 PM
> To: linux-crypto@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> kernel@vger.kernel.org; dan.j.williams@gmail.com
> Cc: Phillips Kim-R1AAHA; herbert@gondor.hengli.com.au;
> davem@davemloft.net; Liu Qiang-B32616; Dan Williams; Vinod Koul; Li Yang-
> R58472; Ira W. Snyder
> Subject: [PATCH v5 3/6] fsl-dma: change release process of dma descriptor
> for supporting async_tx
>=20
> From: Qiang Liu <qiang.liu@freescale.com>
>=20
> Fix the potential risk when both NET_DMA and ASYNC_TX are enabled.
> The current release process of dma descriptors lacks async_tx support:
> all descriptors are released whether or not they have been acked by
> async_tx, so there is a potential race condition when the dma engine is used
> by other clients (e.g. when NET_DMA is enabled to offload TCP).
>=20
> In our case, the race condition arises when both talitos and dmaengine
> are used to offload xor: the napi scheduler syncs all pending requests
> in the dma channels, which disturbs the raid operations because the ack
> flag of a tx is not checked in fsl dma. A descriptor which was just
> submitted and has not been acked is freed; when it is then used as a
> dependent tx, this freed descriptor triggers
> BUG_ON(async_tx_test_ack(depend_tx)) in async_tx_submit().
>=20
> TASK = ee1a94a0[1390] 'md0_raid5' THREAD: ecf40000 CPU: 0
> GPR00: 00000001 ecf41ca0 ee1a94a0 0000003f 00000001 c00593e4
> 00000000 00000001
> GPR08: 00000000 a7a7a7a7 00000001 00000002 42028042 100a38d4
> ed576d98 00000000
> GPR16: ed5a11b0 00000000 2b162000 00000200 00000000 2d555000
> ed3015e8 c15a7aa0
> GPR24: 00000000 c155fc40 00000000 ecb63220 ecf41d28 ef640bb0
> ef640c30 ecf41ca0 NIP [c02b048c] async_tx_submit+0x6c/0x2b4 LR [c02b068c]
> async_tx_submit+0x26c/0x2b4 Call Trace:
> [ecf41ca0] [c02b068c] async_tx_submit+0x26c/0x2b4 (unreliable)
> [ecf41cd0] [c02b0a4c] async_memcpy+0x240/0x25c [ecf41d20] [c0421064]
> async_copy_data+0xa0/0x17c [ecf41d70] [c0421cf4]
> __raid_run_ops+0x874/0xe10 [ecf41df0] [c0426ee4]
> handle_stripe+0x820/0x25e8 [ecf41e90] [c0429080] raid5d+0x3d4/0x5b4
> [ecf41f40] [c04329b8] md_thread+0x138/0x16c [ecf41f90] [c008277c]
> kthread+0x8c/0x90 [ecf41ff0] [c0011630] kernel_thread+0x4c/0x68
>=20
> Another major modification in this patch is the change to how completed
> descriptors are detected. There is a potential risk caused by exception
> interrupts: all descriptors in the ld_running list are treated as completed
> when an interrupt is raised. This works fine under normal conditions, but if
> an exception occurs, it does not work as expected. Completion should
> not be inferred from the s/w list; the right way is to read the current
> descriptor address register to find the last completed descriptor. If an
> interrupt is raised by an error, the descriptors in ld_running must not all
> be treated as finished, or the unfinished descriptors in ld_running will be
> released wrongly.
>=20
> A simple way to reproduce:
> Enable dmatest first, then insert some bad descriptors which can trigger
> Programming Error interrupts before the good descriptors. As a result, the
> good descriptors will be freed before they are processed because of the
> exception interrupt.
>=20
> Note: the bad descriptors are only for simulating an exception interrupt.
> This case can illustrate the potential risk in current fsl-dma very well.
>=20
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Dan Williams <dan.j.williams@gmail.com>
> Cc: Vinod Koul <vinod.koul@intel.com>
> Cc: Li Yang <leoli@freescale.com>
> Cc: Ira W. Snyder <iws@ovro.caltech.edu>
> Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
> ---
>  drivers/dma/fsldma.c |  242 +++++++++++++++++++++++++++++++++++---------
> ------
>  drivers/dma/fsldma.h |    1 +
>  2 files changed, 172 insertions(+), 71 deletions(-)
>=20
> diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index
> 4f2f212..87f52c0 100644
> --- a/drivers/dma/fsldma.c
> +++ b/drivers/dma/fsldma.c
> @@ -400,6 +400,125 @@ out_splice:
>  	list_splice_tail_init(&desc->tx_list, &chan->ld_pending);  }
>=20
> +static void fsldma_cleanup_descriptor(struct fsldma_chan *chan); static
> +void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan);
> +
> +/**
> + * fsldma_clean_completed_descriptor - free all descriptors which
> + * has been completed and acked
> + * @chan: Freescale DMA channel
> + *
> + * This function is used on all completed and acked descriptors.
> + * All descriptors should only be freed in this function.
> + */
> +static int
> +fsldma_clean_completed_descriptor(struct fsldma_chan *chan) {
> +	struct fsl_desc_sw *desc, *_desc;
> +
> +	/* Run the callback for each descriptor, in order */
> +	list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) {
> +
> +		if (async_tx_test_ack(&desc->async_tx)) {
> +			/* Remove from the list of transactions */
> +			list_del(&desc->node);
> +#ifdef FSL_DMA_LD_DEBUG
> +			chan_dbg(chan, "LD %p free\n", desc); #endif
> +			dma_pool_free(chan->desc_pool, desc,
> +					desc->async_tx.phys);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * fsldma_run_tx_complete_actions - cleanup and free a single link
> +descriptor
> + * @chan: Freescale DMA channel
> + * @desc: descriptor to cleanup and free
> + * @cookie: Freescale DMA transaction identifier
> + *
> + * This function is used on a descriptor which has been executed by the
> +DMA
> + * controller. It will run any callbacks, submit any dependencies.
> + */
> +static dma_cookie_t fsldma_run_tx_complete_actions(struct fsl_desc_sw
> *desc,
> +		struct fsldma_chan *chan, dma_cookie_t cookie) {
> +	struct dma_async_tx_descriptor *txd =3D &desc->async_tx;
> +	struct device *dev =3D chan->common.device->dev;
> +	dma_addr_t src =3D get_desc_src(chan, desc);
> +	dma_addr_t dst =3D get_desc_dst(chan, desc);
> +	u32 len =3D get_desc_cnt(chan, desc);
> +
> +	BUG_ON(txd->cookie < 0);
> +
> +	if (txd->cookie > 0) {
> +		cookie =3D txd->cookie;
> +
> +		/* Run the link descriptor callback function */
> +		if (txd->callback) {
> +#ifdef FSL_DMA_LD_DEBUG
> +			chan_dbg(chan, "LD %p callback\n", desc); #endif
> +			txd->callback(txd->callback_param);
> +		}
> +
> +		/* Unmap the dst buffer, if requested */
> +		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> +			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> +				dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> +			else
> +				dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> +		}
> +
> +		/* Unmap the src buffer, if requested */
> +		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> +			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> +				dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> +			else
> +				dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
> +		}
> +	}
> +
> +	/* Run any dependencies */
> +	dma_run_dependencies(txd);
> +
> +	return cookie;
> +}
> +
> +/**
> + * fsldma_clean_running_descriptor - move the completed descriptor from
> + * ld_running to ld_completed
> + * @chan: Freescale DMA channel
> + * @desc: the descriptor which is completed
> + *
> + * Free the descriptor directly if acked by async_tx api, or move it to
> + * queue ld_completed.
> + */
> +static int
> +fsldma_clean_running_descriptor(struct fsldma_chan *chan,
> +		struct fsl_desc_sw *desc)
> +{
> +	/* Remove from the list of transactions */
> +	list_del(&desc->node);
> +	/*
> +	 * the client is allowed to attach dependent operations
> +	 * until 'ack' is set
> +	 */
> +	if (!async_tx_test_ack(&desc->async_tx)) {
> +		/*
> +		 * Move this descriptor to the list of descriptors which is
> +		 * completed, but still awaiting the 'ack' bit to be set.
> +		 */
> +		list_add_tail(&desc->node, &chan->ld_completed);
> +		return 0;
> +	}
> +
> +	dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
> +	return 0;
> +}
> +
>  static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx=
)
> {
>  	struct fsldma_chan *chan =3D to_fsl_chan(tx->chan); @@ -534,8 +653,10
> @@ static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
>=20
>  	chan_dbg(chan, "free all channel resources\n");
>  	spin_lock_irqsave(&chan->desc_lock, flags);
> +	fsldma_cleanup_descriptor(chan);
>  	fsldma_free_desc_list(chan, &chan->ld_pending);
>  	fsldma_free_desc_list(chan, &chan->ld_running);
> +	fsldma_free_desc_list(chan, &chan->ld_completed);
>  	spin_unlock_irqrestore(&chan->desc_lock, flags);
>=20
>  	dma_pool_destroy(chan->desc_pool);
> @@ -819,46 +940,53 @@ static int fsl_dma_device_control(struct dma_chan
> *dchan,
>   * controller. It will run any callbacks, submit any dependencies, and
> then
>   * free the descriptor.
>   */
> -static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
> -				      struct fsl_desc_sw *desc)
> +static void fsldma_cleanup_descriptor(struct fsldma_chan *chan)
>  {
> -	struct dma_async_tx_descriptor *txd =3D &desc->async_tx;
> -	struct device *dev =3D chan->common.device->dev;
> -	dma_addr_t src =3D get_desc_src(chan, desc);
> -	dma_addr_t dst =3D get_desc_dst(chan, desc);
> -	u32 len =3D get_desc_cnt(chan, desc);
> +	struct fsl_desc_sw *desc, *_desc;
> +	dma_cookie_t cookie =3D 0;
> +	dma_addr_t curr_phys =3D get_cdar(chan);
> +	int idle =3D dma_is_idle(chan);
> +	int seen_current =3D 0;
>=20
> -	/* Run the link descriptor callback function */
> -	if (txd->callback) {
> -#ifdef FSL_DMA_LD_DEBUG
> -		chan_dbg(chan, "LD %p callback\n", desc);
> -#endif
> -		txd->callback(txd->callback_param);
> -	}
> +	fsldma_clean_completed_descriptor(chan);
>=20
> -	/* Run any dependencies */
> -	dma_run_dependencies(txd);
> +	/* Run the callback for each descriptor, in order */
> +	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
> +		/*
> +		 * do not advance past the current descriptor loaded into the
> +		 * hardware channel, subsequent descriptors are either in
> +		 * process or have not been submitted
> +		 */
> +		if (seen_current)
> +			break;
>=20
> -	/* Unmap the dst buffer, if requested */
> -	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> -		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> -			dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> -		else
> -			dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> -	}
> +		/*
> +		 * stop the search if we reach the current descriptor and the
> +		 * channel is busy
> +		 */
> +		if (desc->async_tx.phys =3D=3D curr_phys) {
> +			seen_current =3D 1;
> +			if (!idle)
> +				break;
> +		}
> +
> +		cookie =3D fsldma_run_tx_complete_actions(desc, chan, cookie);
> +
> +		if (fsldma_clean_running_descriptor(chan, desc))
> +			break;
>=20
> -	/* Unmap the src buffer, if requested */
> -	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> -		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> -			dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> -		else
> -			dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
>  	}
>=20
> -#ifdef FSL_DMA_LD_DEBUG
> -	chan_dbg(chan, "LD %p free\n", desc);
> -#endif
> -	dma_pool_free(chan->desc_pool, desc, txd->phys);
> +	/*
> +	 * Start any pending transactions automatically
> +	 *
> +	 * In the ideal case, we keep the DMA controller busy while we go
> +	 * ahead and free the descriptors below.
> +	 */
> +	fsl_chan_xfer_ld_queue(chan);
> +
> +	if (cookie > 0)
> +		chan->common.completed_cookie =3D cookie;
>  }
>=20
>  /**
> @@ -954,11 +1082,15 @@ static enum dma_status fsl_tx_status(struct
> dma_chan *dchan,
>  	enum dma_status ret;
>  	unsigned long flags;
>=20
> -	spin_lock_irqsave(&chan->desc_lock, flags);
>  	ret =3D dma_cookie_status(dchan, cookie, txstate);
> +	if (ret =3D=3D DMA_SUCCESS)
> +		return ret;
> +
> +	spin_lock_irqsave(&chan->desc_lock, flags);
> +	fsldma_cleanup_descriptor(chan);
>  	spin_unlock_irqrestore(&chan->desc_lock, flags);
>=20
> -	return ret;
> +	return dma_cookie_status(dchan, cookie, txstate);
>  }
>=20
>  /*----------------------------------------------------------------------
> ------*/
> @@ -1035,52 +1167,19 @@ static irqreturn_t fsldma_chan_irq(int irq, void
> *data)  static void dma_do_tasklet(unsigned long data)  {
>  	struct fsldma_chan *chan =3D (struct fsldma_chan *)data;
> -	struct fsl_desc_sw *desc, *_desc;
> -	LIST_HEAD(ld_cleanup);
>  	unsigned long flags;
>=20
>  	chan_dbg(chan, "tasklet entry\n");
>=20
>  	spin_lock_irqsave(&chan->desc_lock, flags);
>=20
> -	/* update the cookie if we have some descriptors to cleanup */
> -	if (!list_empty(&chan->ld_running)) {
> -		dma_cookie_t cookie;
> -
> -		desc =3D to_fsl_desc(chan->ld_running.prev);
> -		cookie =3D desc->async_tx.cookie;
> -		dma_cookie_complete(&desc->async_tx);
> -
> -		chan_dbg(chan, "completed_cookie=3D%d\n", cookie);
> -	}
> -
> -	/*
> -	 * move the descriptors to a temporary list so we can drop the lock
> -	 * during the entire cleanup operation
> -	 */
> -	list_splice_tail_init(&chan->ld_running, &ld_cleanup);
> -
>  	/* the hardware is now idle and ready for more */
>  	chan->idle =3D true;
>=20
> -	/*
> -	 * Start any pending transactions automatically
> -	 *
> -	 * In the ideal case, we keep the DMA controller busy while we go
> -	 * ahead and free the descriptors below.
> -	 */
> -	fsl_chan_xfer_ld_queue(chan);
> -	spin_unlock_irqrestore(&chan->desc_lock, flags);
> -
> -	/* Run the callback for each descriptor, in order */
> -	list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) {
> +	/* Run all cleanup for this descriptor */
> +	fsldma_cleanup_descriptor(chan);
>=20
> -		/* Remove from the list of transactions */
> -		list_del(&desc->node);
> -
> -		/* Run all cleanup for this descriptor */
> -		fsldma_cleanup_descriptor(chan, desc);
> -	}
> +	spin_unlock_irqrestore(&chan->desc_lock, flags);
>=20
>  	chan_dbg(chan, "tasklet exit\n");
>  }
> @@ -1262,6 +1361,7 @@ static int __devinit fsl_dma_chan_probe(struct
> fsldma_device *fdev,
>  	spin_lock_init(&chan->desc_lock);
>  	INIT_LIST_HEAD(&chan->ld_pending);
>  	INIT_LIST_HEAD(&chan->ld_running);
> +	INIT_LIST_HEAD(&chan->ld_completed);
>  	chan->idle =3D true;
>=20
>  	chan->common.device =3D &fdev->common;
> diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index
> f5c3879..7ede908 100644
> --- a/drivers/dma/fsldma.h
> +++ b/drivers/dma/fsldma.h
> @@ -140,6 +140,7 @@ struct fsldma_chan {
>  	spinlock_t desc_lock;		/* Descriptor operation lock */
>  	struct list_head ld_pending;	/* Link descriptors queue */
>  	struct list_head ld_running;	/* Link descriptors queue */
> +	struct list_head ld_completed;	/* Link descriptors queue */
>  	struct dma_chan common;		/* DMA common channel */
>  	struct dma_pool *desc_pool;	/* Descriptors pool */
>  	struct device *dev;		/* Channel device */
> --
> 1.7.5.1

^ permalink raw reply

* Re: [PATCH -V5 12/13] arch/powerpc: Replace open coded CONTEXT_BITS value
From: Aneesh Kumar K.V @ 2012-08-01  9:56 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev
In-Reply-To: <20120801052902.GF24014@drongo>

Paul Mackerras <paulus@samba.org> writes:

> On Mon, Jul 30, 2012 at 04:52:18PM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>> 
>> To clarify the meaning for future readers, replace the open coded
>> 19 with CONTEXT_BITS
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/mm/mmu_context_hash64.c |    2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>> 
>> diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
>> index 40677aa..daa076c 100644
>> --- a/arch/powerpc/mm/mmu_context_hash64.c
>> +++ b/arch/powerpc/mm/mmu_context_hash64.c
>> @@ -34,7 +34,7 @@ static DEFINE_IDA(mmu_context_ida);
>>   * Each segment contains 2^28 bytes.  Each context maps 2^44 bytes,
>>   * so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
>>   */
>> -#define MAX_CONTEXT	((1UL << 19) - 1)
>> +#define MAX_CONTEXT	((1UL << CONTEXT_BITS) - 1)
>
> This is a good thing to do, but you should also update the comment.
> Maybe you should put this patch before your number 10/13 and then
> change the comment in the same patch where you add the 64TB support.

Moved this to be the first patch and updated the doc in the following doc
update patch.

-aneesh

^ permalink raw reply

* Re: [PATCH -V5 13/13] arch/powerpc: Update VSID allocation documentation
From: Aneesh Kumar K.V @ 2012-08-01 10:01 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev
In-Reply-To: <20120801053505.GG24014@drongo>

Paul Mackerras <paulus@samba.org> writes:

> On Mon, Jul 30, 2012 at 04:52:19PM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>> 
>> This update the proto-VSID and VSID scramble related information
>> to be more generic by using names instead of current values.
>
> Comments below...
>
>> - * VSID allocation
>> + * VSID allocation (256MB segment)
>>   *
>> - * We first generate a 36-bit "proto-VSID".  For kernel addresses this
>> - * is equal to the ESID, for user addresses it is:
>> - *	(context << 15) | (esid & 0x7fff)
>> + * We first generate a 38-bit "proto-VSID".  For kernel addresses this
>> + * is equal to the ESID | 1 << 37, for user addresses it is:
>> + *	(context << USER_ESID_BITS) | (esid & (1U << USER_ESID_BITS))
> 					      ^^^^^^^^^^^^^^^^^^^^^^
> should be ((1U << USER_ESID_BITS) - 1)
>
>>   *
>> - * The two forms are distinguishable because the top bit is 0 for user
>> - * addresses, whereas the top two bits are 1 for kernel addresses.
>> - * Proto-VSIDs with the top two bits equal to 0b10 are reserved for
>> - * now.
>> + * This splits the proto-VSID into the below range
>> + *  0 - (2^(CONTEXT_BITS + USER_ESID_BITS) - 1) : User proto-VSID range
>> + *  2^(CONTEXT_BITS + USER_ESID_BITS) - 2^(VSID_BITS) : Kernel proto-VSID range
>
> Perhaps point out also that CONTEXT_BITS + USER_ESID_BITS == VSID_BITS - 1,
> that is, you have assigned half of the space to user processes and half
> to the kernel.
>

updated

>> -/*
>> - * WARNING - If you change these you must make sure the asm
>> - * implementations in slb_allocate (slb_low.S), do_stab_bolted
>> - * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly.
>> - */
>
> Are you absolutely sure that nothing in the assembly code would need
> to be changed if someone changed these definitions?
>

As a part of this patchset, I didn't touch any of these, and the patchset
does change these values.

-aneesh

^ permalink raw reply

* RE: [2/3][PATCH][v2] TDM Framework
From: Singh Sandeep-B37400 @ 2012-08-01 12:13 UTC (permalink / raw)
  To: Greg KH
  Cc: devel@driverdev.osuosl.org, linux-kernel@vger.kernel.org,
	Francois Romieu, linuxppc-dev@lists.ozlabs.org,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <20120730160146.GB28703@kroah.com>




> -----Original Message-----
> From: Greg KH [mailto:greg@kroah.com]
> Sent: Monday, July 30, 2012 9:32 PM
> To: Singh Sandeep-B37400
> Cc: Francois Romieu; devel@driverdev.osuosl.org; linuxppc-
> dev@lists.ozlabs.org; galak@kernel.crashing.org; linux-arm-
> kernel@lists.infradead.org; linux-kernel@vger.kernel.org
> Subject: Re: [2/3][PATCH][v2] TDM Framework
>=20
> On Mon, Jul 30, 2012 at 09:50:57AM +0000, Singh Sandeep-B37400 wrote:
> > 1. You should send some kernel mode TDM clients. Without those the
> framework
> >    is pretty useless.
> > [Sandeep] We do have a test client but not good enough to be pushed in
> > open source, should we add it to documentation??
>=20
> Then how do you know if the framework is "correct" and good enough for
> real clients?  We don't add frameworks, or apis, to the kernel without
> users, so you will have to come up with some users before we can accept
> it.
We can only say that this framework is available in FSL BSPs and being used by VoIP companies.
But running a complete voice stack itself is beyond the scope of Freescale.
So vendors integrate their solutions with FSL solution.
To test the framework we have a small application in our BSP (this is a very basic test client) which tests the TDM driver and the SLIC interface from voice transfer perspective.
We can get this added in the Linux codebase in some test directory. What could be a good place for this?

Regards
Sandeep

^ permalink raw reply

* Re: [2/3][PATCH][v2] TDM Framework
From: Greg KH @ 2012-08-01 12:37 UTC (permalink / raw)
  To: Singh Sandeep-B37400
  Cc: devel@driverdev.osuosl.org, linux-kernel@vger.kernel.org,
	Francois Romieu, linuxppc-dev@lists.ozlabs.org,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <3F1D9DCAAB49B94D88DBE05911FA4E6E515F98@039-SN1MPN1-001.039d.mgd.msft.net>

On Wed, Aug 01, 2012 at 12:13:19PM +0000, Singh Sandeep-B37400 wrote:
> > On Mon, Jul 30, 2012 at 09:50:57AM +0000, Singh Sandeep-B37400 wrote:
> > > 1. You should send some kernel mode TDM clients. Without those the
> > framework
> > >    is pretty useless.
> > > [Sandeep] We do have a test client but not good enough to be pushed in
> > > open source, should we add it to documentation??
> > 
> > Then how do you know if the framework is "correct" and good enough for
> > real clients?  We don't add frameworks, or apis, to the kernel without
> > users, so you will have to come up with some users before we can accept
> > it.
> We can only say that this framework is available in FSL BSPs and being used by VoIP companies.
> But running a complete voice stack itself is beyond the scope of Freescale.
> So vendors integrate their solutions with FSL solution.
> To test the framework we have a small application in our BSP (this is a very basic test client) which tests the TDM driver and the SLIC interface from voice  transfer perspective.
> We can get this added in the Linux codebase in some test directory. What could be a good place for this?

tools/ is a good place for that.

And sorry, I was thinking you had kernel drivers that attached to this
framework, not userspace programs.  Actually, what is the user/kernel
interface for this framework, I seem to have missed that entirely.  You
will have to document that quite well, and run it by the linux-api
mailing list.

thanks,

greg k-h

^ permalink raw reply

* [PATCH -V6 0/12] arch/powerpc: Add 64TB support to ppc64
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev

Hi,

This patchset includes patches for supporting 64TB with ppc64. I haven't booted
this on hardware with 64TB memory yet. But they boot fine on real hardware with
less memory. The changes extend VSID bits to 38 bits for a 256MB segment
and 26 bits for 1TB segments.

Changes from v5:
 * Address review feedback

Changes from v4:
 * Drop patch "arch/powerpc: properly offset the context bits for 1T segemnts"
   based on review feedback
 * split CONTEXT_BITS related changes from patch 12
 * Add a new doc update patch

Changes from v3:
 * Address review comments.
 * Added new patch to ensure proto-VSID isolation between kernel and user space

Changes from V2:
 * Fix few FIXMEs in the patchset. I have added them as separate patch for
   easier review. That should help us to drop those changes if we don't agree.

Changes from V1:
* Drop the usage of structure (struct virt_addr) to carry virtual address.
  We now represent virtual address via vpn which is virtual address shifted 
  right 12 bits.

Thanks,
-aneesh

^ permalink raw reply

* [PATCH -V6 01/12] arch/powerpc: Replace open coded CONTEXT_BITS value
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

To clarify the meaning for future readers, replace the open coded
19 with CONTEXT_BITS

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/mmu_context_hash64.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 40677aa..daa076c 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -34,7 +34,7 @@ static DEFINE_IDA(mmu_context_ida);
  * Each segment contains 2^28 bytes.  Each context maps 2^44 bytes,
  * so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
  */
-#define MAX_CONTEXT	((1UL << 19) - 1)
+#define MAX_CONTEXT	((1UL << CONTEXT_BITS) - 1)
 
 int __init_new_context(void)
 {
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V6 04/12] arch/powerpc: Convert virtual address to vpn
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch converts different functions to take a virtual page number
instead of a virtual address. The virtual page number is the virtual address
shifted right by VPN_SHIFT (12) bits. This enables us to have an
address range of up to 76 bits.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h     |   71 +++++++++++++++++----
 arch/powerpc/include/asm/pte-hash64-64k.h |   18 +++---
 arch/powerpc/kvm/book3s_32_mmu_host.c     |    2 +-
 arch/powerpc/kvm/book3s_64_mmu_host.c     |    2 +-
 arch/powerpc/mm/hash_low_64.S             |   97 ++++++++++++++++++-----------
 arch/powerpc/mm/hash_native_64.c          |   45 +++++++++----
 arch/powerpc/mm/hash_utils_64.c           |    6 +-
 arch/powerpc/mm/hugetlbpage-hash64.c      |    2 +-
 arch/powerpc/mm/tlb_hash64.c              |    2 +-
 arch/powerpc/platforms/cell/beat_htab.c   |    2 +-
 arch/powerpc/platforms/pseries/lpar.c     |   20 +-----
 11 files changed, 173 insertions(+), 94 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 1c65a59..d3a1139 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -15,6 +15,10 @@
 #include <asm/asm-compat.h>
 #include <asm/page.h>
 
+#ifndef __ASSEMBLY__
+#include <linux/bug.h>
+#endif
+
 /*
  * Segment table
  */
@@ -154,9 +158,25 @@ struct mmu_psize_def
 #define MMU_SEGSIZE_256M	0
 #define MMU_SEGSIZE_1T		1
 
+/*
+ * encode page number shift.
+ * in order to fit the 78 bit va in a 64 bit variable we shift the va by
+ * 12 bits. This enable us to address upto 76 bit va.
+ * For hpt hash from a va we can ignore the page size bits of va and for
+ * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure
+ * we work in all cases including 4k page size.
+ */
+#define VPN_SHIFT	12
 
 #ifndef __ASSEMBLY__
 
+static inline int segment_shift(int ssize)
+{
+	if (ssize == MMU_SEGSIZE_256M)
+		return SID_SHIFT;
+	return SID_SHIFT_1T;
+}
+
 /*
  * The current system page and segment sizes
  */
@@ -180,6 +200,29 @@ extern unsigned long tce_alloc_start, tce_alloc_end;
 extern int mmu_ci_restrictions;
 
 /*
+ * This computes the AVPN and B fields of the first dword of a HPTE,
+ * for use when we want to match an existing PTE.  The bottom 7 bits
+ * of the returned value are zero.
+ */
+static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
+					     int ssize)
+{
+	unsigned long v;
+	/*
+	 * The AVA field omits the low-order 23 bits of the 78 bits VA.
+	 * These bits are not needed in the PTE, because the
+	 * low-order b of these bits are part of the byte offset
+	 * into the virtual page and, if b < 23, the high-order
+	 * 23-b of these bits are always used in selecting the
+	 * PTEGs to be searched
+	 */
+	v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
+	v <<= HPTE_V_AVPN_SHIFT;
+	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+	return v;
+}
+
+/*
  * This function sets the AVPN and L fields of the HPTE  appropriately
  * for the page size
  */
@@ -187,11 +230,9 @@ static inline unsigned long hpte_encode_v(unsigned long va, int psize,
 					  int ssize)
 {
 	unsigned long v;
-	v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
-	v <<= HPTE_V_AVPN_SHIFT;
+	v = hpte_encode_avpn(va, psize, ssize);
 	if (psize != MMU_PAGE_4K)
 		v |= HPTE_V_LARGE;
-	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
 	return v;
 }
 
@@ -216,14 +257,16 @@ static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
 }
 
 /*
- * Build a VA given VSID, EA and segment size
+ * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size.
  */
-static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
+static inline unsigned long hpt_vpn(unsigned long ea, unsigned long vsid,
 				   int ssize)
 {
-	if (ssize == MMU_SEGSIZE_256M)
-		return (vsid << 28) | (ea & 0xfffffffUL);
-	return (vsid << 40) | (ea & 0xffffffffffUL);
+	unsigned long mask;
+	int s_shift = segment_shift(ssize);
+
+	mask = (1ul << (s_shift - VPN_SHIFT)) - 1;
+	return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask);
 }
 
 /*
@@ -233,13 +276,19 @@ static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
 static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
 				     int ssize)
 {
+	int mask;
 	unsigned long hash, vsid;
 
+	/* VPN_SHIFT can be atmost 12 */
 	if (ssize == MMU_SEGSIZE_256M) {
-		hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift);
+		mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1;
+		hash = ((va >> (SID_SHIFT - VPN_SHIFT)) & 0x0000007fffffffff) ^
+			(((va & mask) >> (shift - VPN_SHIFT)) & 0xffff);
 	} else {
-		vsid = va >> 40;
-		hash = vsid ^ (vsid << 25) ^ ((va & 0xffffffffffUL) >> shift);
+		mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1;
+		vsid = va >> (SID_SHIFT_1T - VPN_SHIFT);
+		hash = (vsid & 0xffffff) ^ ((vsid << 25) & 0x7fffffffff) ^
+			(((va & mask) >> (shift - VPN_SHIFT)) & 0xfffffff);
 	}
 	return hash & 0x7fffffffffUL;
 }
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index 59247e8..eedf427 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -58,14 +58,16 @@
 /* Trick: we set __end to va + 64k, which happens works for
  * a 16M page as well as we want only one iteration
  */
-#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)	    \
-        do {                                                                \
-                unsigned long __end = va + PAGE_SIZE;                       \
-                unsigned __split = (psize == MMU_PAGE_4K ||                 \
-				    psize == MMU_PAGE_64K_AP);              \
-                shift = mmu_psize_defs[psize].shift;                        \
-		for (index = 0; va < __end; index++, va += (1L << shift)) { \
-		        if (!__split || __rpte_sub_valid(rpte, index)) do { \
+#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift)	\
+	do {								\
+		unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT));	\
+		unsigned __split = (psize == MMU_PAGE_4K ||		\
+				    psize == MMU_PAGE_64K_AP);		\
+		shift = mmu_psize_defs[psize].shift;			\
+		for (index = 0; vpn < __end; index++,			\
+			     vpn += (1L << (shift - VPN_SHIFT))) {	\
+			if (!__split || __rpte_sub_valid(rpte, index))	\
+				do {
 
 #define pte_iterate_hashed_end() } while(0); } } while(0)
 
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index f922c29..bf5dfb3 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -173,7 +173,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	BUG_ON(!map);
 
 	vsid = map->host_vsid;
-	va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK);
+	va = (vsid << (SID_SHIFT - VPN_SHIFT)) | ((eaddr & ~ESID_MASK) >> VPN_SHIFT)
 
 next_pteg:
 	if (rr == 16) {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 10fc8ec..9d184f1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -117,7 +117,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	}
 
 	vsid = map->host_vsid;
-	va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+	va = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
 
 	if (!orig_pte->may_write)
 		rflags |= HPTE_R_PP;
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index a242b5d..534cc26 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -71,7 +71,7 @@ _GLOBAL(__hash_page_4K)
 	/* Save non-volatile registers.
 	 * r31 will hold "old PTE"
 	 * r30 is "new PTE"
-	 * r29 is "va"
+	 * r29 is vpn
 	 * r28 is a hash value
 	 * r27 is hashtab mask (maybe dynamic patched instead ?)
 	 */
@@ -119,10 +119,10 @@ BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc va and put it in r29 */
-	rldicr	r29,r5,28,63-28
-	rldicl	r3,r3,0,36
-	or	r29,r3,r29
+	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+	or	r29,r28,r29
 
 	/* Calculate hash value for primary slot and store it in r28 */
 	rldicl	r5,r5,0,25		/* vsid & 0x0000007fffffffff */
@@ -130,14 +130,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	xor	r28,r5,r0
 	b	4f
 
-3:	/* Calc VA and hash in r29 and r28 for 1T segment */
-	sldi	r29,r5,40		/* vsid << 40 */
-	clrldi	r3,r3,24		/* ea & 0xffffffffff */
+3:	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+	or	r29,r28,r29
+
+	/*
+	 * calculate hash value for primary slot and
+	 * store it in r28 for 1T segment
+	 */
 	rldic	r28,r5,25,25		/* (vsid << 25) & 0x7fffffffff */
 	clrldi	r5,r5,40		/* vsid & 0xffffff */
 	rldicl	r0,r3,64-12,36		/* (ea >> 12) & 0xfffffff */
 	xor	r28,r28,r5
-	or	r29,r3,r29		/* VA */
 	xor	r28,r28,r0		/* hash */
 
 	/* Convert linux PTE bits into HW equivalents */
@@ -193,7 +198,7 @@ htab_insert_pte:
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,0			/* !bolted, !secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -216,7 +221,7 @@ _GLOBAL(htab_call_hpte_insert1)
 	
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -286,7 +291,7 @@ htab_modify_pte:
 	add	r3,r0,r3	/* add slot idx */
 
 	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* va */
+	mr	r5,r29			/* vpn */
 	li	r6,MMU_PAGE_4K		/* page size */
 	ld	r7,STK_PARM(r9)(r1)	/* segment size */
 	ld	r8,STK_PARM(r8)(r1)	/* get "local" param */
@@ -347,7 +352,7 @@ _GLOBAL(__hash_page_4K)
 	/* Save non-volatile registers.
 	 * r31 will hold "old PTE"
 	 * r30 is "new PTE"
-	 * r29 is "va"
+	 * r29 is vpn
 	 * r28 is a hash value
 	 * r27 is hashtab mask (maybe dynamic patched instead ?)
 	 * r26 is the hidx mask
@@ -402,10 +407,14 @@ BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc va and put it in r29 */
-	rldicr	r29,r5,28,63-28		/* r29 = (vsid << 28) */
-	rldicl	r3,r3,0,36		/* r3 = (ea & 0x0fffffff) */
-	or	r29,r3,r29		/* r29 = va */
+	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
+	/*
+	 * clrldi r3,r3,64 - SID_SHIFT -->  ea & 0xfffffff
+	 * srdi	 r28,r3,VPN_SHIFT
+	 */
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+	or	r29,r28,r29
 
 	/* Calculate hash value for primary slot and store it in r28 */
 	rldicl	r5,r5,0,25		/* vsid & 0x0000007fffffffff */
@@ -413,14 +422,23 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	xor	r28,r5,r0
 	b	4f
 
-3:	/* Calc VA and hash in r29 and r28 for 1T segment */
-	sldi	r29,r5,40		/* vsid << 40 */
-	clrldi	r3,r3,24		/* ea & 0xffffffffff */
+3:	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
+	/*
+	 * clrldi r3,r3,64 - SID_SHIFT_1T -->  ea & 0xffffffffff
+	 * srdi	r28,r3,VPN_SHIFT
+	 */
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+	or	r29,r28,r29
+
+	/*
+	 * Calculate hash value for primary slot and
+	 * store it in r28  for 1T segment
+	 */
 	rldic	r28,r5,25,25		/* (vsid << 25) & 0x7fffffffff */
 	clrldi	r5,r5,40		/* vsid & 0xffffff */
 	rldicl	r0,r3,64-12,36		/* (ea >> 12) & 0xfffffff */
 	xor	r28,r28,r5
-	or	r29,r3,r29		/* VA */
 	xor	r28,r28,r0		/* hash */
 
 	/* Convert linux PTE bits into HW equivalents */
@@ -496,7 +514,7 @@ htab_special_pfn:
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,0			/* !bolted, !secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -523,7 +541,7 @@ _GLOBAL(htab_call_hpte_insert1)
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -555,7 +573,7 @@ _GLOBAL(htab_call_hpte_remove)
 	 * useless now that the segment has been switched to 4k pages.
 	 */
 htab_inval_old_hpte:
-	mr	r3,r29			/* virtual addr */
+	mr	r3,r29			/* vpn */
 	mr	r4,r31			/* PTE.pte */
 	li	r5,0			/* PTE.hidx */
 	li	r6,MMU_PAGE_64K		/* psize */
@@ -628,7 +646,7 @@ htab_modify_pte:
 	add	r3,r0,r3	/* add slot idx */
 
 	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* va */
+	mr	r5,r29			/* vpn */
 	li	r6,MMU_PAGE_4K		/* page size */
 	ld	r7,STK_PARM(r9)(r1)	/* segment size */
 	ld	r8,STK_PARM(r8)(r1)	/* get "local" param */
@@ -684,7 +702,7 @@ _GLOBAL(__hash_page_64K)
 	/* Save non-volatile registers.
 	 * r31 will hold "old PTE"
 	 * r30 is "new PTE"
-	 * r29 is "va"
+	 * r29 is vpn
 	 * r28 is a hash value
 	 * r27 is hashtab mask (maybe dynamic patched instead ?)
 	 */
@@ -737,10 +755,10 @@ BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc va and put it in r29 */
-	rldicr	r29,r5,28,63-28
-	rldicl	r3,r3,0,36
-	or	r29,r3,r29
+	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+	or	r29,r28,r29
 
 	/* Calculate hash value for primary slot and store it in r28 */
 	rldicl	r5,r5,0,25		/* vsid & 0x0000007fffffffff */
@@ -748,14 +766,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	xor	r28,r5,r0
 	b	4f
 
-3:	/* Calc VA and hash in r29 and r28 for 1T segment */
-	sldi	r29,r5,40		/* vsid << 40 */
-	clrldi	r3,r3,24		/* ea & 0xffffffffff */
+3:	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+	or	r29,r28,r29
+
+	/*
+	 * calculate hash value for primary slot and
+	 * store it in r28 for 1T segment
+	 */
 	rldic	r28,r5,25,25		/* (vsid << 25) & 0x7fffffffff */
 	clrldi	r5,r5,40		/* vsid & 0xffffff */
 	rldicl	r0,r3,64-16,40		/* (ea >> 16) & 0xffffff */
 	xor	r28,r28,r5
-	or	r29,r3,r29		/* VA */
 	xor	r28,r28,r0		/* hash */
 
 	/* Convert linux PTE bits into HW equivalents */
@@ -814,7 +837,7 @@ ht64_insert_pte:
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,0			/* !bolted, !secondary */
 	li	r8,MMU_PAGE_64K
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -837,7 +860,7 @@ _GLOBAL(ht64_call_hpte_insert1)
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARM(r4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
 	li	r8,MMU_PAGE_64K
 	ld	r9,STK_PARM(r9)(r1)	/* segment size */
@@ -907,7 +930,7 @@ ht64_modify_pte:
 	add	r3,r0,r3	/* add slot idx */
 
 	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* va */
+	mr	r5,r29			/* vpn */
 	li	r6,MMU_PAGE_64K
 	ld	r7,STK_PARM(r9)(r1)	/* segment size */
 	ld	r8,STK_PARM(r8)(r1)	/* get "local" param */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 660b8bb..a5c08c3 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -39,22 +39,35 @@
 
 DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
-static inline void __tlbie(unsigned long va, int psize, int ssize)
+static inline void __tlbie(unsigned long vpn, int psize, int ssize)
 {
+	unsigned long va;
 	unsigned int penc;
 
-	/* clear top 16 bits, non SLS segment */
+	/*
+	 * We need 14 to 65 bits of va for a tlbie of a 4K page.
+	 * With vpn we ignore the lower VPN_SHIFT bits already.
+	 * And top two bits are already ignored because we can
+	 * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
+	 * of 12.
+	 */
+	va = vpn << VPN_SHIFT;
+	/*
+	 * clear top 16 bits of 64bit va, non SLS segment
+	 * Older versions of the architecture (2.02 and earlier) require the
+	 * masking of the top 16 bits.
+	 */
 	va &= ~(0xffffULL << 48);
 
 	switch (psize) {
 	case MMU_PAGE_4K:
-		va &= ~0xffful;
 		va |= ssize << 8;
 		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
 			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
 			     : "memory");
 		break;
 	default:
+		/* We need 14 to 14 + i bits of va */
 		penc = mmu_psize_defs[psize].penc;
 		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
 		va |= penc << 12;
@@ -67,21 +80,28 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 	}
 }
 
-static inline void __tlbiel(unsigned long va, int psize, int ssize)
+static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
 {
+	unsigned long va;
 	unsigned int penc;
 
-	/* clear top 16 bits, non SLS segment */
+	/* VPN_SHIFT can be atmost 12 */
+	va = vpn << VPN_SHIFT;
+	/*
+	 * clear top 16 bits of 64 bit va, non SLS segment
+	 * Older versions of the architecture (2.02 and earlier) require the
+	 * masking of the top 16 bits.
+	 */
 	va &= ~(0xffffULL << 48);
 
 	switch (psize) {
 	case MMU_PAGE_4K:
-		va &= ~0xffful;
 		va |= ssize << 8;
 		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
 			     : : "r"(va) : "memory");
 		break;
 	default:
+		/* We need 14 to 14 + i bits of va */
 		penc = mmu_psize_defs[psize].penc;
 		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
 		va |= penc << 12;
@@ -234,7 +254,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 
 	want_v = hpte_encode_v(va, psize, ssize);
 
-	DBG_LOW("    update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
+	DBG_LOW("    update(va=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
 		va, want_v & HPTE_V_AVPN, slot, newpp);
 
 	native_lock_hpte(hptep);
@@ -300,7 +320,7 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 	struct hash_pte *hptep;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	slot = native_hpte_find(va, psize, ssize);
 	if (slot == -1)
@@ -325,7 +345,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 
 	local_irq_save(flags);
 
-	DBG_LOW("    invalidate(va=%016lx, hash: %x)\n", va, slot);
+	DBG_LOW("    invalidate(va=%016lx, hash: %lx)\n", va, slot);
 
 	want_v = hpte_encode_v(va, psize, ssize);
 	native_lock_hpte(hptep);
@@ -399,7 +419,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			vpi = (vsid ^ pteg) & htab_hash_mask;
 			seg_off |= vpi << shift;
 		}
-		*va = vsid << SID_SHIFT | seg_off;
+		*va = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
 	case MMU_SEGSIZE_1T:
 		/* We only have 40 - 23 bits of seg_off in avpn */
 		seg_off = (avpn & 0x1ffff) << 23;
@@ -408,7 +428,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
 			seg_off |= vpi << shift;
 		}
-		*va = vsid << SID_SHIFT_1T | seg_off;
+		*va = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
 	default:
 		*va = size = 0;
 	}
@@ -425,9 +445,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
  */
 static void native_hpte_clear(void)
 {
+	unsigned long va = 0;
 	unsigned long slot, slots, flags;
 	struct hash_pte *hptep = htab_address;
-	unsigned long hpte_v, va;
+	unsigned long hpte_v;
 	unsigned long pteg_count;
 	int psize, ssize;
 
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 377e5cb..975c7d1 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -192,7 +192,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 	     vaddr += step, paddr += step) {
 		unsigned long hash, hpteg;
 		unsigned long vsid = get_kernel_vsid(vaddr, ssize);
-		unsigned long va = hpt_va(vaddr, vsid, ssize);
+		unsigned long va  = hpt_vpn(vaddr, vsid, ssize);
 		unsigned long tprot = prot;
 
 		/* Make kernel text executable */
@@ -1208,7 +1208,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
 {
 	unsigned long hash, hpteg;
 	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long va = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
 	unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
 	int ret;
 
@@ -1229,7 +1229,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
 {
 	unsigned long hash, hidx, slot;
 	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long va = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
 
 	hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
 	spin_lock(&linear_map_hash_lock);
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index cc5c273..1331403 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -25,7 +25,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
 
 	/* Search the Linux page table for a match with va */
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	/* At this point, we have a pte (old_pte) which can be used to build
 	 * or update an HPTE. There are 2 cases:
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 31f1820..321c585 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -86,7 +86,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
 		ssize = mmu_kernel_ssize;
 	}
-	vaddr = hpt_va(addr, vsid, ssize);
+	vaddr = hpt_vpn(addr, vsid, ssize);
 	rpte = __real_pte(__pte(pte), ptep);
 
 	/*
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index b83077e..c8c7bf6 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -259,7 +259,7 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 	u64 dummy0, dummy1;
 
 	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
-	va = hpt_va(ea, vsid, MMU_SEGSIZE_256M);
+	va = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M);
 
 	raw_spin_lock(&beat_htab_lock);
 	slot = beat_lpar_hpte_find(va, psize);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 5f3ef87..2127529 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -227,22 +227,6 @@ static void pSeries_lpar_hptab_clear(void)
 }
 
 /*
- * This computes the AVPN and B fields of the first dword of a HPTE,
- * for use when we want to match an existing PTE.  The bottom 7 bits
- * of the returned value are zero.
- */
-static inline unsigned long hpte_encode_avpn(unsigned long va, int psize,
-					     int ssize)
-{
-	unsigned long v;
-
-	v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
-	v <<= HPTE_V_AVPN_SHIFT;
-	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
-	return v;
-}
-
-/*
  * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
  * the low 3 bits of flags happen to line up.  So no transform is needed.
  * We can probably optimize here and assume the high bits of newpp are
@@ -326,7 +310,7 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
 	unsigned long lpar_rc, slot, vsid, va, flags;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	slot = pSeries_lpar_hpte_find(va, psize, ssize);
 	BUG_ON(slot == -1);
@@ -361,7 +345,7 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
 	unsigned long slot, vsid, va;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	slot = pSeries_lpar_hpte_find(va, psize, ssize);
 	BUG_ON(slot == -1);
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V6 06/12] arch/powerpc: Make KERN_VIRT_SIZE not dependend on PGTABLE_RANGE
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

As we keep increasing PGTABLE_RANGE we need not increase the virtual
map area for the kernel.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/pgtable-ppc64.h |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index c420561..8af1cf2 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -41,7 +41,7 @@
 #else
 #define KERN_VIRT_START ASM_CONST(0xD000000000000000)
 #endif
-#define KERN_VIRT_SIZE	PGTABLE_RANGE
+#define KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
 
 /*
  * The vmalloc space starts at the beginning of that region, and
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V6 07/12] arch/powerpc: Increase the slice range to 64TB
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch makes the high psizes mask an unsigned char array
so that we can have more than 16TB. Currently we support up to
64TB.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h |    6 +-
 arch/powerpc/include/asm/page_64.h    |    6 +-
 arch/powerpc/mm/hash_utils_64.c       |   15 +++--
 arch/powerpc/mm/slb_low.S             |   30 ++++++---
 arch/powerpc/mm/slice.c               |  107 +++++++++++++++++++++------------
 5 files changed, 109 insertions(+), 55 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index d848c56..8f05eec 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -464,7 +464,11 @@ typedef struct {
 
 #ifdef CONFIG_PPC_MM_SLICES
 	u64 low_slices_psize;	/* SLB page size encodings */
-	u64 high_slices_psize;  /* 4 bits per slice for now */
+	/*
+	 * Right now we support 64TB and 4 bits for each
+	 * 1TB slice we need 32 bytes for 64TB.
+	 */
+	unsigned char high_slices_psize[32];  /* 4 bits per slice for now */
 #else
 	u16 sllp;		/* SLB page size encoding */
 #endif
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index fed85e6..6c9bef4 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -82,7 +82,11 @@ extern u64 ppc64_pft_size;
 
 struct slice_mask {
 	u16 low_slices;
-	u16 high_slices;
+	/*
+	 * This should be derived out of PGTABLE_RANGE. For the current
+	 * max 64TB, u64 should be ok.
+	 */
+	u64 high_slices;
 };
 
 struct mm_struct;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 74c5479..13e0ccf 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -804,16 +804,19 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 #ifdef CONFIG_PPC_MM_SLICES
 unsigned int get_paca_psize(unsigned long addr)
 {
-	unsigned long index, slices;
+	u64 lpsizes;
+	unsigned char *hpsizes;
+	unsigned long index, mask_index;
 
 	if (addr < SLICE_LOW_TOP) {
-		slices = get_paca()->context.low_slices_psize;
+		lpsizes = get_paca()->context.low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
-	} else {
-		slices = get_paca()->context.high_slices_psize;
-		index = GET_HIGH_SLICE_INDEX(addr);
+		return (lpsizes >> (index * 4)) & 0xF;
 	}
-	return (slices >> (index * 4)) & 0xF;
+	hpsizes = get_paca()->context.high_slices_psize;
+	index = GET_HIGH_SLICE_INDEX(addr);
+	mask_index = index & 0x1;
+	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
 }
 
 #else
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index b9ee79ce..e132dc6 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -108,17 +108,31 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 	 * between 4k and 64k standard page size
 	 */
 #ifdef CONFIG_PPC_MM_SLICES
+	/* r10 have esid */
 	cmpldi	r10,16
-
-	/* Get the slice index * 4 in r11 and matching slice size mask in r9 */
-	ld	r9,PACALOWSLICESPSIZE(r13)
-	sldi	r11,r10,2
+	/* below SLICE_LOW_TOP */
 	blt	5f
-	ld	r9,PACAHIGHSLICEPSIZE(r13)
-	srdi	r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
-	andi.	r11,r11,0x3c
+	/*
+	 * Handle hpsizes,
+	 * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
+	 */
+	srdi    r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
+	addi	r9,r11,PACAHIGHSLICEPSIZE
+	lbzx	r9,r13,r9		/* r9 is hpsizes[r11] */
+	/* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */
+	rldicl	r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
+	b	6f
 
-5:	/* Extract the psize and multiply to get an array offset */
+5:
+	/*
+	 * Handle lpsizes
+	 * r9 is get_paca()->context.low_slices_psize, r11 is index
+	 */
+	ld	r9,PACALOWSLICESPSIZE(r13)
+	mr	r11,r10
+6:
+	sldi	r11,r11,2  /* index * 4 */
+	/* Extract the psize and multiply to get an array offset */
 	srd	r9,r9,r11
 	andi.	r9,r9,0xf
 	mulli	r9,r9,MMUPSIZEDEFSIZE
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 73709f7..b4e996a 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -42,7 +42,7 @@ int _slice_debug = 1;
 
 static void slice_print_mask(const char *label, struct slice_mask mask)
 {
-	char	*p, buf[16 + 3 + 16 + 1];
+	char	*p, buf[16 + 3 + 64 + 1];
 	int	i;
 
 	if (!_slice_debug)
@@ -54,7 +54,7 @@ static void slice_print_mask(const char *label, struct slice_mask mask)
 	*(p++) = '-';
 	*(p++) = ' ';
 	for (i = 0; i < SLICE_NUM_HIGH; i++)
-		*(p++) = (mask.high_slices & (1 << i)) ? '1' : '0';
+		*(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
 	*(p++) = 0;
 
 	printk(KERN_DEBUG "%s:%s\n", label, buf);
@@ -84,8 +84,8 @@ static struct slice_mask slice_range_to_mask(unsigned long start,
 	}
 
 	if ((start + len) > SLICE_LOW_TOP)
-		ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1))
-			- (1u << GET_HIGH_SLICE_INDEX(start));
+		ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1))
+			- (1ul << GET_HIGH_SLICE_INDEX(start));
 
 	return ret;
 }
@@ -135,26 +135,31 @@ static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
 
 	for (i = 0; i < SLICE_NUM_HIGH; i++)
 		if (!slice_high_has_vma(mm, i))
-			ret.high_slices |= 1u << i;
+			ret.high_slices |= 1ul << i;
 
 	return ret;
 }
 
 static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
 {
+	unsigned char *hpsizes;
+	int index, mask_index;
 	struct slice_mask ret = { 0, 0 };
 	unsigned long i;
-	u64 psizes;
+	u64 lpsizes;
 
-	psizes = mm->context.low_slices_psize;
+	lpsizes = mm->context.low_slices_psize;
 	for (i = 0; i < SLICE_NUM_LOW; i++)
-		if (((psizes >> (i * 4)) & 0xf) == psize)
+		if (((lpsizes >> (i * 4)) & 0xf) == psize)
 			ret.low_slices |= 1u << i;
 
-	psizes = mm->context.high_slices_psize;
-	for (i = 0; i < SLICE_NUM_HIGH; i++)
-		if (((psizes >> (i * 4)) & 0xf) == psize)
-			ret.high_slices |= 1u << i;
+	hpsizes = mm->context.high_slices_psize;
+	for (i = 0; i < SLICE_NUM_HIGH; i++) {
+		mask_index = i & 0x1;
+		index = i >> 1;
+		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
+			ret.high_slices |= 1ul << i;
+	}
 
 	return ret;
 }
@@ -183,8 +188,10 @@ static void slice_flush_segments(void *parm)
 
 static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
 {
+	int index, mask_index;
 	/* Write the new slice psize bits */
-	u64 lpsizes, hpsizes;
+	unsigned char *hpsizes;
+	u64 lpsizes;
 	unsigned long i, flags;
 
 	slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
@@ -201,14 +208,18 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
 				(((unsigned long)psize) << (i * 4));
 
-	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < SLICE_NUM_HIGH; i++)
-		if (mask.high_slices & (1u << i))
-			hpsizes = (hpsizes & ~(0xful << (i * 4))) |
-				(((unsigned long)psize) << (i * 4));
-
+	/* Assign the value back */
 	mm->context.low_slices_psize = lpsizes;
-	mm->context.high_slices_psize = hpsizes;
+
+	hpsizes = mm->context.high_slices_psize;
+	for (i = 0; i < SLICE_NUM_HIGH; i++) {
+		mask_index = i & 0x1;
+		index = i >> 1;
+		if (mask.high_slices & (1ul << i))
+			hpsizes[index] = (hpsizes[index] &
+					  ~(0xf << (mask_index * 4))) |
+				(((unsigned long)psize) << (mask_index * 4));
+	}
 
 	slice_dbg(" lsps=%lx, hsps=%lx\n",
 		  mm->context.low_slices_psize,
@@ -587,18 +598,19 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 
 unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
 {
-	u64 psizes;
-	int index;
+	unsigned char *hpsizes;
+	int index, mask_index;
 
 	if (addr < SLICE_LOW_TOP) {
-		psizes = mm->context.low_slices_psize;
+		u64 lpsizes;
+		lpsizes = mm->context.low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
-	} else {
-		psizes = mm->context.high_slices_psize;
-		index = GET_HIGH_SLICE_INDEX(addr);
+		return (lpsizes >> (index * 4)) & 0xf;
 	}
-
-	return (psizes >> (index * 4)) & 0xf;
+	hpsizes = mm->context.high_slices_psize;
+	index = GET_HIGH_SLICE_INDEX(addr);
+	mask_index = index & 0x1;
+	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
 }
 EXPORT_SYMBOL_GPL(get_slice_psize);
 
@@ -618,7 +630,9 @@ EXPORT_SYMBOL_GPL(get_slice_psize);
  */
 void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 {
-	unsigned long flags, lpsizes, hpsizes;
+	int index, mask_index;
+	unsigned char *hpsizes;
+	unsigned long flags, lpsizes;
 	unsigned int old_psize;
 	int i;
 
@@ -639,15 +653,21 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 		if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
 			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
 				(((unsigned long)psize) << (i * 4));
+	/* Assign the value back */
+	mm->context.low_slices_psize = lpsizes;
 
 	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < SLICE_NUM_HIGH; i++)
-		if (((hpsizes >> (i * 4)) & 0xf) == old_psize)
-			hpsizes = (hpsizes & ~(0xful << (i * 4))) |
-				(((unsigned long)psize) << (i * 4));
+	for (i = 0; i < SLICE_NUM_HIGH; i++) {
+		mask_index = i & 0x1;
+		index = i >> 1;
+		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
+			hpsizes[index] = (hpsizes[index] &
+					  ~(0xf << (mask_index * 4))) |
+				(((unsigned long)psize) << (mask_index * 4));
+	}
+
+
 
-	mm->context.low_slices_psize = lpsizes;
-	mm->context.high_slices_psize = hpsizes;
 
 	slice_dbg(" lsps=%lx, hsps=%lx\n",
 		  mm->context.low_slices_psize,
@@ -660,18 +680,27 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 void slice_set_psize(struct mm_struct *mm, unsigned long address,
 		     unsigned int psize)
 {
+	unsigned char *hpsizes;
 	unsigned long i, flags;
-	u64 *p;
+	u64 *lpsizes;
 
 	spin_lock_irqsave(&slice_convert_lock, flags);
 	if (address < SLICE_LOW_TOP) {
 		i = GET_LOW_SLICE_INDEX(address);
-		p = &mm->context.low_slices_psize;
+		lpsizes = &mm->context.low_slices_psize;
+		*lpsizes = (*lpsizes & ~(0xful << (i * 4))) |
+			((unsigned long) psize << (i * 4));
 	} else {
+		int index, mask_index;
 		i = GET_HIGH_SLICE_INDEX(address);
-		p = &mm->context.high_slices_psize;
+		hpsizes = mm->context.high_slices_psize;
+		mask_index = i & 0x1;
+		index = i >> 1;
+		hpsizes[index] = (hpsizes[index] &
+				  ~(0xf << (mask_index * 4))) |
+			(((unsigned long)psize) << (mask_index * 4));
 	}
-	*p = (*p & ~(0xful << (i * 4))) | ((unsigned long) psize << (i * 4));
+
 	spin_unlock_irqrestore(&slice_convert_lock, flags);
 
 #ifdef CONFIG_SPU_BASE
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V6 09/12] arch/powerpc: Use the required number of VSID bits in slbmte
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

ASM_VSID_SCRAMBLE can leave non-zero bits in the high 28 bits of the result
for a 256MB segment (40 bits for a 1T segment). Properly mask them before
using the values in slbmte.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/slb_low.S |   12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index e132dc6..3b75f19 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -223,7 +223,11 @@ _GLOBAL(slb_allocate_user)
  */
 slb_finish_load:
 	ASM_VSID_SCRAMBLE(r10,r9,256M)
-	rldimi	r11,r10,SLB_VSID_SHIFT,16	/* combine VSID and flags */
+	/*
+	 * bits above VSID_BITS_256M need to be ignored from r10
+	 * also combine VSID and flags
+	 */
+	rldimi	r11,r10,SLB_VSID_SHIFT,(64 - (SLB_VSID_SHIFT + VSID_BITS_256M))
 
 	/* r3 = EA, r11 = VSID data */
 	/*
@@ -287,7 +291,11 @@ _GLOBAL(slb_compare_rr_to_size)
 slb_finish_load_1T:
 	srdi	r10,r10,40-28		/* get 1T ESID */
 	ASM_VSID_SCRAMBLE(r10,r9,1T)
-	rldimi	r11,r10,SLB_VSID_SHIFT_1T,16	/* combine VSID and flags */
+	/*
+	 * bits above VSID_BITS_1T need to be ignored from r10
+	 * also combine VSID and flags
+	 */
+	rldimi	r11,r10,SLB_VSID_SHIFT_1T,(64 - (SLB_VSID_SHIFT_1T + VSID_BITS_1T))
 	li	r10,MMU_SEGSIZE_1T
 	rldimi	r11,r10,SLB_VSID_SSIZE_SHIFT,0	/* insert segment size */
 
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V6 02/12] arch/powerpc: Use hpt_va to compute virtual address
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Use hpt_va() instead of open-coding the virtual address computation.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/cell/beat_htab.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 943c9d3..b83077e 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -259,7 +259,7 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 	u64 dummy0, dummy1;
 
 	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
-	va = (vsid << 28) | (ea & 0x0fffffff);
+	va = hpt_va(ea, vsid, MMU_SEGSIZE_256M);
 
 	raw_spin_lock(&beat_htab_lock);
 	slot = beat_lpar_hpte_find(va, psize);
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V6 05/12] arch/powerpc: Rename va to vpn
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Rename the variable to better reflect the values. No functional change
in this patch.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s.h   |    2 +-
 arch/powerpc/include/asm/machdep.h      |    6 +--
 arch/powerpc/include/asm/mmu-hash64.h   |   23 ++++----
 arch/powerpc/include/asm/tlbflush.h     |    4 +-
 arch/powerpc/kvm/book3s_32_mmu_host.c   |    8 +--
 arch/powerpc/kvm/book3s_64_mmu_host.c   |   17 +++---
 arch/powerpc/kvm/trace.h                |   14 ++---
 arch/powerpc/mm/hash_native_64.c        |   88 ++++++++++++++++---------------
 arch/powerpc/mm/hash_utils_64.c         |   30 +++++------
 arch/powerpc/mm/hugetlbpage-hash64.c    |   15 +++---
 arch/powerpc/mm/tlb_hash64.c            |   11 ++--
 arch/powerpc/platforms/cell/beat_htab.c |   45 ++++++++--------
 arch/powerpc/platforms/ps3/htab.c       |   22 ++++----
 arch/powerpc/platforms/pseries/lpar.c   |   60 +++++++++++----------
 14 files changed, 177 insertions(+), 168 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index f0e0c6a..7aefdb3 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -59,7 +59,7 @@ struct hpte_cache {
 	struct hlist_node list_vpte;
 	struct hlist_node list_vpte_long;
 	struct rcu_head rcu_head;
-	u64 host_va;
+	u64 host_vpn;
 	u64 pfn;
 	ulong slot;
 	struct kvmppc_pte pte;
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 42ce570..cd63f1a 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -34,19 +34,19 @@ struct machdep_calls {
 	char		*name;
 #ifdef CONFIG_PPC64
 	void            (*hpte_invalidate)(unsigned long slot,
-					   unsigned long va,
+					   unsigned long vpn,
 					   int psize, int ssize,
 					   int local);
 	long		(*hpte_updatepp)(unsigned long slot, 
 					 unsigned long newpp, 
-					 unsigned long va,
+					 unsigned long vpn,
 					 int psize, int ssize,
 					 int local);
 	void            (*hpte_updateboltedpp)(unsigned long newpp, 
 					       unsigned long ea,
 					       int psize, int ssize);
 	long		(*hpte_insert)(unsigned long hpte_group,
-				       unsigned long va,
+				       unsigned long vpn,
 				       unsigned long prpn,
 				       unsigned long rflags,
 				       unsigned long vflags,
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index d3a1139..d848c56 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -226,11 +226,11 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
  * This function sets the AVPN and L fields of the HPTE  appropriately
  * for the page size
  */
-static inline unsigned long hpte_encode_v(unsigned long va, int psize,
-					  int ssize)
+static inline unsigned long hpte_encode_v(unsigned long vpn,
+					  int psize, int ssize)
 {
 	unsigned long v;
-	v = hpte_encode_avpn(va, psize, ssize);
+	v = hpte_encode_avpn(vpn, psize, ssize);
 	if (psize != MMU_PAGE_4K)
 		v |= HPTE_V_LARGE;
 	return v;
@@ -259,8 +259,8 @@ static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
 /*
  * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size.
  */
-static inline unsigned long hpt_vpn(unsigned long ea, unsigned long vsid,
-				   int ssize)
+static inline unsigned long hpt_vpn(unsigned long ea,
+				    unsigned long vsid, int ssize)
 {
 	unsigned long mask;
 	int s_shift = segment_shift(ssize);
@@ -272,9 +272,8 @@ static inline unsigned long hpt_vpn(unsigned long ea, unsigned long vsid,
 /*
  * This hashes a virtual address
  */
-
-static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
-				     int ssize)
+static inline unsigned long hpt_hash(unsigned long vpn,
+				     unsigned int shift, int ssize)
 {
 	int mask;
 	unsigned long hash, vsid;
@@ -282,13 +281,13 @@ static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
 	/* VPN_SHIFT can be atmost 12 */
 	if (ssize == MMU_SEGSIZE_256M) {
 		mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1;
-		hash = ((va >> (SID_SHIFT - VPN_SHIFT)) & 0x0000007fffffffff) ^
-			(((va & mask) >> (shift - VPN_SHIFT)) & 0xffff);
+		hash = ((vpn >> (SID_SHIFT - VPN_SHIFT)) & 0x0000007fffffffff) ^
+			(((vpn & mask) >> (shift - VPN_SHIFT)) & 0xffff);
 	} else {
 		mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1;
-		vsid = va >> (SID_SHIFT_1T - VPN_SHIFT);
+		vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT);
 		hash = (vsid & 0xffffff) ^ ((vsid << 25) & 0x7fffffffff) ^
-			(((va & mask) >> (shift - VPN_SHIFT)) & 0xfffffff);
+			(((vpn & mask) >> (shift - VPN_SHIFT)) & 0xfffffff);
 	}
 	return hash & 0x7fffffffffUL;
 }
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 81143fc..fc02d1d 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -95,7 +95,7 @@ struct ppc64_tlb_batch {
 	unsigned long		index;
 	struct mm_struct	*mm;
 	real_pte_t		pte[PPC64_TLB_BATCH_NR];
-	unsigned long		vaddr[PPC64_TLB_BATCH_NR];
+	unsigned long		vpn[PPC64_TLB_BATCH_NR];
 	unsigned int		psize;
 	int			ssize;
 };
@@ -127,7 +127,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
 #define arch_flush_lazy_mmu_mode()      do {} while (0)
 
 
-extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize,
+extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
 			    int ssize, int local);
 extern void flush_hash_range(unsigned long number, int local);
 
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index bf5dfb3..f024d2c 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -141,7 +141,7 @@ extern char etext[];
 int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 {
 	pfn_t hpaddr;
-	u64 va;
+	u64 vpn;
 	u64 vsid;
 	struct kvmppc_sid_map *map;
 	volatile u32 *pteg;
@@ -173,7 +173,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	BUG_ON(!map);
 
 	vsid = map->host_vsid;
-	va = (vsid << (SID_SHIFT - VPN_SHIFT)) | ((eaddr & ~ESID_MASK) >> VPN_SHIFT)
+	vpn = (vsid << (SID_SHIFT - VPN_SHIFT)) | ((eaddr & ~ESID_MASK) >> VPN_SHIFT);
 
 next_pteg:
 	if (rr == 16) {
@@ -241,11 +241,11 @@ next_pteg:
 	dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
 		    orig_pte->may_write ? 'w' : '-',
 		    orig_pte->may_execute ? 'x' : '-',
-		    orig_pte->eaddr, (ulong)pteg, va,
+		    orig_pte->eaddr, (ulong)pteg, vpn,
 		    orig_pte->vpage, hpaddr);
 
 	pte->slot = (ulong)&pteg[rr];
-	pte->host_va = va;
+	pte->host_vpn = vpn;
 	pte->pte = *orig_pte;
 	pte->pfn = hpaddr >> PAGE_SHIFT;
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 9d184f1..bfb5640 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -33,7 +33,7 @@
 
 void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
-	ppc_md.hpte_invalidate(pte->slot, pte->host_va,
+	ppc_md.hpte_invalidate(pte->slot, pte->host_vpn,
 			       MMU_PAGE_4K, MMU_SEGSIZE_256M,
 			       false);
 }
@@ -80,8 +80,9 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
 
 int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 {
+	unsigned long vpn;
 	pfn_t hpaddr;
-	ulong hash, hpteg, va;
+	ulong hash, hpteg;
 	u64 vsid;
 	int ret;
 	int rflags = 0x192;
@@ -117,7 +118,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	}
 
 	vsid = map->host_vsid;
-	va = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+	vpn = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
 
 	if (!orig_pte->may_write)
 		rflags |= HPTE_R_PP;
@@ -127,7 +128,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	if (!orig_pte->may_execute)
 		rflags |= HPTE_R_N;
 
-	hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M);
+	hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M);
 
 map_again:
 	hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -139,7 +140,8 @@ map_again:
 			goto out;
 		}
 
-	ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M);
+	ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
+				 MMU_PAGE_4K, MMU_SEGSIZE_256M);
 
 	if (ret < 0) {
 		/* If we couldn't map a primary PTE, try a secondary */
@@ -150,7 +152,8 @@ map_again:
 	} else {
 		struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
 
-		trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte);
+		trace_kvm_book3s_64_mmu_map(rflags, hpteg,
+					    vpn, hpaddr, orig_pte);
 
 		/* The ppc_md code may give us a secondary entry even though we
 		   asked for a primary. Fix up. */
@@ -160,7 +163,7 @@ map_again:
 		}
 
 		pte->slot = hpteg + (ret & 7);
-		pte->host_va = va;
+		pte->host_vpn = vpn;
 		pte->pte = *orig_pte;
 		pte->pfn = hpaddr >> PAGE_SHIFT;
 
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 877186b..ddb6a21 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -189,7 +189,7 @@ TRACE_EVENT(kvm_book3s_mmu_map,
 	TP_ARGS(pte),
 
 	TP_STRUCT__entry(
-		__field(	u64,		host_va		)
+		__field(	u64,		host_vpn	)
 		__field(	u64,		pfn		)
 		__field(	ulong,		eaddr		)
 		__field(	u64,		vpage		)
@@ -198,7 +198,7 @@ TRACE_EVENT(kvm_book3s_mmu_map,
 	),
 
 	TP_fast_assign(
-		__entry->host_va	= pte->host_va;
+		__entry->host_vpn	= pte->host_vpn;
 		__entry->pfn		= pte->pfn;
 		__entry->eaddr		= pte->pte.eaddr;
 		__entry->vpage		= pte->pte.vpage;
@@ -208,8 +208,8 @@ TRACE_EVENT(kvm_book3s_mmu_map,
 					  (pte->pte.may_execute ? 0x1 : 0);
 	),
 
-	TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
-		  __entry->host_va, __entry->pfn, __entry->eaddr,
+	TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+		  __entry->host_vpn, __entry->pfn, __entry->eaddr,
 		  __entry->vpage, __entry->raddr, __entry->flags)
 );
 
@@ -218,7 +218,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
 	TP_ARGS(pte),
 
 	TP_STRUCT__entry(
-		__field(	u64,		host_va		)
+		__field(	u64,		host_vpn	)
 		__field(	u64,		pfn		)
 		__field(	ulong,		eaddr		)
 		__field(	u64,		vpage		)
@@ -227,7 +227,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
 	),
 
 	TP_fast_assign(
-		__entry->host_va	= pte->host_va;
+		__entry->host_vpn	= pte->host_vpn;
 		__entry->pfn		= pte->pfn;
 		__entry->eaddr		= pte->pte.eaddr;
 		__entry->vpage		= pte->pte.vpage;
@@ -238,7 +238,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
 	),
 
 	TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
-		  __entry->host_va, __entry->pfn, __entry->eaddr,
+		  __entry->host_vpn, __entry->pfn, __entry->eaddr,
 		  __entry->vpage, __entry->raddr, __entry->flags)
 );
 
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index a5c08c3..36b212b 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -114,7 +114,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
 
 }
 
-static inline void tlbie(unsigned long va, int psize, int ssize, int local)
+static inline void tlbie(unsigned long vpn, int psize, int ssize, int local)
 {
 	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
 	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
@@ -125,10 +125,10 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local)
 		raw_spin_lock(&native_tlbie_lock);
 	asm volatile("ptesync": : :"memory");
 	if (use_local) {
-		__tlbiel(va, psize, ssize);
+		__tlbiel(vpn, psize, ssize);
 		asm volatile("ptesync": : :"memory");
 	} else {
-		__tlbie(va, psize, ssize);
+		__tlbie(vpn, psize, ssize);
 		asm volatile("eieio; tlbsync; ptesync": : :"memory");
 	}
 	if (lock_tlbie && !use_local)
@@ -154,7 +154,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
 	clear_bit_unlock(HPTE_LOCK_BIT, word);
 }
 
-static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
+static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 			unsigned long pa, unsigned long rflags,
 			unsigned long vflags, int psize, int ssize)
 {
@@ -163,9 +163,9 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
 	int i;
 
 	if (!(vflags & HPTE_V_BOLTED)) {
-		DBG_LOW("    insert(group=%lx, va=%016lx, pa=%016lx,"
+		DBG_LOW("    insert(group=%lx, vpn=%016lx, pa=%016lx,"
 			" rflags=%lx, vflags=%lx, psize=%d)\n",
-			hpte_group, va, pa, rflags, vflags, psize);
+			hpte_group, vpn, pa, rflags, vflags, psize);
 	}
 
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
@@ -183,7 +183,7 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
 	if (i == HPTES_PER_GROUP)
 		return -1;
 
-	hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
+	hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
 	hpte_r = hpte_encode_r(pa, psize) | rflags;
 
 	if (!(vflags & HPTE_V_BOLTED)) {
@@ -245,17 +245,17 @@ static long native_hpte_remove(unsigned long hpte_group)
 }
 
 static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
-				 unsigned long va, int psize, int ssize,
+				 unsigned long vpn, int psize, int ssize,
 				 int local)
 {
 	struct hash_pte *hptep = htab_address + slot;
 	unsigned long hpte_v, want_v;
 	int ret = 0;
 
-	want_v = hpte_encode_v(va, psize, ssize);
+	want_v = hpte_encode_v(vpn, psize, ssize);
 
-	DBG_LOW("    update(va=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
-		va, want_v & HPTE_V_AVPN, slot, newpp);
+	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
+		vpn, want_v & HPTE_V_AVPN, slot, newpp);
 
 	native_lock_hpte(hptep);
 
@@ -274,12 +274,12 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 	native_unlock_hpte(hptep);
 
 	/* Ensure it is out of the tlb too. */
-	tlbie(va, psize, ssize, local);
+	tlbie(vpn, psize, ssize, local);
 
 	return ret;
 }
 
-static long native_hpte_find(unsigned long va, int psize, int ssize)
+static long native_hpte_find(unsigned long vpn, int psize, int ssize)
 {
 	struct hash_pte *hptep;
 	unsigned long hash;
@@ -287,8 +287,8 @@ static long native_hpte_find(unsigned long va, int psize, int ssize)
 	long slot;
 	unsigned long want_v, hpte_v;
 
-	hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
-	want_v = hpte_encode_v(va, psize, ssize);
+	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+	want_v = hpte_encode_v(vpn, psize, ssize);
 
 	/* Bolted mappings are only ever in the primary group */
 	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -315,14 +315,15 @@ static long native_hpte_find(unsigned long va, int psize, int ssize)
 static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 				       int psize, int ssize)
 {
-	unsigned long vsid, va;
+	unsigned long vpn;
+	unsigned long vsid;
 	long slot;
 	struct hash_pte *hptep;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_vpn(ea, vsid, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
 
-	slot = native_hpte_find(va, psize, ssize);
+	slot = native_hpte_find(vpn, psize, ssize);
 	if (slot == -1)
 		panic("could not find page to bolt\n");
 	hptep = htab_address + slot;
@@ -332,10 +333,10 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 		(newpp & (HPTE_R_PP | HPTE_R_N));
 
 	/* Ensure it is out of the tlb too. */
-	tlbie(va, psize, ssize, 0);
+	tlbie(vpn, psize, ssize, 0);
 }
 
-static void native_hpte_invalidate(unsigned long slot, unsigned long va,
+static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 				   int psize, int ssize, int local)
 {
 	struct hash_pte *hptep = htab_address + slot;
@@ -345,9 +346,9 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 
 	local_irq_save(flags);
 
-	DBG_LOW("    invalidate(va=%016lx, hash: %lx)\n", va, slot);
+	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
 
-	want_v = hpte_encode_v(va, psize, ssize);
+	want_v = hpte_encode_v(vpn, psize, ssize);
 	native_lock_hpte(hptep);
 	hpte_v = hptep->v;
 
@@ -359,7 +360,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 		hptep->v = 0;
 
 	/* Invalidate the TLB */
-	tlbie(va, psize, ssize, local);
+	tlbie(vpn, psize, ssize, local);
 
 	local_irq_restore(flags);
 }
@@ -369,7 +370,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 #define LP_MASK(i)	((0xFF >> (i)) << LP_SHIFT)
 
 static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
-			int *psize, int *ssize, unsigned long *va)
+			int *psize, int *ssize, unsigned long *vpn)
 {
 	unsigned long avpn, pteg, vpi;
 	unsigned long hpte_r = hpte->r;
@@ -419,7 +420,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			vpi = (vsid ^ pteg) & htab_hash_mask;
 			seg_off |= vpi << shift;
 		}
-		*va = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
 	case MMU_SEGSIZE_1T:
 		/* We only have 40 - 23 bits of seg_off in avpn */
 		seg_off = (avpn & 0x1ffff) << 23;
@@ -428,9 +429,9 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
 			seg_off |= vpi << shift;
 		}
-		*va = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
 	default:
-		*va = size = 0;
+		*vpn = size = 0;
 	}
 	*psize = size;
 }
@@ -445,7 +446,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
  */
 static void native_hpte_clear(void)
 {
-	unsigned long va = 0;
+	unsigned long vpn = 0;
 	unsigned long slot, slots, flags;
 	struct hash_pte *hptep = htab_address;
 	unsigned long hpte_v;
@@ -476,9 +477,9 @@ static void native_hpte_clear(void)
 		 * already hold the native_tlbie_lock.
 		 */
 		if (hpte_v & HPTE_V_VALID) {
-			hpte_decode(hptep, slot, &psize, &ssize, &va);
+			hpte_decode(hptep, slot, &psize, &ssize, &vpn);
 			hptep->v = 0;
-			__tlbie(va, psize, ssize);
+			__tlbie(vpn, psize, ssize);
 		}
 	}
 
@@ -493,7 +494,8 @@ static void native_hpte_clear(void)
  */
 static void native_flush_hash_range(unsigned long number, int local)
 {
-	unsigned long va, hash, index, hidx, shift, slot;
+	unsigned long vpn;
+	unsigned long hash, index, hidx, shift, slot;
 	struct hash_pte *hptep;
 	unsigned long hpte_v;
 	unsigned long want_v;
@@ -507,18 +509,18 @@ static void native_flush_hash_range(unsigned long number, int local)
 	local_irq_save(flags);
 
 	for (i = 0; i < number; i++) {
-		va = batch->vaddr[i];
+		vpn = batch->vpn[i];
 		pte = batch->pte[i];
 
-		pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
-			hash = hpt_hash(va, shift, ssize);
+		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+			hash = hpt_hash(vpn, shift, ssize);
 			hidx = __rpte_to_hidx(pte, index);
 			if (hidx & _PTEIDX_SECONDARY)
 				hash = ~hash;
 			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 			slot += hidx & _PTEIDX_GROUP_IX;
 			hptep = htab_address + slot;
-			want_v = hpte_encode_v(va, psize, ssize);
+			want_v = hpte_encode_v(vpn, psize, ssize);
 			native_lock_hpte(hptep);
 			hpte_v = hptep->v;
 			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
@@ -533,12 +535,12 @@ static void native_flush_hash_range(unsigned long number, int local)
 	    mmu_psize_defs[psize].tlbiel && local) {
 		asm volatile("ptesync":::"memory");
 		for (i = 0; i < number; i++) {
-			va = batch->vaddr[i];
+			vpn = batch->vpn[i];
 			pte = batch->pte[i];
 
-			pte_iterate_hashed_subpages(pte, psize, va, index,
-						    shift) {
-				__tlbiel(va, psize, ssize);
+			pte_iterate_hashed_subpages(pte, psize,
+						    vpn, index, shift) {
+				__tlbiel(vpn, psize, ssize);
 			} pte_iterate_hashed_end();
 		}
 		asm volatile("ptesync":::"memory");
@@ -550,12 +552,12 @@ static void native_flush_hash_range(unsigned long number, int local)
 
 		asm volatile("ptesync":::"memory");
 		for (i = 0; i < number; i++) {
-			va = batch->vaddr[i];
+			vpn = batch->vpn[i];
 			pte = batch->pte[i];
 
-			pte_iterate_hashed_subpages(pte, psize, va, index,
-						    shift) {
-				__tlbie(va, psize, ssize);
+			pte_iterate_hashed_subpages(pte, psize,
+						    vpn, index, shift) {
+				__tlbie(vpn, psize, ssize);
 			} pte_iterate_hashed_end();
 		}
 		asm volatile("eieio; tlbsync; ptesync":::"memory");
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 975c7d1..74c5479 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -192,18 +192,18 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 	     vaddr += step, paddr += step) {
 		unsigned long hash, hpteg;
 		unsigned long vsid = get_kernel_vsid(vaddr, ssize);
-		unsigned long va  = hpt_vpn(vaddr, vsid, ssize);
+		unsigned long vpn  = hpt_vpn(vaddr, vsid, ssize);
 		unsigned long tprot = prot;
 
 		/* Make kernel text executable */
 		if (overlaps_kernel_text(vaddr, vaddr + step))
 			tprot &= ~HPTE_R_N;
 
-		hash = hpt_hash(va, shift, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
 		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 
 		BUG_ON(!ppc_md.hpte_insert);
-		ret = ppc_md.hpte_insert(hpteg, va, paddr, tprot,
+		ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot,
 					 HPTE_V_BOLTED, psize, ssize);
 
 		if (ret < 0)
@@ -1153,21 +1153,21 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 /* WARNING: This is called from hash_low_64.S, if you change this prototype,
  *          do not forget to update the assembly call site !
  */
-void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize,
+void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
 		     int local)
 {
 	unsigned long hash, index, shift, hidx, slot;
 
-	DBG_LOW("flush_hash_page(va=%016lx)\n", va);
-	pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
-		hash = hpt_hash(va, shift, ssize);
+	DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
+	pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+		hash = hpt_hash(vpn, shift, ssize);
 		hidx = __rpte_to_hidx(pte, index);
 		if (hidx & _PTEIDX_SECONDARY)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		slot += hidx & _PTEIDX_GROUP_IX;
 		DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
-		ppc_md.hpte_invalidate(slot, va, psize, ssize, local);
+		ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local);
 	} pte_iterate_hashed_end();
 }
 
@@ -1181,7 +1181,7 @@ void flush_hash_range(unsigned long number, int local)
 			&__get_cpu_var(ppc64_tlb_batch);
 
 		for (i = 0; i < number; i++)
-			flush_hash_page(batch->vaddr[i], batch->pte[i],
+			flush_hash_page(batch->vpn[i], batch->pte[i],
 					batch->psize, batch->ssize, local);
 	}
 }
@@ -1208,14 +1208,14 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
 {
 	unsigned long hash, hpteg;
 	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long va = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
 	unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
 	int ret;
 
-	hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
+	hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
 	hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 
-	ret = ppc_md.hpte_insert(hpteg, va, __pa(vaddr),
+	ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr),
 				 mode, HPTE_V_BOLTED,
 				 mmu_linear_psize, mmu_kernel_ssize);
 	BUG_ON (ret < 0);
@@ -1229,9 +1229,9 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
 {
 	unsigned long hash, hidx, slot;
 	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long va = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
 
-	hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
+	hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
 	spin_lock(&linear_map_hash_lock);
 	BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
 	hidx = linear_map_hash_slots[lmi] & 0x7f;
@@ -1241,7 +1241,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
 		hash = ~hash;
 	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 	slot += hidx & _PTEIDX_GROUP_IX;
-	ppc_md.hpte_invalidate(slot, va, mmu_linear_psize, mmu_kernel_ssize, 0);
+	ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_kernel_ssize, 0);
 }
 
 void kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 1331403..cecad34 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -18,14 +18,15 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		     pte_t *ptep, unsigned long trap, int local, int ssize,
 		     unsigned int shift, unsigned int mmu_psize)
 {
+	unsigned long vpn;
 	unsigned long old_pte, new_pte;
-	unsigned long va, rflags, pa, sz;
+	unsigned long rflags, pa, sz;
 	long slot;
 
 	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
 
 	/* Search the Linux page table for a match with va */
-	va = hpt_vpn(ea, vsid, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
 
 	/* At this point, we have a pte (old_pte) which can be used to build
 	 * or update an HPTE. There are 2 cases:
@@ -69,19 +70,19 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		/* There MIGHT be an HPTE for this pte */
 		unsigned long hash, slot;
 
-		hash = hpt_hash(va, shift, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
 		if (old_pte & _PAGE_F_SECOND)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		slot += (old_pte & _PAGE_F_GIX) >> 12;
 
-		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
+		if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
 					 ssize, local) == -1)
 			old_pte &= ~_PAGE_HPTEFLAGS;
 	}
 
 	if (likely(!(old_pte & _PAGE_HASHPTE))) {
-		unsigned long hash = hpt_hash(va, shift, ssize);
+		unsigned long hash = hpt_hash(vpn, shift, ssize);
 		unsigned long hpte_group;
 
 		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
@@ -101,14 +102,14 @@ repeat:
 				      _PAGE_COHERENT | _PAGE_GUARDED));
 
 		/* Insert into the hash table, primary slot */
-		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
+		slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
 					  mmu_psize, ssize);
 
 		/* Primary is full, try the secondary */
 		if (unlikely(slot == -1)) {
 			hpte_group = ((~hash & htab_hash_mask) *
 				      HPTES_PER_GROUP) & ~0x7UL;
-			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
+			slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
 						  HPTE_V_SECONDARY,
 						  mmu_psize, ssize);
 			if (slot == -1) {
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 321c585..ae758b3 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -42,8 +42,9 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
 void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, unsigned long pte, int huge)
 {
+	unsigned long vpn;
 	struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
-	unsigned long vsid, vaddr;
+	unsigned long vsid;
 	unsigned int psize;
 	int ssize;
 	real_pte_t rpte;
@@ -86,7 +87,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
 		ssize = mmu_kernel_ssize;
 	}
-	vaddr = hpt_vpn(addr, vsid, ssize);
+	vpn = hpt_vpn(addr, vsid, ssize);
 	rpte = __real_pte(__pte(pte), ptep);
 
 	/*
@@ -96,7 +97,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 	 * and decide to use local invalidates instead...
 	 */
 	if (!batch->active) {
-		flush_hash_page(vaddr, rpte, psize, ssize, 0);
+		flush_hash_page(vpn, rpte, psize, ssize, 0);
 		put_cpu_var(ppc64_tlb_batch);
 		return;
 	}
@@ -122,7 +123,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 		batch->ssize = ssize;
 	}
 	batch->pte[i] = rpte;
-	batch->vaddr[i] = vaddr;
+	batch->vpn[i] = vpn;
 	batch->index = ++i;
 	if (i >= PPC64_TLB_BATCH_NR)
 		__flush_tlb_pending(batch);
@@ -146,7 +147,7 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
 	if (cpumask_equal(mm_cpumask(batch->mm), tmp))
 		local = 1;
 	if (i == 1)
-		flush_hash_page(batch->vaddr[0], batch->pte[0],
+		flush_hash_page(batch->vpn[0], batch->pte[0],
 				batch->psize, batch->ssize, local);
 	else
 		flush_hash_range(i, local);
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index c8c7bf6..0f6f839 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -88,7 +88,7 @@ static inline unsigned int beat_read_mask(unsigned hpte_group)
 }
 
 static long beat_lpar_hpte_insert(unsigned long hpte_group,
-				  unsigned long va, unsigned long pa,
+				  unsigned long vpn, unsigned long pa,
 				  unsigned long rflags, unsigned long vflags,
 				  int psize, int ssize)
 {
@@ -103,7 +103,7 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group,
 			"rflags=%lx, vflags=%lx, psize=%d)\n",
-		hpte_group, va, pa, rflags, vflags, psize);
+		hpte_group, vpn, pa, rflags, vflags, psize);
 
-	hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) |
+	hpte_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M) |
 		vflags | HPTE_V_VALID;
 	hpte_r = hpte_encode_r(pa, psize) | rflags;
 
@@ -184,14 +184,14 @@ static void beat_lpar_hptab_clear(void)
  */
 static long beat_lpar_hpte_updatepp(unsigned long slot,
 				    unsigned long newpp,
-				    unsigned long va,
+				    unsigned long vpn,
 				    int psize, int ssize, int local)
 {
 	unsigned long lpar_rc;
 	u64 dummy0, dummy1;
 	unsigned long want_v;
 
-	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
 
 	DBG_LOW("    update: "
 		"avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
@@ -220,15 +220,15 @@ static long beat_lpar_hpte_updatepp(unsigned long slot,
 	return 0;
 }
 
-static long beat_lpar_hpte_find(unsigned long va, int psize)
+static long beat_lpar_hpte_find(unsigned long vpn, int psize)
 {
 	unsigned long hash;
 	unsigned long i, j;
 	long slot;
 	unsigned long want_v, hpte_v;
 
-	hash = hpt_hash(va, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M);
-	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
 
 	for (j = 0; j < 2; j++) {
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -255,14 +255,15 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 					  unsigned long ea,
 					  int psize, int ssize)
 {
-	unsigned long lpar_rc, slot, vsid, va;
+	unsigned long vpn;
+	unsigned long lpar_rc, slot, vsid;
 	u64 dummy0, dummy1;
 
 	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
-	va = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M);
+	vpn = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M);
 
 	raw_spin_lock(&beat_htab_lock);
-	slot = beat_lpar_hpte_find(va, psize);
+	slot = beat_lpar_hpte_find(vpn, psize);
 	BUG_ON(slot == -1);
 
 	lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7,
@@ -272,7 +273,7 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 	BUG_ON(lpar_rc != 0);
 }
 
-static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
+static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
 					 int psize, int ssize, int local)
 {
 	unsigned long want_v;
@@ -282,7 +283,7 @@ static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
 
 	DBG_LOW("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
-		slot, va, psize, local);
+		slot, vpn, psize, local);
-	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
 
 	raw_spin_lock_irqsave(&beat_htab_lock, flags);
 	dummy1 = beat_lpar_hpte_getword0(slot);
@@ -311,7 +312,7 @@ void __init hpte_init_beat(void)
 }
 
 static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
-				  unsigned long va, unsigned long pa,
+				  unsigned long vpn, unsigned long pa,
 				  unsigned long rflags, unsigned long vflags,
 				  int psize, int ssize)
 {
@@ -322,11 +323,11 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
 		return -1;
 
 	if (!(vflags & HPTE_V_BOLTED))
-		DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
+		DBG_LOW("hpte_insert(group=%lx, vpn=%016lx, pa=%016lx, "
 			"rflags=%lx, vflags=%lx, psize=%d)\n",
-		hpte_group, va, pa, rflags, vflags, psize);
+		hpte_group, vpn, pa, rflags, vflags, psize);
 
-	hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) |
+	hpte_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M) |
 		vflags | HPTE_V_VALID;
 	hpte_r = hpte_encode_r(pa, psize) | rflags;
 
@@ -364,14 +365,14 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
  */
 static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
 				    unsigned long newpp,
-				    unsigned long va,
+				    unsigned long vpn,
 				    int psize, int ssize, int local)
 {
 	unsigned long lpar_rc;
 	unsigned long want_v;
 	unsigned long pss;
 
-	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
 	pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
 
 	DBG_LOW("    update: "
@@ -392,16 +393,16 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
 	return 0;
 }
 
-static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long va,
+static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn,
 					 int psize, int ssize, int local)
 {
 	unsigned long want_v;
 	unsigned long lpar_rc;
 	unsigned long pss;
 
-	DBG_LOW("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
-		slot, va, psize, local);
-	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	DBG_LOW("    inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
+		slot, vpn, psize, local);
+	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
 	pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
 
 	lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss);
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 3124cf7..d00d7b0 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -43,7 +43,7 @@ enum ps3_lpar_vas_id {
 
 static DEFINE_SPINLOCK(ps3_htab_lock);
 
-static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va,
+static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 	unsigned long pa, unsigned long rflags, unsigned long vflags,
 	int psize, int ssize)
 {
@@ -61,7 +61,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va,
 	 */
 	vflags &= ~HPTE_V_SECONDARY;
 
-	hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
+	hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
 	hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize) | rflags;
 
 	spin_lock_irqsave(&ps3_htab_lock, flags);
@@ -75,8 +75,8 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va,
 
 	if (result) {
 		/* all entries bolted !*/
-		pr_info("%s:result=%d va=%lx pa=%lx ix=%lx v=%llx r=%llx\n",
-			__func__, result, va, pa, hpte_group, hpte_v, hpte_r);
+		pr_info("%s:result=%d vpn=%lx pa=%lx ix=%lx v=%llx r=%llx\n",
+			__func__, result, vpn, pa, hpte_group, hpte_v, hpte_r);
 		BUG();
 	}
 
@@ -107,7 +107,7 @@ static long ps3_hpte_remove(unsigned long hpte_group)
 }
 
 static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
-	unsigned long va, int psize, int ssize, int local)
+	unsigned long vpn, int psize, int ssize, int local)
 {
 	int result;
 	u64 hpte_v, want_v, hpte_rs;
@@ -115,7 +115,7 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
 	unsigned long flags;
 	long ret;
 
-	want_v = hpte_encode_v(va, psize, ssize);
+	want_v = hpte_encode_v(vpn, psize, ssize);
 
 	spin_lock_irqsave(&ps3_htab_lock, flags);
 
@@ -125,8 +125,8 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
 				       &hpte_rs);
 
 	if (result) {
-		pr_info("%s: res=%d read va=%lx slot=%lx psize=%d\n",
-			__func__, result, va, slot, psize);
+		pr_info("%s: res=%d read vpn=%lx slot=%lx psize=%d\n",
+			__func__, result, vpn, slot, psize);
 		BUG();
 	}
 
@@ -159,7 +159,7 @@ static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 	panic("ps3_hpte_updateboltedpp() not implemented");
 }
 
-static void ps3_hpte_invalidate(unsigned long slot, unsigned long va,
+static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	int psize, int ssize, int local)
 {
 	unsigned long flags;
@@ -170,8 +170,8 @@ static void ps3_hpte_invalidate(unsigned long slot, unsigned long va,
 	result = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0);
 
 	if (result) {
-		pr_info("%s: res=%d va=%lx slot=%lx psize=%d\n",
-			__func__, result, va, slot, psize);
+		pr_info("%s: res=%d vpn=%lx slot=%lx psize=%d\n",
+			__func__, result, vpn, slot, psize);
 		BUG();
 	}
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 2127529..8308b25 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -108,9 +108,9 @@ void vpa_init(int cpu)
 }
 
 static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
- 			      unsigned long va, unsigned long pa,
- 			      unsigned long rflags, unsigned long vflags,
-			      int psize, int ssize)
+				     unsigned long vpn, unsigned long pa,
+				     unsigned long rflags, unsigned long vflags,
+				     int psize, int ssize)
 {
 	unsigned long lpar_rc;
 	unsigned long flags;
@@ -118,11 +118,11 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	unsigned long hpte_v, hpte_r;
 
 	if (!(vflags & HPTE_V_BOLTED))
-		pr_devel("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
-			 "rflags=%lx, vflags=%lx, psize=%d)\n",
-			 hpte_group, va, pa, rflags, vflags, psize);
+		pr_devel("hpte_insert(group=%lx, vpn=%016lx, "
+			 "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n",
+			 hpte_group, vpn,  pa, rflags, vflags, psize);
 
-	hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
+	hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
 	hpte_r = hpte_encode_r(pa, psize) | rflags;
 
 	if (!(vflags & HPTE_V_BOLTED))
@@ -234,14 +234,14 @@ static void pSeries_lpar_hptab_clear(void)
  */
 static long pSeries_lpar_hpte_updatepp(unsigned long slot,
 				       unsigned long newpp,
-				       unsigned long va,
+				       unsigned long vpn,
 				       int psize, int ssize, int local)
 {
 	unsigned long lpar_rc;
 	unsigned long flags = (newpp & 7) | H_AVPN;
 	unsigned long want_v;
 
-	want_v = hpte_encode_avpn(va, psize, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
 
 	pr_devel("    update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
 		 want_v, slot, flags, psize);
@@ -279,15 +279,15 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
 	return dword0;
 }
 
-static long pSeries_lpar_hpte_find(unsigned long va, int psize, int ssize)
+static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
 {
 	unsigned long hash;
 	unsigned long i;
 	long slot;
 	unsigned long want_v, hpte_v;
 
-	hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
-	want_v = hpte_encode_avpn(va, psize, ssize);
+	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
 
 	/* Bolted entries are always in the primary group */
 	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -307,12 +307,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
 					     unsigned long ea,
 					     int psize, int ssize)
 {
-	unsigned long lpar_rc, slot, vsid, va, flags;
+	unsigned long vpn;
+	unsigned long lpar_rc, slot, vsid, flags;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_vpn(ea, vsid, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
 
-	slot = pSeries_lpar_hpte_find(va, psize, ssize);
+	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
 	BUG_ON(slot == -1);
 
 	flags = newpp & 7;
@@ -321,17 +322,17 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
 	BUG_ON(lpar_rc != H_SUCCESS);
 }
 
-static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
+static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
 					 int psize, int ssize, int local)
 {
 	unsigned long want_v;
 	unsigned long lpar_rc;
 	unsigned long dummy1, dummy2;
 
-	pr_devel("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
-		 slot, va, psize, local);
+	pr_devel("    inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
+		 slot, vpn, psize, local);
 
-	want_v = hpte_encode_avpn(va, psize, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
 	lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2);
 	if (lpar_rc == H_NOT_FOUND)
 		return;
@@ -342,15 +343,16 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
 static void pSeries_lpar_hpte_removebolted(unsigned long ea,
 					   int psize, int ssize)
 {
-	unsigned long slot, vsid, va;
+	unsigned long vpn;
+	unsigned long slot, vsid;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_vpn(ea, vsid, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
 
-	slot = pSeries_lpar_hpte_find(va, psize, ssize);
+	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
 	BUG_ON(slot == -1);
 
-	pSeries_lpar_hpte_invalidate(slot, va, psize, ssize, 0);
+	pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0);
 }
 
 /* Flag bits for H_BULK_REMOVE */
@@ -366,12 +368,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
  */
 static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
 {
+	unsigned long vpn;
 	unsigned long i, pix, rc;
 	unsigned long flags = 0;
 	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
 	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 	unsigned long param[9];
-	unsigned long va;
 	unsigned long hash, index, shift, hidx, slot;
 	real_pte_t pte;
 	int psize, ssize;
@@ -383,21 +385,21 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
 	ssize = batch->ssize;
 	pix = 0;
 	for (i = 0; i < number; i++) {
-		va = batch->vaddr[i];
+		vpn = batch->vpn[i];
 		pte = batch->pte[i];
-		pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
-			hash = hpt_hash(va, shift, ssize);
+		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+			hash = hpt_hash(vpn, shift, ssize);
 			hidx = __rpte_to_hidx(pte, index);
 			if (hidx & _PTEIDX_SECONDARY)
 				hash = ~hash;
 			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 			slot += hidx & _PTEIDX_GROUP_IX;
 			if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
-				pSeries_lpar_hpte_invalidate(slot, va, psize,
+				pSeries_lpar_hpte_invalidate(slot, vpn, psize,
 							     ssize, local);
 			} else {
 				param[pix] = HBR_REQUEST | HBR_AVPN | slot;
-				param[pix+1] = hpte_encode_avpn(va, psize,
+				param[pix+1] = hpte_encode_avpn(vpn, psize,
 								ssize);
 				pix += 2;
 				if (pix == 8) {
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V6 08/12] arch/powerpc: Make some of the PGTABLE_RANGE dependency explicit
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

The slice array size and slice mask size depend on PGTABLE_RANGE. We
can't directly include pgtable.h in these headers because there is
a circular dependency, so add compile-time checks for these values.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h    |   13 ++++++++-----
 arch/powerpc/include/asm/page_64.h       |   16 ++++++++++++----
 arch/powerpc/include/asm/pgtable-ppc64.h |    8 ++++++++
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 8f05eec..8c5c5a4 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -420,6 +420,13 @@ extern void slb_set_size(u16 size);
 	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
 	add	rt,rt,rx
 
+/* 4 bits per slice and we have one slice per 1TB */
+#if 0 /* We can't directly include pgtable.h hence this hack */
+#define SLICE_ARRAY_SIZE  (PGTABLE_RANGE >> 41)
+#else
+/* Right now we only support 64TB */
+#define SLICE_ARRAY_SIZE  32
+#endif
 
 #ifndef __ASSEMBLY__
 
@@ -464,11 +471,7 @@ typedef struct {
 
 #ifdef CONFIG_PPC_MM_SLICES
 	u64 low_slices_psize;	/* SLB page size encodings */
-	/*
-	 * Right now we support 64TB and 4 bits for each
-	 * 1TB slice we need 32 bytes for 64TB.
-	 */
-	unsigned char high_slices_psize[32];  /* 4 bits per slice for now */
+	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
 #else
 	u16 sllp;		/* SLB page size encoding */
 #endif
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 6c9bef4..b55beb4 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -78,14 +78,22 @@ extern u64 ppc64_pft_size;
 #define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
 #define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
 
+/* 1 bit per slice and we have one slice per 1TB */
+#if 0 /* We can't directly include pgtable.h hence this hack */
+#define SLICE_MASK_SIZE (PGTABLE_RANGE >> 43)
+#else
+/*
+ * Right now we support only 64TB.
+ * IF we change this we will have to change the type
+ * of high_slices
+ */
+#define SLICE_MASK_SIZE 8
+#endif
+
 #ifndef __ASSEMBLY__
 
 struct slice_mask {
 	u16 low_slices;
-	/*
-	 * This should be derived out of PGTABLE_RANGE. For the current
-	 * max 64TB, u64 should be ok.
-	 */
 	u64 high_slices;
 };
 
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 8af1cf2..dea953f 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -32,6 +32,14 @@
 #endif
 #endif
 
+#if (PGTABLE_RANGE >> 41) > SLICE_ARRAY_SIZE
+#error PGTABLE_RANGE exceeds SLICE_ARRAY_SIZE
+#endif
+
+#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
+#error PGTABLE_RANGE exceeds slice_mask high_slices size
+#endif
+
 /*
  * Define the address range of the kernel non-linear virtual area
  */
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V6 03/12] arch/powerpc: Simplify hpte_decode
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch simplifies hpte_decode to make it easier to switch from virtual
address to virtual page number in a later patch.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hash_native_64.c |   49 ++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 90039bc..660b8bb 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -351,9 +351,10 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			int *psize, int *ssize, unsigned long *va)
 {
+	unsigned long avpn, pteg, vpi;
 	unsigned long hpte_r = hpte->r;
 	unsigned long hpte_v = hpte->v;
-	unsigned long avpn;
+	unsigned long vsid, seg_off;
 	int i, size, shift, penc;
 
 	if (!(hpte_v & HPTE_V_LARGE))
@@ -380,32 +381,38 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 	}
 
 	/* This works for all page sizes, and for 256M and 1T segments */
+	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
 	shift = mmu_psize_defs[size].shift;
-	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm) << 23;
-
-	if (shift < 23) {
-		unsigned long vpi, vsid, pteg;
 
-		pteg = slot / HPTES_PER_GROUP;
-		if (hpte_v & HPTE_V_SECONDARY)
-			pteg = ~pteg;
-		switch (hpte_v >> HPTE_V_SSIZE_SHIFT) {
-		case MMU_SEGSIZE_256M:
-			vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask;
-			break;
-		case MMU_SEGSIZE_1T:
-			vsid = avpn >> 40;
+	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
+	pteg = slot / HPTES_PER_GROUP;
+	if (hpte_v & HPTE_V_SECONDARY)
+		pteg = ~pteg;
+
+	switch (*ssize) {
+	case MMU_SEGSIZE_256M:
+		/* We only have 28 - 23 bits of seg_off in avpn */
+		seg_off = (avpn & 0x1f) << 23;
+		vsid    =  avpn >> 5;
+		/* We can find more bits from the pteg value */
+		if (shift < 23) {
+			vpi = (vsid ^ pteg) & htab_hash_mask;
+			seg_off |= vpi << shift;
+		}
+		*va = vsid << SID_SHIFT | seg_off;
+	case MMU_SEGSIZE_1T:
+		/* We only have 40 - 23 bits of seg_off in avpn */
+		seg_off = (avpn & 0x1ffff) << 23;
+		vsid    = avpn >> 17;
+		if (shift < 23) {
 			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
-			break;
-		default:
-			avpn = vpi = size = 0;
+			seg_off |= vpi << shift;
 		}
-		avpn |= (vpi << mmu_psize_defs[size].shift);
+		*va = vsid << SID_SHIFT_1T | seg_off;
+	default:
+		*va = size = 0;
 	}
-
-	*va = avpn;
 	*psize = size;
-	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
 }
 
 /*
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V6 10/12] arch/powerpc: Use 32bit array for slb cache
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

With a larger VSID we need to track more bits of the ESID in the slb cache
for slb invalidate.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/paca.h |    2 +-
 arch/powerpc/mm/slb_low.S       |    8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index daf813f..3e7abba 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -100,7 +100,7 @@ struct paca_struct {
 	/* SLB related definitions */
 	u16 vmalloc_sllp;
 	u16 slb_cache_ptr;
-	u16 slb_cache[SLB_CACHE_ENTRIES];
+	u32 slb_cache[SLB_CACHE_ENTRIES];
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 #ifdef CONFIG_PPC_BOOK3E
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 3b75f19..f6a2625 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -270,10 +270,10 @@ _GLOBAL(slb_compare_rr_to_size)
 	bge	1f
 
 	/* still room in the slb cache */
-	sldi	r11,r3,1		/* r11 = offset * sizeof(u16) */
-	rldicl	r10,r10,36,28		/* get low 16 bits of the ESID */
-	add	r11,r11,r13		/* r11 = (u16 *)paca + offset */
-	sth	r10,PACASLBCACHE(r11)	/* paca->slb_cache[offset] = esid */
+	sldi	r11,r3,2		/* r11 = offset * sizeof(u32) */
+	srdi    r10,r10,28		/* get the 36 bits of the ESID */
+	add	r11,r11,r13		/* r11 = (u32 *)paca + offset */
+	stw	r10,PACASLBCACHE(r11)	/* paca->slb_cache[offset] = esid */
 	addi	r3,r3,1			/* offset++ */
 	b	2f
 1:					/* offset >= SLB_CACHE_ENTRIES */
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V6 11/12] arch/powerpc: Add 64TB support
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Increase max addressable range to 64TB. This is not tested on
real hardware yet.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h        |   42 ++++++++++++++++++++------
 arch/powerpc/include/asm/pgtable-ppc64-4k.h  |    2 +-
 arch/powerpc/include/asm/pgtable-ppc64-64k.h |    2 +-
 arch/powerpc/include/asm/processor.h         |    4 +--
 arch/powerpc/include/asm/sparsemem.h         |    4 +--
 arch/powerpc/kernel/exceptions-64s.S         |    4 ++-
 arch/powerpc/mm/slb_low.S                    |   12 ++++++++
 7 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 8c5c5a4..5c52691 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -374,17 +374,21 @@ extern void slb_set_size(u16 size);
  * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly.
  */
 
-#define VSID_MULTIPLIER_256M	ASM_CONST(200730139)	/* 28-bit prime */
-#define VSID_BITS_256M		36
+/*
+ * This should be computed such that protovosid * vsid_mulitplier
+ * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
+ */
+#define VSID_MULTIPLIER_256M	ASM_CONST(12538073)	/* 24-bit prime */
+#define VSID_BITS_256M		38
 #define VSID_MODULUS_256M	((1UL<<VSID_BITS_256M)-1)
 
 #define VSID_MULTIPLIER_1T	ASM_CONST(12538073)	/* 24-bit prime */
-#define VSID_BITS_1T		24
+#define VSID_BITS_1T		26
 #define VSID_MODULUS_1T		((1UL<<VSID_BITS_1T)-1)
 
 #define CONTEXT_BITS		19
-#define USER_ESID_BITS		16
-#define USER_ESID_BITS_1T	4
+#define USER_ESID_BITS		18
+#define USER_ESID_BITS_1T	6
 
 #define USER_VSID_RANGE	(1UL << (USER_ESID_BITS + SID_SHIFT))
 
@@ -507,12 +511,32 @@ typedef struct {
 	})
 #endif /* 1 */
 
-/* This is only valid for addresses >= PAGE_OFFSET */
+/*
+ * This is only valid for addresses >= PAGE_OFFSET
+ * The proto-VSID space is divided into two classes
+ * User:   0 to 2^(CONTEXT_BITS + USER_ESID_BITS) -1
+ * kernel: 2^(CONTEXT_BITS + USER_ESID_BITS) to 2^(VSID_BITS) - 1
+ *
+ * With KERNEL_START at 0xc000000000000000, the proto vsid for
+ * the kernel ends up with 0xc00000000 (36 bits). With 64TB
+ * support we need to have kernel proto-VSID in the
+ * [2^37 to 2^38 - 1] range due to the increased USER_ESID_BITS.
+ */
 static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 {
-	if (ssize == MMU_SEGSIZE_256M)
-		return vsid_scramble(ea >> SID_SHIFT, 256M);
-	return vsid_scramble(ea >> SID_SHIFT_1T, 1T);
+	unsigned long proto_vsid;
+	/*
+	 * We need to make sure proto_vsid for the kernel is
+	 * >= 2^(CONTEXT_BITS + USER_ESID_BITS[_1T])
+	 */
+	if (ssize == MMU_SEGSIZE_256M) {
+		proto_vsid = ea >> SID_SHIFT;
+		proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS));
+		return vsid_scramble(proto_vsid, 256M);
+	}
+	proto_vsid = ea >> SID_SHIFT_1T;
+	proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS_1T));
+	return vsid_scramble(proto_vsid, 1T);
 }
 
 /* Returns the segment size indicator for a user address */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h
index 6eefdcf..b3eccf2 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h
@@ -7,7 +7,7 @@
  */
 #define PTE_INDEX_SIZE  9
 #define PMD_INDEX_SIZE  7
-#define PUD_INDEX_SIZE  7
+#define PUD_INDEX_SIZE  9
 #define PGD_INDEX_SIZE  9
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
index 90533dd..be4e287 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
@@ -7,7 +7,7 @@
 #define PTE_INDEX_SIZE  12
 #define PMD_INDEX_SIZE  12
 #define PUD_INDEX_SIZE	0
-#define PGD_INDEX_SIZE  4
+#define PGD_INDEX_SIZE  6
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE	(sizeof(real_pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 413a5ea..ac3861b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -97,8 +97,8 @@ extern struct task_struct *last_task_used_spe;
 #endif
 
 #ifdef CONFIG_PPC64
-/* 64-bit user address space is 44-bits (16TB user VM) */
-#define TASK_SIZE_USER64 (0x0000100000000000UL)
+/* 64-bit user address space is 46-bits (64TB user VM) */
+#define TASK_SIZE_USER64 (0x0000400000000000UL)
 
 /* 
  * 32-bit user address space is 4GB - 1 page 
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index 0c5fa31..f6fc0ee 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -10,8 +10,8 @@
  */
 #define SECTION_SIZE_BITS       24
 
-#define MAX_PHYSADDR_BITS       44
-#define MAX_PHYSMEM_BITS        44
+#define MAX_PHYSADDR_BITS       46
+#define MAX_PHYSMEM_BITS        46
 
 #endif /* CONFIG_SPARSEMEM */
 
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1c06d29..40ed208 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -958,7 +958,9 @@ _GLOBAL(do_stab_bolted)
 	rldimi	r10,r11,7,52	/* r10 = first ste of the group */
 
 	/* Calculate VSID */
-	/* This is a kernel address, so protovsid = ESID */
+	/* This is a kernel address, so protovsid = ESID | 1 << 37 */
+	li	r9,0x1
+	rldimi  r11,r9,(CONTEXT_BITS + USER_ESID_BITS),0
 	ASM_VSID_SCRAMBLE(r11, r9, 256M)
 	rldic	r9,r11,12,16	/* r9 = vsid << 12 */
 
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index f6a2625..1a16ca2 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -56,6 +56,12 @@ _GLOBAL(slb_allocate_realmode)
 	 */
 _GLOBAL(slb_miss_kernel_load_linear)
 	li	r11,0
+	li	r9,0x1
+	/*
+	 * for 1T we shift 12 bits more.  slb_finish_load_1T will do
+	 * the necessary adjustment
+	 */
+	rldimi  r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0
 BEGIN_FTR_SECTION
 	b	slb_finish_load
 END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
@@ -85,6 +91,12 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
 	_GLOBAL(slb_miss_kernel_load_io)
 	li	r11,0
 6:
+	li	r9,0x1
+	/*
+	 * for 1T we shift 12 bits more.  slb_finish_load_1T will do
+	 * the necessary adjustment
+	 */
+	rldimi  r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0
 BEGIN_FTR_SECTION
 	b	slb_finish_load
 END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V6 12/12] arch/powerpc: Update VSID allocation documentation
From: Aneesh Kumar K.V @ 2012-08-01 16:13 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1343837623-9046-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This updates the proto-VSID and VSID scramble related information
to be more generic by using names instead of the current values.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h |   40 ++++++++++++++-------------------
 arch/powerpc/mm/mmu_context_hash64.c  |    8 ++++---
 2 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 5c52691..8dfe31a 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -328,51 +328,45 @@ extern void slb_set_size(u16 size);
 #endif /* __ASSEMBLY__ */
 
 /*
- * VSID allocation
+ * VSID allocation (256MB segment)
  *
- * We first generate a 36-bit "proto-VSID".  For kernel addresses this
- * is equal to the ESID, for user addresses it is:
- *	(context << 15) | (esid & 0x7fff)
+ * We first generate a 38-bit "proto-VSID".  For kernel addresses this
+ * is equal to the ESID | 1 << 37, for user addresses it is:
+ *	(context << USER_ESID_BITS) | (esid & ((1U << USER_ESID_BITS) - 1))
  *
- * The two forms are distinguishable because the top bit is 0 for user
- * addresses, whereas the top two bits are 1 for kernel addresses.
- * Proto-VSIDs with the top two bits equal to 0b10 are reserved for
- * now.
+ * This splits the proto-VSID into the below range
+ *  0 - (2^(CONTEXT_BITS + USER_ESID_BITS) - 1) : User proto-VSID range
+ *  2^(CONTEXT_BITS + USER_ESID_BITS) - 2^(VSID_BITS) : Kernel proto-VSID range
+ *
+ * We also have CONTEXT_BITS + USER_ESID_BITS = VSID_BITS - 1
+ * That is, we assign half of the space to user processes and half
+ * to the kernel.
  *
  * The proto-VSIDs are then scrambled into real VSIDs with the
  * multiplicative hash:
  *
  *	VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS
- *	where	VSID_MULTIPLIER = 268435399 = 0xFFFFFC7
- *		VSID_MODULUS = 2^36-1 = 0xFFFFFFFFF
  *
- * This scramble is only well defined for proto-VSIDs below
- * 0xFFFFFFFFF, so both proto-VSID and actual VSID 0xFFFFFFFFF are
- * reserved.  VSID_MULTIPLIER is prime, so in particular it is
+ * VSID_MULTIPLIER is prime, so in particular it is
  * co-prime to VSID_MODULUS, making this a 1:1 scrambling function.
  * Because the modulus is 2^n-1 we can compute it efficiently without
  * a divide or extra multiply (see below).
  *
  * This scheme has several advantages over older methods:
  *
- * 	- We have VSIDs allocated for every kernel address
+ *	- We have VSIDs allocated for every kernel address
  * (i.e. everything above 0xC000000000000000), except the very top
  * segment, which simplifies several things.
  *
- *	- We allow for 16 significant bits of ESID and 19 bits of
- * context for user addresses.  i.e. 16T (44 bits) of address space for
- * up to half a million contexts.
+ *	- We allow for USER_ESID_BITS significant bits of ESID and
+ * CONTEXT_BITS  bits of context for user addresses.
+ *  i.e. 64T (46 bits) of address space for up to half a million contexts.
  *
- * 	- The scramble function gives robust scattering in the hash
+ *	- The scramble function gives robust scattering in the hash
  * table (at least based on some initial results).  The previous
  * method was more susceptible to pathological cases giving excessive
  * hash collisions.
  */
-/*
- * WARNING - If you change these you must make sure the asm
- * implementations in slb_allocate (slb_low.S), do_stab_bolted
- * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly.
- */
 
 /*
  * This should be computed such that protovosid * vsid_mulitplier
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index daa076c..ceb32e3 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -30,9 +30,11 @@ static DEFINE_SPINLOCK(mmu_context_lock);
 static DEFINE_IDA(mmu_context_ida);
 
 /*
- * The proto-VSID space has 2^35 - 1 segments available for user mappings.
- * Each segment contains 2^28 bytes.  Each context maps 2^44 bytes,
- * so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
+ * 256MB segment
+ * The proto-VSID space has 2^(CONTEXT_BITS + USER_ESID_BITS) - 1 segments
+ * available for user mappings. Each segment contains 2^28 bytes. Each
+ * context maps 2^46 bytes so we can support 2^19-1 contexts
+ * (19 == 37 + 28 - 46).
  */
 #define MAX_CONTEXT	((1UL << CONTEXT_BITS) - 1)
 
-- 
1.7.10

^ permalink raw reply related

* Re: [PATCH v5 4/6] fsl-dma: move the function ahead of its invoke function
From: Ira W. Snyder @ 2012-08-01 16:31 UTC (permalink / raw)
  To: qiang.liu
  Cc: Vinod Koul, linux-kernel, dan.j.williams, herbert, linux-crypto,
	Dan Williams, linuxppc-dev, davem
In-Reply-To: <1343810983-25412-1-git-send-email-qiang.liu@freescale.com>

On Wed, Aug 01, 2012 at 04:49:43PM +0800, qiang.liu@freescale.com wrote:
> From: Qiang Liu <qiang.liu@freescale.com>
> 
> Move the function fsldma_cleanup_descriptor() and fsl_chan_xfer_ld_queue()
> ahead of its invoke function for avoiding redundant definition.
> 
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Vinod Koul <vinod.koul@intel.com>
> Cc: Li Yang <leoli@freescale.com>
> Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
> ---
>  drivers/dma/fsldma.c |  252 +++++++++++++++++++++++++-------------------------
>  1 files changed, 124 insertions(+), 128 deletions(-)
> 
> diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
> index 87f52c0..bb883c0 100644
> --- a/drivers/dma/fsldma.c
> +++ b/drivers/dma/fsldma.c
> @@ -400,9 +400,6 @@ out_splice:
>  	list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
>  }
> 
> -static void fsldma_cleanup_descriptor(struct fsldma_chan *chan);
> -static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan);
> -

Please swap the order of this patch (patch 4/6) and the previous patch
(patch 3/6).

You added these lines in the patch 3/6 and deleted them here. If you
reverse the order of the patches, this doesn't happen.

Adding lines only to delete them in the next patch should be avoided.

>  /**
>   * fsldma_clean_completed_descriptor - free all descriptors which
>   * has been completed and acked
> @@ -519,6 +516,130 @@ fsldma_clean_running_descriptor(struct fsldma_chan *chan,
>  	return 0;
>  }
> 
> +/**
> + * fsl_chan_xfer_ld_queue - transfer any pending transactions
> + * @chan : Freescale DMA channel
> + *
> + * HARDWARE STATE: idle
> + * LOCKING: must hold chan->desc_lock
> + */
> +static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
> +{
> +	struct fsl_desc_sw *desc;
> +
> +	/*
> +	 * If the list of pending descriptors is empty, then we
> +	 * don't need to do any work at all
> +	 */
> +	if (list_empty(&chan->ld_pending)) {
> +		chan_dbg(chan, "no pending LDs\n");
> +		return;
> +	}
> +
> +	/*
> +	 * The DMA controller is not idle, which means that the interrupt
> +	 * handler will start any queued transactions when it runs after
> +	 * this transaction finishes
> +	 */
> +	if (!chan->idle) {
> +		chan_dbg(chan, "DMA controller still busy\n");
> +		return;
> +	}
> +
> +	/*
> +	 * If there are some link descriptors which have not been
> +	 * transferred, we need to start the controller
> +	 */
> +
> +	/*
> +	 * Move all elements from the queue of pending transactions
> +	 * onto the list of running transactions
> +	 */
> +	chan_dbg(chan, "idle, starting controller\n");
> +	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
> +	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
> +
> +	/*
> +	 * The 85xx DMA controller doesn't clear the channel start bit
> +	 * automatically at the end of a transfer. Therefore we must clear
> +	 * it in software before starting the transfer.
> +	 */
> +	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
> +		u32 mode;
> +
> +		mode = DMA_IN(chan, &chan->regs->mr, 32);
> +		mode &= ~FSL_DMA_MR_CS;
> +		DMA_OUT(chan, &chan->regs->mr, mode, 32);
> +	}
> +
> +	/*
> +	 * Program the descriptor's address into the DMA controller,
> +	 * then start the DMA transaction
> +	 */
> +	set_cdar(chan, desc->async_tx.phys);
> +	get_cdar(chan);
> +
> +	dma_start(chan);
> +	chan->idle = false;
> +}
> +
> +/**
> + * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
> + * @chan: Freescale DMA channel
> + * @desc: descriptor to cleanup and free
> + *
> + * This function is used on a descriptor which has been executed by the DMA
> + * controller. It will run any callbacks, submit any dependencies, and then
> + * free the descriptor.
> + */
> +static void fsldma_cleanup_descriptor(struct fsldma_chan *chan)
> +{
> +	struct fsl_desc_sw *desc, *_desc;
> +	dma_cookie_t cookie = 0;
> +	dma_addr_t curr_phys = get_cdar(chan);
> +	int idle = dma_is_idle(chan);
> +	int seen_current = 0;
> +
> +	fsldma_clean_completed_descriptor(chan);
> +
> +	/* Run the callback for each descriptor, in order */
> +	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
> +		/*
> +		 * do not advance past the current descriptor loaded into the
> +		 * hardware channel, subsequent descriptors are either in
> +		 * process or have not been submitted
> +		 */
> +		if (seen_current)
> +			break;
> +
> +		/*
> +		 * stop the search if we reach the current descriptor and the
> +		 * channel is busy
> +		 */
> +		if (desc->async_tx.phys == curr_phys) {
> +			seen_current = 1;
> +			if (!idle)
> +				break;
> +		}
> +
> +		cookie = fsldma_run_tx_complete_actions(desc, chan, cookie);
> +
> +		if (fsldma_clean_running_descriptor(chan, desc))
> +			break;
> +	}
> +
> +	/*
> +	 * Start any pending transactions automatically
> +	 *
> +	 * In the ideal case, we keep the DMA controller busy while we go
> +	 * ahead and free the descriptors below.
> +	 */
> +	fsl_chan_xfer_ld_queue(chan);
> +
> +	if (cookie > 0)
> +		chan->common.completed_cookie = cookie;
> +}
> +
>  static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
>  {
>  	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
> @@ -932,131 +1053,6 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
>  }
> 
>  /**
> - * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
> - * @chan: Freescale DMA channel
> - * @desc: descriptor to cleanup and free
> - *
> - * This function is used on a descriptor which has been executed by the DMA
> - * controller. It will run any callbacks, submit any dependencies, and then
> - * free the descriptor.
> - */
> -static void fsldma_cleanup_descriptor(struct fsldma_chan *chan)
> -{
> -	struct fsl_desc_sw *desc, *_desc;
> -	dma_cookie_t cookie = 0;
> -	dma_addr_t curr_phys = get_cdar(chan);
> -	int idle = dma_is_idle(chan);
> -	int seen_current = 0;
> -
> -	fsldma_clean_completed_descriptor(chan);
> -
> -	/* Run the callback for each descriptor, in order */
> -	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
> -		/*
> -		 * do not advance past the current descriptor loaded into the
> -		 * hardware channel, subsequent descriptors are either in
> -		 * process or have not been submitted
> -		 */
> -		if (seen_current)
> -			break;
> -
> -		/*
> -		 * stop the search if we reach the current descriptor and the
> -		 * channel is busy
> -		 */
> -		if (desc->async_tx.phys == curr_phys) {
> -			seen_current = 1;
> -			if (!idle)
> -				break;
> -		}
> -
> -		cookie = fsldma_run_tx_complete_actions(desc, chan, cookie);
> -
> -		if (fsldma_clean_running_descriptor(chan, desc))
> -			break;
> -
> -	}
> -
> -	/*
> -	 * Start any pending transactions automatically
> -	 *
> -	 * In the ideal case, we keep the DMA controller busy while we go
> -	 * ahead and free the descriptors below.
> -	 */
> -	fsl_chan_xfer_ld_queue(chan);
> -
> -	if (cookie > 0)
> -		chan->common.completed_cookie = cookie;
> -}
> -
> -/**
> - * fsl_chan_xfer_ld_queue - transfer any pending transactions
> - * @chan : Freescale DMA channel
> - *
> - * HARDWARE STATE: idle
> - * LOCKING: must hold chan->desc_lock
> - */
> -static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
> -{
> -	struct fsl_desc_sw *desc;
> -
> -	/*
> -	 * If the list of pending descriptors is empty, then we
> -	 * don't need to do any work at all
> -	 */
> -	if (list_empty(&chan->ld_pending)) {
> -		chan_dbg(chan, "no pending LDs\n");
> -		return;
> -	}
> -
> -	/*
> -	 * The DMA controller is not idle, which means that the interrupt
> -	 * handler will start any queued transactions when it runs after
> -	 * this transaction finishes
> -	 */
> -	if (!chan->idle) {
> -		chan_dbg(chan, "DMA controller still busy\n");
> -		return;
> -	}
> -
> -	/*
> -	 * If there are some link descriptors which have not been
> -	 * transferred, we need to start the controller
> -	 */
> -
> -	/*
> -	 * Move all elements from the queue of pending transactions
> -	 * onto the list of running transactions
> -	 */
> -	chan_dbg(chan, "idle, starting controller\n");
> -	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
> -	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
> -
> -	/*
> -	 * The 85xx DMA controller doesn't clear the channel start bit
> -	 * automatically at the end of a transfer. Therefore we must clear
> -	 * it in software before starting the transfer.
> -	 */
> -	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
> -		u32 mode;
> -
> -		mode = DMA_IN(chan, &chan->regs->mr, 32);
> -		mode &= ~FSL_DMA_MR_CS;
> -		DMA_OUT(chan, &chan->regs->mr, mode, 32);
> -	}
> -
> -	/*
> -	 * Program the descriptor's address into the DMA controller,
> -	 * then start the DMA transaction
> -	 */
> -	set_cdar(chan, desc->async_tx.phys);
> -	get_cdar(chan);
> -
> -	dma_start(chan);
> -	chan->idle = false;
> -}
> -
> -/**
>   * fsl_dma_memcpy_issue_pending - Issue the DMA start command
>   * @chan : Freescale DMA channel
>   */
> --
> 1.7.5.1
> 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* Re: [PATCH v5 2/6] fsl-dma: remove attribute DMA_INTERRUPT of dmaengine
From: Ira W. Snyder @ 2012-08-01 16:35 UTC (permalink / raw)
  To: qiang.liu
  Cc: Vinod Koul, linux-kernel, dan.j.williams, herbert, linux-crypto,
	Dan Williams, linuxppc-dev, davem
In-Reply-To: <1343810948-25343-1-git-send-email-qiang.liu@freescale.com>

On Wed, Aug 01, 2012 at 04:49:08PM +0800, qiang.liu@freescale.com wrote:
> From: Qiang Liu <qiang.liu@freescale.com>
> 
> Delete attribute DMA_INTERRUPT because fsl-dma doesn't support this function,
> exception will be thrown if talitos is used to offload xor at the same time.
> 

I have no problem with this patch.

However, it ***WILL BREAK*** both drivers in drivers/misc/carma. Please
add my patch 7/7 titled "[PATCH 7/7] carma: remove unnecessary
DMA_INTERRUPT capability" to your series. I suggest placing it
immediately after this patch in your series.

The carma drivers use the fsldma driver exclusively.

> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Vinod Koul <vinod.koul@intel.com>
> Cc: Li Yang <leoli@freescale.com>
> Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
> Acked-by: Ira W. Snyder <iws@ovro.caltech.edu>
> ---
>  drivers/dma/fsldma.c |   31 -------------------------------
>  1 files changed, 0 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
> index 8f84761..4f2f212 100644
> --- a/drivers/dma/fsldma.c
> +++ b/drivers/dma/fsldma.c
> @@ -543,35 +543,6 @@ static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
>  }
> 
>  static struct dma_async_tx_descriptor *
> -fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags)
> -{
> -	struct fsldma_chan *chan;
> -	struct fsl_desc_sw *new;
> -
> -	if (!dchan)
> -		return NULL;
> -
> -	chan = to_fsl_chan(dchan);
> -
> -	new = fsl_dma_alloc_descriptor(chan);
> -	if (!new) {
> -		chan_err(chan, "%s\n", msg_ld_oom);
> -		return NULL;
> -	}
> -
> -	new->async_tx.cookie = -EBUSY;
> -	new->async_tx.flags = flags;
> -
> -	/* Insert the link descriptor to the LD ring */
> -	list_add_tail(&new->node, &new->tx_list);
> -
> -	/* Set End-of-link to the last link descriptor of new list */
> -	set_ld_eol(chan, new);
> -
> -	return &new->async_tx;
> -}
> -
> -static struct dma_async_tx_descriptor *
>  fsl_dma_prep_memcpy(struct dma_chan *dchan,
>  	dma_addr_t dma_dst, dma_addr_t dma_src,
>  	size_t len, unsigned long flags)
> @@ -1352,12 +1323,10 @@ static int __devinit fsldma_of_probe(struct platform_device *op)
>  	fdev->irq = irq_of_parse_and_map(op->dev.of_node, 0);
> 
>  	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
> -	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
>  	dma_cap_set(DMA_SG, fdev->common.cap_mask);
>  	dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
>  	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
>  	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
> -	fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
>  	fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
>  	fdev->common.device_prep_dma_sg = fsl_dma_prep_sg;
>  	fdev->common.device_tx_status = fsl_tx_status;
> --
> 1.7.5.1
> 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* Re: [PATCH v5 3/6] fsl-dma: change release process of dma descriptor for supporting async_tx
From: Ira W. Snyder @ 2012-08-01 17:25 UTC (permalink / raw)
  To: qiang.liu
  Cc: Vinod Koul, linux-kernel, dan.j.williams, herbert, linux-crypto,
	Dan Williams, linuxppc-dev, davem
In-Reply-To: <1343810957-25378-1-git-send-email-qiang.liu@freescale.com>

On Wed, Aug 01, 2012 at 04:49:17PM +0800, qiang.liu@freescale.com wrote:
> From: Qiang Liu <qiang.liu@freescale.com>
> 
> Fix the potential risk when enable config NET_DMA and ASYNC_TX.
> Async_tx is lack of support in current release process of dma descriptor,
> all descriptors will be released whatever is acked or no-acked by async_tx,
> so there is a potential race condition when dma engine is used by other
> clients (e.g. when enable NET_DMA to offload TCP).
> 
> In our case, a race condition which is raised when use both of talitos
> and dmaengine to offload xor is because napi scheduler will sync all
> pending requests in dma channels, it affects the process of raid operations
> due to ack_tx is not checked in fsl dma. The no-acked descriptor is freed
> which is submitted just now, as a dependent tx, this freed descriptor trigger
> BUG_ON(async_tx_test_ack(depend_tx)) in async_tx_submit().
> 
> TASK = ee1a94a0[1390] 'md0_raid5' THREAD: ecf40000 CPU: 0
> GPR00: 00000001 ecf41ca0 ee1a94a0 0000003f 00000001 c00593e4 00000000 00000001
> GPR08: 00000000 a7a7a7a7 00000001 00000002 42028042 100a38d4 ed576d98 00000000
> GPR16: ed5a11b0 00000000 2b162000 00000200 00000000 2d555000 ed3015e8 c15a7aa0
> GPR24: 00000000 c155fc40 00000000 ecb63220 ecf41d28 ef640bb0 ef640c30 ecf41ca0
> NIP [c02b048c] async_tx_submit+0x6c/0x2b4
> LR [c02b068c] async_tx_submit+0x26c/0x2b4
> Call Trace:
> [ecf41ca0] [c02b068c] async_tx_submit+0x26c/0x2b4 (unreliable)
> [ecf41cd0] [c02b0a4c] async_memcpy+0x240/0x25c
> [ecf41d20] [c0421064] async_copy_data+0xa0/0x17c
> [ecf41d70] [c0421cf4] __raid_run_ops+0x874/0xe10
> [ecf41df0] [c0426ee4] handle_stripe+0x820/0x25e8
> [ecf41e90] [c0429080] raid5d+0x3d4/0x5b4
> [ecf41f40] [c04329b8] md_thread+0x138/0x16c
> [ecf41f90] [c008277c] kthread+0x8c/0x90
> [ecf41ff0] [c0011630] kernel_thread+0x4c/0x68
> 
> Another major modification in this patch is the change to completed descriptors,
> there is a potential risk which caused by exception interrupt, all descriptors
> in ld_running list are seemed completed when an interrupt raised, it works fine
> under normal condition, but if there is an exception occurred, it cannot work
> as we expected. Hardware should not depend on s/w list, the right way is
> to read current descriptor address register to find the last completed
> descriptor. If an interrupt is raised by an error, all descriptors in ld_running
> should not be seemed finished, or these unfinished descriptors in ld_running
> will be released wrongly.
> 
> A simple way to reproduce,
> Enable dmatest first, then insert some bad descriptors which can trigger
> Programming Error interrupts before the good descriptors. Last, the good
> descriptors will be freed before they are processed because of the exception
> interrupt.
> 
> Note: the bad descriptors are only for simulating an exception interrupt.
> This case can illustrate the potential risk in current fsl-dma very well.
> 

I've never managed to trigger a PE (programming error) interrupt on the
83xx hardware. Any time I intentionally caused an error, the hardware
wedged itself. The CB (channel busy) bit is stuck high, and cannot be
cleared without a hard reset of the board.

I agree the "snoop on the hardware" technique works. As far as I can
tell, you have implemented the code correctly.

The MPC8349EARM.pdf from Freescale indicates that the hardware will halt
in response to a programming error, and generate a PE interrupt. See
section 12.5.3.3 (pg 568).

The driver, as it is written, will never recover from such a condition.
Since you are complaining about this situation, do you intend to fix it?

> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Dan Williams <dan.j.williams@gmail.com>
> Cc: Vinod Koul <vinod.koul@intel.com>
> Cc: Li Yang <leoli@freescale.com>
> Cc: Ira W. Snyder <iws@ovro.caltech.edu>
> Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
> ---
>  drivers/dma/fsldma.c |  242 +++++++++++++++++++++++++++++++++++---------------
>  drivers/dma/fsldma.h |    1 +
>  2 files changed, 172 insertions(+), 71 deletions(-)
> 
> diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
> index 4f2f212..87f52c0 100644
> --- a/drivers/dma/fsldma.c
> +++ b/drivers/dma/fsldma.c
> @@ -400,6 +400,125 @@ out_splice:
>  	list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
>  }
> 
> +static void fsldma_cleanup_descriptor(struct fsldma_chan *chan);
> +static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan);
> +

As noted in my reply to patch 4/6, please swap the order of this patch
and the following patch.

These lines should not be added or removed in either patch.

> +/**
> + * fsldma_clean_completed_descriptor - free all descriptors which
> + * has been completed and acked
> + * @chan: Freescale DMA channel
> + *
> + * This function is used on all completed and acked descriptors.
> + * All descriptors should only be freed in this function.
> + */
> +static int
> +fsldma_clean_completed_descriptor(struct fsldma_chan *chan)

This should be 'static void'. It does not return an error code.

> +{
> +	struct fsl_desc_sw *desc, *_desc;
> +
> +	/* Run the callback for each descriptor, in order */
> +	list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) {
> +
> +		if (async_tx_test_ack(&desc->async_tx)) {
> +			/* Remove from the list of transactions */
> +			list_del(&desc->node);
> +#ifdef FSL_DMA_LD_DEBUG
> +			chan_dbg(chan, "LD %p free\n", desc);
> +#endif
> +			dma_pool_free(chan->desc_pool, desc,
> +					desc->async_tx.phys);

This code appears in multiple places in the driver. Please consider
adding my patch 3/7 titled "[PATCH 3/7] fsl-dma: add
fsl_dma_free_descriptor() to reduce code duplication" to your patch
series.

> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * fsldma_run_tx_complete_actions - cleanup and free a single link descriptor

This documentation is incorrect. This code NEVER frees a descriptor.

> + * @chan: Freescale DMA channel
> + * @desc: descriptor to cleanup and free
> + * @cookie: Freescale DMA transaction identifier
> + *
> + * This function is used on a descriptor which has been executed by the DMA
> + * controller. It will run any callbacks, submit any dependencies.
> + */
> +static dma_cookie_t fsldma_run_tx_complete_actions(struct fsl_desc_sw *desc,
> +		struct fsldma_chan *chan, dma_cookie_t cookie)

Please change the parameter order to:

static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan *chan,
		struct fsl_desc_sw *desc, dma_cookie_t cookie)

Every other function in the driver uses this parameter order. Channel
comes first, then descriptor.

> +{
> +	struct dma_async_tx_descriptor *txd = &desc->async_tx;
> +	struct device *dev = chan->common.device->dev;
> +	dma_addr_t src = get_desc_src(chan, desc);
> +	dma_addr_t dst = get_desc_dst(chan, desc);
> +	u32 len = get_desc_cnt(chan, desc);
> +
> +	BUG_ON(txd->cookie < 0);
> +
> +	if (txd->cookie > 0) {

It will significantly reduce your patch size if you move this if
statement to the function which calls this one. I've provided an example
down below, in the one place where this code is used.

> +		cookie = txd->cookie;
> +
> +		/* Run the link descriptor callback function */
> +		if (txd->callback) {
> +#ifdef FSL_DMA_LD_DEBUG
> +			chan_dbg(chan, "LD %p callback\n", desc);
> +#endif
> +			txd->callback(txd->callback_param);
> +		}
> +
> +		/* Unmap the dst buffer, if requested */
> +		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> +			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> +				dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> +			else
> +				dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> +		}
> +
> +		/* Unmap the src buffer, if requested */
> +		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> +			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> +				dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> +			else
> +				dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
> +		}
> +	}
> +
> +	/* Run any dependencies */
> +	dma_run_dependencies(txd);
> +
> +	return cookie;
> +}
> +
> +/**
> + * fsldma_clean_running_descriptor - move the completed descriptor from
> + * ld_running to ld_completed
> + * @chan: Freescale DMA channel
> + * @desc: the descriptor which is completed
> + *
> + * Free the descriptor directly if acked by async_tx api, or move it to
> + * queue ld_completed.
> + */
> +static int

This code never returns an error code. It should be 'static void'.

> +fsldma_clean_running_descriptor(struct fsldma_chan *chan,
> +		struct fsl_desc_sw *desc)
> +{
> +	/* Remove from the list of transactions */
> +	list_del(&desc->node);
> +	/*
> +	 * the client is allowed to attach dependent operations
> +	 * until 'ack' is set
> +	 */
> +	if (!async_tx_test_ack(&desc->async_tx)) {
> +		/*
> +		 * Move this descriptor to the list of descriptors which is
> +		 * completed, but still awaiting the 'ack' bit to be set.
> +		 */
> +		list_add_tail(&desc->node, &chan->ld_completed);
> +		return 0;
> +	}
> +
> +	dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
> +	return 0;
> +}
> +
>  static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
>  {
>  	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
> @@ -534,8 +653,10 @@ static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
> 
>  	chan_dbg(chan, "free all channel resources\n");
>  	spin_lock_irqsave(&chan->desc_lock, flags);
> +	fsldma_cleanup_descriptor(chan);
>  	fsldma_free_desc_list(chan, &chan->ld_pending);
>  	fsldma_free_desc_list(chan, &chan->ld_running);
> +	fsldma_free_desc_list(chan, &chan->ld_completed);
>  	spin_unlock_irqrestore(&chan->desc_lock, flags);
> 
>  	dma_pool_destroy(chan->desc_pool);
> @@ -819,46 +940,53 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
>   * controller. It will run any callbacks, submit any dependencies, and then
>   * free the descriptor.
>   */

This documentation is now wrong. This function no longer operates on a
single descriptor. It operates on all descriptors in ld_running and
ld_completed.

Please fix the documentation, and add locking notes.

> -static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
> -				      struct fsl_desc_sw *desc)
> +static void fsldma_cleanup_descriptor(struct fsldma_chan *chan)

I think the name should change to fsldma_cleanup_descriptors(). It
cleans up one or more descriptors now.

>  {
> -	struct dma_async_tx_descriptor *txd = &desc->async_tx;
> -	struct device *dev = chan->common.device->dev;
> -	dma_addr_t src = get_desc_src(chan, desc);
> -	dma_addr_t dst = get_desc_dst(chan, desc);
> -	u32 len = get_desc_cnt(chan, desc);
> +	struct fsl_desc_sw *desc, *_desc;
> +	dma_cookie_t cookie = 0;
> +	dma_addr_t curr_phys = get_cdar(chan);
> +	int idle = dma_is_idle(chan);
> +	int seen_current = 0;
> 

The hardware can advance quite a bit between here, where you save the
current descriptor address and idle status.

> -	/* Run the link descriptor callback function */
> -	if (txd->callback) {
> -#ifdef FSL_DMA_LD_DEBUG
> -		chan_dbg(chan, "LD %p callback\n", desc);
> -#endif
> -		txd->callback(txd->callback_param);
> -	}
> +	fsldma_clean_completed_descriptor(chan);
> 
> -	/* Run any dependencies */
> -	dma_run_dependencies(txd);
> +	/* Run the callback for each descriptor, in order */
> +	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
> +		/*
> +		 * do not advance past the current descriptor loaded into the
> +		 * hardware channel, subsequent descriptors are either in
> +		 * process or have not been submitted
> +		 */
> +		if (seen_current)
> +			break;
> 
> -	/* Unmap the dst buffer, if requested */
> -	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> -		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> -			dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> -		else
> -			dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> -	}
> +		/*
> +		 * stop the search if we reach the current descriptor and the
> +		 * channel is busy
> +		 */
> +		if (desc->async_tx.phys == curr_phys) {
> +			seen_current = 1;
> +			if (!idle)
> +				break;
> +		}

And here, where you check the current descriptor address and idle
status.

Should this change to:

if (desc->async_tx.phys == get_cdar(chan)) {
	seen_current = 1;
	if (!dma_is_idle(chan))
		break;
}

> +
> +		cookie = fsldma_run_tx_complete_actions(desc, chan, cookie);
> +

I would prefer if the code just kept track of the cookie here, rather
than passing it through this function call. This code also illustrates
how you can remove the "if (txd->cookie > 0)" check from
fsldma_run_tx_complete_actions() to reduce the patch size.

/*
 * Only descriptors with non-zero cookies need their completion
 * actions run.
 */
if (desc->async_tx.cookie > 0) {
	cookie = desc->async_tx.cookie;
	fsldma_run_tx_complete_actions(chan, desc);
	desc->async_tx.cookie = 0;
}

/* This descriptor has been ACKed, free it */
if (async_tx_test_ack(&desc->async_tx)) {
	fsl_dma_free_descriptor(chan, desc);
	continue;
}

/*
 * This descriptor was not ACKed, add it to the ld_completed
 * list, to be freed after the ACK bit is set.
 */
list_del(&desc->node);
list_add_tail(&desc->node, &chan->ld_completed);


> +		if (fsldma_clean_running_descriptor(chan, desc))
> +			break;
> 

This if statement will never trigger. fsldma_clean_running_descriptor()
only returns 0. It is useless.

> -	/* Unmap the src buffer, if requested */
> -	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> -		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> -			dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> -		else
> -			dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
>  	}
> 
> -#ifdef FSL_DMA_LD_DEBUG
> -	chan_dbg(chan, "LD %p free\n", desc);
> -#endif
> -	dma_pool_free(chan->desc_pool, desc, txd->phys);
> +	/*
> +	 * Start any pending transactions automatically
> +	 *
> +	 * In the ideal case, we keep the DMA controller busy while we go
> +	 * ahead and free the descriptors below.
> +	 */
> +	fsl_chan_xfer_ld_queue(chan);
> +
> +	if (cookie > 0)
> +		chan->common.completed_cookie = cookie;
>  }
> 
>  /**
> @@ -954,11 +1082,15 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
>  	enum dma_status ret;
>  	unsigned long flags;
> 
> -	spin_lock_irqsave(&chan->desc_lock, flags);
>  	ret = dma_cookie_status(dchan, cookie, txstate);
> +	if (ret == DMA_SUCCESS)
> +		return ret;
> +
> +	spin_lock_irqsave(&chan->desc_lock, flags);
> +	fsldma_cleanup_descriptor(chan);
>  	spin_unlock_irqrestore(&chan->desc_lock, flags);
> 
> -	return ret;
> +	return dma_cookie_status(dchan, cookie, txstate);
>  }
> 
>  /*----------------------------------------------------------------------------*/
> @@ -1035,52 +1167,19 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data)
>  static void dma_do_tasklet(unsigned long data)
>  {
>  	struct fsldma_chan *chan = (struct fsldma_chan *)data;
> -	struct fsl_desc_sw *desc, *_desc;
> -	LIST_HEAD(ld_cleanup);
>  	unsigned long flags;
> 
>  	chan_dbg(chan, "tasklet entry\n");
> 
>  	spin_lock_irqsave(&chan->desc_lock, flags);
> 
> -	/* update the cookie if we have some descriptors to cleanup */
> -	if (!list_empty(&chan->ld_running)) {
> -		dma_cookie_t cookie;
> -
> -		desc = to_fsl_desc(chan->ld_running.prev);
> -		cookie = desc->async_tx.cookie;
> -		dma_cookie_complete(&desc->async_tx);
> -
> -		chan_dbg(chan, "completed_cookie=%d\n", cookie);
> -	}
> -
> -	/*
> -	 * move the descriptors to a temporary list so we can drop the lock
> -	 * during the entire cleanup operation
> -	 */
> -	list_splice_tail_init(&chan->ld_running, &ld_cleanup);
> -
>  	/* the hardware is now idle and ready for more */
>  	chan->idle = true;
> 
> -	/*
> -	 * Start any pending transactions automatically
> -	 *
> -	 * In the ideal case, we keep the DMA controller busy while we go
> -	 * ahead and free the descriptors below.
> -	 */
> -	fsl_chan_xfer_ld_queue(chan);
> -	spin_unlock_irqrestore(&chan->desc_lock, flags);
> -
> -	/* Run the callback for each descriptor, in order */
> -	list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) {
> +	/* Run all cleanup for this descriptor */

Nitpick. This should be:

/* Run cleanup for all descriptors */

> +	fsldma_cleanup_descriptor(chan);
> 
> -		/* Remove from the list of transactions */
> -		list_del(&desc->node);
> -
> -		/* Run all cleanup for this descriptor */
> -		fsldma_cleanup_descriptor(chan, desc);
> -	}
> +	spin_unlock_irqrestore(&chan->desc_lock, flags);
> 
>  	chan_dbg(chan, "tasklet exit\n");
>  }
> @@ -1262,6 +1361,7 @@ static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev,
>  	spin_lock_init(&chan->desc_lock);
>  	INIT_LIST_HEAD(&chan->ld_pending);
>  	INIT_LIST_HEAD(&chan->ld_running);
> +	INIT_LIST_HEAD(&chan->ld_completed);
>  	chan->idle = true;
> 
>  	chan->common.device = &fdev->common;
> diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
> index f5c3879..7ede908 100644
> --- a/drivers/dma/fsldma.h
> +++ b/drivers/dma/fsldma.h
> @@ -140,6 +140,7 @@ struct fsldma_chan {
>  	spinlock_t desc_lock;		/* Descriptor operation lock */
>  	struct list_head ld_pending;	/* Link descriptors queue */
>  	struct list_head ld_running;	/* Link descriptors queue */
> +	struct list_head ld_completed;	/* Link descriptors queue */

It may help to add some documentation here. It would have helped me to
review this patch. Something like this:

/*
 * Descriptors which are queued to run, but have not yet been handed
 * to the hardware for execution
 */
struct list_head ld_pending;

/*
 * Descriptors which are currently being executed by the hardware
 */
struct list_head ld_running;

/*
 * Descriptors which have finished execution by the hardware. These
 * descriptors have already had their cleanup actions run. They are
 * waiting for the ACK bit to be set by the async_tx API.
 */
struct list_head ld_completed;

>  	struct dma_chan common;		/* DMA common channel */
>  	struct dma_pool *desc_pool;	/* Descriptors pool */
>  	struct device *dev;		/* Channel device */
> --
> 1.7.5.1
> 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox