LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] vfio: enabled and supported on power (v7)
From: Alexey Kardashevskiy @ 2012-09-04  7:33 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, David Gibson
  Cc: Alexey Kardashevskiy, linuxppc-dev, Alex Williamson,
	Paul Mackerras
In-Reply-To: <20120821113534.GS29724@truffula.fritz.box>

Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/include/asm/iommu.h    |    3 +
 drivers/iommu/Kconfig               |    8 +
 drivers/vfio/Kconfig                |    6 +
 drivers/vfio/Makefile               |    1 +
 drivers/vfio/vfio_iommu_spapr_tce.c |  440 +++++++++++++++++++++++++++++++++++
 include/linux/vfio.h                |   29 +++
 6 files changed, 487 insertions(+)
 create mode 100644 drivers/vfio/vfio_iommu_spapr_tce.c

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 957a83f..c64bce7 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -66,6 +66,9 @@ struct iommu_table {
 	unsigned long  it_halfpoint; /* Breaking point for small/large allocs */
 	spinlock_t     it_lock;      /* Protects it_map */
 	unsigned long *it_map;       /* A simple allocation bitmap for now */
+#ifdef CONFIG_IOMMU_API
+	struct iommu_group *it_group;
+#endif
 };
 
 struct scatterlist;
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 3bd9fff..19cf2d9 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -162,4 +162,12 @@ config TEGRA_IOMMU_SMMU
 	  space through the SMMU (System Memory Management Unit)
 	  hardware included on Tegra SoCs.
 
+config SPAPR_TCE_IOMMU
+	bool "sPAPR TCE IOMMU Support"
+	depends on PPC_PSERIES
+	select IOMMU_API
+	help
+	  Enables bits of IOMMU API required by VFIO. The iommu_ops is
+	  still not implemented.
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 7cd5dec..b464687 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -3,10 +3,16 @@ config VFIO_IOMMU_TYPE1
 	depends on VFIO
 	default n
 
+config VFIO_IOMMU_SPAPR_TCE
+	tristate
+	depends on VFIO && SPAPR_TCE_IOMMU
+	default n
+
 menuconfig VFIO
 	tristate "VFIO Non-Privileged userspace driver framework"
 	depends on IOMMU_API
 	select VFIO_IOMMU_TYPE1 if X86
+	select VFIO_IOMMU_SPAPR_TCE if PPC_POWERNV
 	help
 	  VFIO provides a framework for secure userspace device drivers.
 	  See Documentation/vfio.txt for more details.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 2398d4a..72bfabc 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
+obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
new file mode 100644
index 0000000..21f1909
--- /dev/null
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -0,0 +1,440 @@
+/*
+ * VFIO: IOMMU DMA mapping support for TCE on POWER
+ *
+ * Copyright (C) 2012 IBM Corp.  All rights reserved.
+ *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Derived from original vfio_iommu_x86.c:
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/err.h>
+#include <linux/vfio.h>
+#include <linux/spinlock.h>
+#include <asm/iommu.h>
+
+#define DRIVER_VERSION  "0.1"
+#define DRIVER_AUTHOR   "aik@ozlabs.ru"
+#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"
+
+
+/*
+ * SPAPR TCE API
+ */
+static void tce_free(struct iommu_table *tbl, unsigned long entry,
+		unsigned long tce)
+{
+	struct page *page = pfn_to_page(tce >> PAGE_SHIFT);
+
+	WARN_ON(!page);
+	if (page) {
+		if (tce & VFIO_SPAPR_TCE_WRITE)
+			SetPageDirty(page);
+		put_page(page);
+	}
+	ppc_md.tce_free(tbl, entry, 1);
+}
+
+static long tce_put(struct iommu_table *tbl,
+		unsigned long entry, uint64_t tce, uint32_t flags)
+{
+	int ret;
+	unsigned long oldtce, kva, offset;
+	struct page *page = NULL;
+	enum dma_data_direction direction = DMA_NONE;
+
+	switch (flags & VFIO_SPAPR_TCE_PUT_MASK) {
+	case VFIO_SPAPR_TCE_READ:
+		direction = DMA_TO_DEVICE;
+		break;
+	case VFIO_SPAPR_TCE_WRITE:
+		direction = DMA_FROM_DEVICE;
+		break;
+	case VFIO_SPAPR_TCE_BIDIRECTIONAL:
+		direction = DMA_BIDIRECTIONAL;
+		break;
+	}
+
+	oldtce = ppc_md.tce_get(tbl, entry);
+
+	/* Free page if still allocated */
+	if (oldtce & VFIO_SPAPR_TCE_PUT_MASK)
+		tce_free(tbl, entry, oldtce);
+
+	/* Map new TCE */
+	if (direction != DMA_NONE) {
+		offset = (tce & IOMMU_PAGE_MASK) - (tce & PAGE_MASK);
+		ret = get_user_pages_fast(tce & PAGE_MASK, 1,
+				direction != DMA_TO_DEVICE, &page);
+		BUG_ON(ret > 1);
+		if (ret < 1) {
+			printk(KERN_ERR "tce_vfio: get_user_pages_fast failed "
+					"tce=%llx ioba=%lx ret=%d\n",
+					tce, entry << IOMMU_PAGE_SHIFT, ret);
+			if (!ret)
+				ret = -EFAULT;
+			goto unlock_exit;
+		}
+
+		kva = (unsigned long) page_address(page);
+		kva += offset;
+		BUG_ON(!kva);
+		if (WARN_ON(kva & ~IOMMU_PAGE_MASK))
+			return -EINVAL;
+
+		/* Preserve access bits */
+		kva |= flags & VFIO_SPAPR_TCE_PUT_MASK;
+
+		/* tce_build receives a virtual address */
+		entry += tbl->it_offset;	/* Offset into real TCE table */
+		ret = ppc_md.tce_build(tbl, entry, 1, kva, direction, NULL);
+
+		/* tce_build() only returns non-zero for transient errors */
+		if (unlikely(ret)) {
+			printk(KERN_ERR "tce_vfio: Failed to add TCE\n");
+			ret = -EIO;
+			goto unlock_exit;
+		}
+	}
+	/* Flush/invalidate TLB caches if necessary */
+	if (ppc_md.tce_flush)
+		ppc_md.tce_flush(tbl);
+
+	/* Make sure updates are seen by hardware */
+	mb();
+
+unlock_exit:
+	if (ret && page)
+		put_page(page);
+
+	if (ret)
+		printk(KERN_ERR "tce_vfio: tce_put failed on tce=%llx "
+				"ioba=%lx kva=%lx\n", tce,
+				entry << IOMMU_PAGE_SHIFT, kva);
+	return ret;
+}
+
+/*
+ * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
+ */
+
+/*
+ * The container descriptor supports only a single group per container.
+ * Required by the API as the container is not supplied with the IOMMU group
+ * at the moment of initialization.
+ */
+struct tce_container {
+	struct iommu_table *tbl;
+};
+
+static void *tce_iommu_open(unsigned long arg)
+{
+	struct tce_container *container;
+
+	if (arg != VFIO_SPAPR_TCE_IOMMU) {
+		printk(KERN_ERR "tce_vfio: Wrong IOMMU type\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	container = kzalloc(sizeof(*container), GFP_KERNEL);
+	if (!container)
+		return ERR_PTR(-ENOMEM);
+
+	return container;
+}
+
+static void tce_iommu_release(void *iommu_data)
+{
+	struct tce_container *container = iommu_data;
+	struct iommu_table *tbl = container->tbl;
+	unsigned long i, tce;
+
+	/* Unmap leftovers */
+	spin_lock_irq(&tbl->it_lock);
+	for (i = tbl->it_offset; i < tbl->it_offset + tbl->it_size; ++i) {
+		tce = ppc_md.tce_get(tbl, i);
+		if (tce & VFIO_SPAPR_TCE_PUT_MASK)
+			tce_free(tbl, i, tce);
+	}
+	/* Flush/invalidate TLB caches if necessary */
+	if (ppc_md.tce_flush)
+		ppc_md.tce_flush(tbl);
+
+	/* Make sure updates are seen by hardware */
+	mb();
+
+	spin_unlock_irq(&tbl->it_lock);
+
+	kfree(container);
+}
+
+static long tce_iommu_ioctl(void *iommu_data,
+				 unsigned int cmd, unsigned long arg)
+{
+	struct tce_container *container = iommu_data;
+	unsigned long minsz;
+	long ret;
+
+	switch (cmd) {
+	case VFIO_CHECK_EXTENSION: {
+		return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0;
+	}
+	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
+		struct vfio_iommu_spapr_tce_info info;
+		struct iommu_table *tbl = container->tbl;
+
+		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
+				dma64_window_size);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (!tbl)
+			return -ENXIO;
+
+		info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT;
+		info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT;
+		info.dma64_window_start = 0;
+		info.dma64_window_size = 0;
+		info.flags = 0;
+
+		return copy_to_user((void __user *)arg, &info, minsz);
+	}
+	case VFIO_IOMMU_SPAPR_TCE_PUT: {
+		struct vfio_iommu_spapr_tce_put par;
+		struct iommu_table *tbl = container->tbl;
+
+		minsz = offsetofend(struct vfio_iommu_spapr_tce_put, tce);
+
+		if (copy_from_user(&par, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (par.argsz < minsz)
+			return -EINVAL;
+
+		if (!tbl) {
+			return -ENXIO;
+		}
+
+		spin_lock_irq(&tbl->it_lock);
+		ret = tce_put(tbl, par.ioba >> IOMMU_PAGE_SHIFT,
+				par.tce, par.flags);
+		spin_unlock_irq(&tbl->it_lock);
+
+		return ret;
+	}
+	default:
+		printk(KERN_WARNING "tce_vfio: unexpected cmd %x\n", cmd);
+	}
+
+	return -ENOTTY;
+}
+
+static int tce_iommu_attach_group(void *iommu_data,
+		struct iommu_group *iommu_group)
+{
+	struct tce_container *container = iommu_data;
+	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);
+
+	printk(KERN_DEBUG "tce_vfio: Attaching group #%u to iommu %p\n",
+			iommu_group_id(iommu_group), iommu_group);
+	if (container->tbl) {
+		printk(KERN_WARNING "tce_vfio: Only one group per IOMMU "
+				"container is allowed, "
+				"existing id=%d, attaching id=%d\n",
+				iommu_group_id(container->tbl->it_group),
+				iommu_group_id(iommu_group));
+		return -EBUSY;
+	}
+
+	container->tbl = tbl;
+
+	return 0;
+}
+
+static void tce_iommu_detach_group(void *iommu_data,
+		struct iommu_group *iommu_group)
+{
+	struct tce_container *container = iommu_data;
+	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);
+
+	BUG_ON(!tbl);
+	if (tbl != container->tbl) {
+		printk(KERN_WARNING "tce_vfio: detaching group #%u, expected "
+				"group is #%u\n", iommu_group_id(iommu_group),
+				iommu_group_id(tbl->it_group));
+		return;
+	}
+	printk(KERN_DEBUG "tce_vfio: detaching group #%u from iommu %p\n",
+			iommu_group_id(iommu_group), iommu_group);
+}
+
+const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
+	.name		= "iommu-vfio-powerpc",
+	.owner		= THIS_MODULE,
+	.open		= tce_iommu_open,
+	.release	= tce_iommu_release,
+	.ioctl		= tce_iommu_ioctl,
+	.attach_group	= tce_iommu_attach_group,
+	.detach_group	= tce_iommu_detach_group,
+};
+
+/*
+ * Add/delete devices support (hotplug, module_init, module_exit)
+ */
+static int add_device(struct device *dev)
+{
+	struct iommu_table *tbl;
+	int ret = 0;
+
+	if (dev->iommu_group) {
+		printk(KERN_WARNING "tce_vfio: device %s is already in iommu "
+				"group %d, skipping\n", dev->kobj.name,
+				iommu_group_id(dev->iommu_group));
+		return -EBUSY;
+	}
+
+	tbl = get_iommu_table_base(dev);
+	if (!tbl) {
+		printk(KERN_DEBUG "tce_vfio: skipping device %s with no tbl\n",
+				dev->kobj.name);
+		return 0;
+	}
+
+	printk(KERN_DEBUG "tce_vfio: adding %s to iommu group %d\n",
+			dev->kobj.name, iommu_group_id(tbl->it_group));
+
+	ret = iommu_group_add_device(tbl->it_group, dev);
+	if (ret < 0)
+		printk(KERN_ERR "tce_vfio: %s has not been added, ret=%d\n",
+				dev->kobj.name, ret);
+
+	return ret;
+}
+
+static void del_device(struct device *dev)
+{
+	iommu_group_remove_device(dev);
+}
+
+static int iommu_bus_notifier(struct notifier_block *nb,
+			      unsigned long action, void *data)
+{
+	struct device *dev = data;
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		return add_device(dev);
+	case BUS_NOTIFY_DEL_DEVICE:
+		del_device(dev);
+		return 0;
+	default:
+		return 0;
+	}
+}
+
+static struct notifier_block tce_iommu_bus_nb = {
+	.notifier_call = iommu_bus_notifier,
+};
+
+void group_release(void *iommu_data)
+{
+	struct iommu_table *tbl = iommu_data;
+	tbl->it_group = NULL;
+}
+
+static int __init tce_iommu_init(void)
+{
+	struct pci_dev *pdev = NULL;
+	struct iommu_table *tbl;
+	struct iommu_group *grp;
+
+	/* If the current platform does not support tce_get
+	   we are unable to clean TCE table properly and
+	   therefore it is better not to touch it at all */
+	if (!ppc_md.tce_get) {
+		printk(KERN_ERR "tce_vfio: ppc_md.tce_get isn't implemented\n");
+		return -EOPNOTSUPP;
+	}
+
+	bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
+
+	/* Allocate and initialize VFIO groups */
+	for_each_pci_dev(pdev) {
+		tbl = get_iommu_table_base(&pdev->dev);
+		if (!tbl)
+			continue;
+
+		/* Skip already initialized */
+		if (tbl->it_group)
+			continue;
+
+		grp = iommu_group_alloc();
+		if (IS_ERR(grp)) {
+			printk(KERN_INFO "tce_vfio: cannot create "
+					"new IOMMU group, ret=%ld\n",
+					PTR_ERR(grp));
+			return -EFAULT;
+		}
+		tbl->it_group = grp;
+		iommu_group_set_iommudata(grp, tbl, group_release);
+	}
+
+	/* Add PCI devices to VFIO groups */
+	for_each_pci_dev(pdev)
+		add_device(&pdev->dev);
+
+	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
+}
+
+static void __exit tce_iommu_cleanup(void)
+{
+	struct pci_dev *pdev = NULL;
+	struct iommu_table *tbl;
+	struct iommu_group *grp = NULL;
+
+	bus_unregister_notifier(&pci_bus_type, &tce_iommu_bus_nb);
+
+	/* Delete PCI devices from VFIO groups */
+	for_each_pci_dev(pdev)
+		del_device(&pdev->dev);
+
+	/* Release VFIO groups */
+	for_each_pci_dev(pdev) {
+		tbl = get_iommu_table_base(&pdev->dev);
+		if (!tbl)
+			continue;
+		grp = tbl->it_group;
+
+		/* Skip (already) uninitialized */
+		if (!grp)
+			continue;
+
+		/* Do actual release, group_release() is expected to work */
+		iommu_group_put(grp);
+		BUG_ON(tbl->it_group);
+	}
+
+	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
+}
+
+module_init(tce_iommu_init);
+module_exit(tce_iommu_cleanup);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 0a4f180..2c0a927 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -99,6 +99,7 @@ extern void vfio_unregister_iommu_driver(
 /* Extensions */
 
 #define VFIO_TYPE1_IOMMU		1
+#define VFIO_SPAPR_TCE_IOMMU		2
 
 /*
  * The IOCTL interface is designed for extensibility by embedding the
@@ -442,4 +443,32 @@ struct vfio_iommu_type1_dma_unmap {
 
 #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
 
+/* -------- API for SPAPR TCE (Server POWERPC) IOMMU -------- */
+
+struct vfio_iommu_spapr_tce_info {
+	__u32 argsz;
+	__u32 flags;
+	__u32 dma32_window_start;
+	__u32 dma32_window_size;
+	__u64 dma64_window_start;
+	__u64 dma64_window_size;
+};
+
+#define VFIO_IOMMU_SPAPR_TCE_GET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12)
+
+struct vfio_iommu_spapr_tce_put {
+	__u32 argsz;
+	__u32 flags;
+#define VFIO_SPAPR_TCE_READ		1
+#define VFIO_SPAPR_TCE_WRITE		2
+#define VFIO_SPAPR_TCE_BIDIRECTIONAL	(VFIO_SPAPR_TCE_READ|VFIO_SPAPR_TCE_WRITE)
+#define VFIO_SPAPR_TCE_PUT_MASK		VFIO_SPAPR_TCE_BIDIRECTIONAL
+	__u64 ioba;
+	__u64 tce;
+};
+
+#define VFIO_IOMMU_SPAPR_TCE_PUT	_IO(VFIO_TYPE, VFIO_BASE + 13)
+
+/* ***************************************************************** */
+
 #endif /* VFIO_H */
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH] powerpc-powernv: added tce_get callback for powernv platform
From: Alexey Kardashevskiy @ 2012-09-04  7:35 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Alexey Kardashevskiy, linuxppc-dev, Paul Mackerras, David Gibson
In-Reply-To: <1346744035-31154-1-git-send-email-aik@ozlabs.ru>

The upcoming VFIO support requires a way to know which
entries in the TCE map are not empty in order to do cleanup
at QEMU exit/crash. This patch adds such functionality
to the POWERNV platform code.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/platforms/powernv/pci.c |    6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index be3cfc5..61f8068 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -447,6 +447,11 @@ static void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
 		pnv_tce_invalidate(tbl, tces, tcep - 1);
 }
 
+static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+{
+	return ((u64 *)tbl->it_base)[index - tbl->it_offset] & IOMMU_PAGE_MASK;
+}
+
 void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 			       void *tce_mem, u64 tce_size,
 			       u64 dma_offset)
@@ -597,6 +602,7 @@ void __init pnv_pci_init(void)
 	ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup;
 	ppc_md.tce_build = pnv_tce_build;
 	ppc_md.tce_free = pnv_tce_free;
+	ppc_md.tce_get = pnv_tce_get;
 	ppc_md.pci_probe_mode = pnv_pci_probe_mode;
 	set_pci_dma_ops(&dma_iommu_ops);
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH] powerpc-kvm: fixing page alignment for TCE
From: Alexey Kardashevskiy @ 2012-09-04  7:36 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Alexey Kardashevskiy, linuxppc-dev, Paul Mackerras, kvm-ppc,
	David Gibson
In-Reply-To: <1346744035-31154-1-git-send-email-aik@ozlabs.ru>

From: Paul Mackerras <paulus@samba.org>

TODO: ask Paul to make a proper message.

This is the fix for a host kernel compiled with a page size
other than 4K (TCE page size). In the case of a 64K page size,
the host used to lose address bits in hpte_rpn().
The patch fixes it.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c |    9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 80a5775..a41f11b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -503,7 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	struct kvm *kvm = vcpu->kvm;
 	unsigned long *hptep, hpte[3], r;
 	unsigned long mmu_seq, psize, pte_size;
-	unsigned long gfn, hva, pfn;
+	unsigned long gpa, gfn, hva, pfn;
 	struct kvm_memory_slot *memslot;
 	unsigned long *rmap;
 	struct revmap_entry *rev;
@@ -541,15 +541,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	/* Translate the logical address and get the page */
 	psize = hpte_page_size(hpte[0], r);
-	gfn = hpte_rpn(r, psize);
+	gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1));
+	gfn = gpa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(kvm, gfn);
 
 	/* No memslot means it's an emulated MMIO region */
-	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
-		unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
 					      dsisr & DSISR_ISSTORE);
-	}
 
 	if (!kvm->arch.using_mmu_notifiers)
 		return -EFAULT;		/* should never get here */
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH] powerpc-powernv: align BARs to PAGE_SIZE on powernv platform
From: Alexey Kardashevskiy @ 2012-09-04  7:36 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Alexey Kardashevskiy, linuxppc-dev, Paul Mackerras, David Gibson
In-Reply-To: <1346744035-31154-1-git-send-email-aik@ozlabs.ru>

VFIO adds a separate memory region for every BAR and tries
to mmap() it to provide direct BAR mapping to the guest.
If it succeeds, QEMU registers this address with kvm_set_phys_mem().
However, this is not always possible because such a BAR must
be aligned to the host page size. When it is not, VFIO uses the
"slow" path with emulated BAR access in QEMU.

In order to avoid the "slow" path, BARs have to be PAGE_SIZE aligned
in the host kernel, and this is what the patch does.

The patch adds a powernv platform-specific hook which makes all
BAR sizes 64K aligned. The pci_reassigndev_resource_alignment()
function from drivers/pci/pci.c has been used as a reference.

This is purely an optimization patch; things will work without
it, just a bit slower.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/platforms/powernv/setup.c |   26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index db1ad1c..331838e 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -25,6 +25,7 @@
 #include <linux/of.h>
 #include <linux/interrupt.h>
 #include <linux/bug.h>
+#include <linux/pci.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
@@ -179,6 +180,30 @@ static int __init pnv_probe(void)
 	return 1;
 }
 
+static void pnv_pcibios_fixup_resources(struct pci_dev *pdev)
+{
+	struct resource *r;
+	int i;
+
+	/*
+	 * Aligning resources to PAGE_SIZE in order to
+	 * support "fast" path for PCI BAR access under VFIO
+	 * which maps every BAR individually to the guest
+	 * so BARs have to be PAGE aligned.
+	 */
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+		r = &pdev->resource[i];
+		if (!r->flags)
+			continue;
+		pr_debug("powernv: %s, aligning BAR#%d %llx..%llx",
+			pdev->dev.kobj.name, i, r->start, r->end);
+		r->end = PAGE_ALIGN(r->end - r->start + 1) - 1;
+		r->start = 0;
+		r->flags |= IORESOURCE_UNSET;
+		pr_debug(" to  %llx..%llx\n", r->start, r->end);
+	}
+}
+
 define_machine(powernv) {
 	.name			= "PowerNV",
 	.probe			= pnv_probe,
@@ -189,6 +214,7 @@ define_machine(powernv) {
 	.progress		= pnv_progress,
 	.power_save             = power7_idle,
 	.calibrate_decr		= generic_calibrate_decr,
+	.pcibios_fixup_resources= pnv_pcibios_fixup_resources,
 #ifdef CONFIG_KEXEC
 	.kexec_cpu_down		= pnv_kexec_cpu_down,
 #endif
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH 4/4] drivers/mtd/nand/mpc5121_nfc.c: some devm_ cleanups
From: Artem Bityutskiy @ 2012-09-04  8:42 UTC (permalink / raw)
  To: Julia Lawall, linuxppc-dev
  Cc: kernel-janitors, David Woodhouse, linux-kernel, linux-mtd
In-Reply-To: <1346517191-8794-4-git-send-email-Julia.Lawall@lip6.fr>

[-- Attachment #1: Type: text/plain, Size: 5912 bytes --]

Aiaiai! :-) [1] [2]

I've build-tested this using aiaiai and it reports that this change breaks the build:

dedekind@blue:~/git/maintaining$ ./verify ../l2-mtd/ mpc5121_nfc < ~/tmp/julia2.mbox 
Tested the patch(es) on top of the following commits:
ba64756 Quick fixes - applied by aiaiai
651c6fa JFFS2: don't fail on bitflips in OOB
e22ac84 mtd: autcpu12-nvram: drop frees of devm_ alloc'd data
ea9d312 mtd: cmdlinepart: minor cleanups

--------------------------------------------------------------------------------
Failed to build the following commit for configuration "powerpc-mpc512x_defconfig" (architecture powerpc)":

0fe13ab drivers/mtd/nand/mpc5121_nfc.c: some devm_ cleanups

...
drivers/mtd/nand/mpc5121_nfc.c: In function 'mpc5121_nfc_probe':
drivers/mtd/nand/mpc5121_nfc.c:622:28: warning: variable 'regs_size' set but not used [-Wunused-but-set-variable]
drivers/mtd/nand/mpc5121_nfc.c:622:16: warning: variable 'regs_paddr' set but not used [-Wunused-but-set-variable]
drivers/built-in.o: In function `mpc5121_nfc_probe':
mpc5121_nfc.c:(.devinit.text+0x2a14): undefined reference to `devm_clk_get'
make[1]: *** [vmlinux] Error 1

--------------------------------------------------------------------------------

I do not really know why, but it seems that the clock framework is not supported for powerpc. CCing the PPC mailing list. Preserved the context below for the PPC people.

So, not taking this patch.

References:

1. http://git.infradead.org/users/dedekind/aiaiai.git
2. http://git.infradead.org/users/dedekind/maintaining.git

On Sat, 2012-09-01 at 18:33 +0200, Julia Lawall wrote:
> From: Julia Lawall <Julia.Lawall@lip6.fr>
> 
> devm free functions should not have to be explicitly used.
> 
> The only thing left that is useful in the function mpc5121_nfc_free is the
> call to clk_disable, which is moved to the call sites.
> 
> This function also incorrectly called iounmap on devm_ioremap allocated
> data.
> 
> Use devm_request_and_ioremap in place of devm_request_mem_region and
> devm_ioremap.
> 
> Use devm_clk_get.
> 
> A semantic match that finds the first problem is as follows:
> (http://coccinelle.lip6.fr/)
> 
> // <smpl>
> @@
> @@
> 
> (
> * devm_kfree(...);
> |
> * devm_free_irq(...);
> |
> * devm_iounmap(...);
> |
> * devm_release_region(...);
> |
> * devm_release_mem_region(...);
> )
> // </smpl>
> 
> Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
> 
> ---
>  drivers/mtd/nand/mpc5121_nfc.c |   35 +++++------------------------------
>  1 file changed, 5 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c
> index c259c24..45183ba 100644
> --- a/drivers/mtd/nand/mpc5121_nfc.c
> +++ b/drivers/mtd/nand/mpc5121_nfc.c
> @@ -632,21 +632,6 @@ out:
>  	return ret;
>  }
>  
> -/* Free driver resources */
> -static void mpc5121_nfc_free(struct device *dev, struct mtd_info *mtd)
> -{
> -	struct nand_chip *chip = mtd->priv;
> -	struct mpc5121_nfc_prv *prv = chip->priv;
> -
> -	if (prv->clk) {
> -		clk_disable(prv->clk);
> -		clk_put(prv->clk);
> -	}
> -
> -	if (prv->csreg)
> -		iounmap(prv->csreg);
> -}
> -
>  static int __devinit mpc5121_nfc_probe(struct platform_device *op)
>  {
>  	struct device_node *rootnode, *dn = op->dev.of_node;
> @@ -713,12 +698,7 @@ static int __devinit mpc5121_nfc_probe(struct platform_device *op)
>  	regs_paddr = res.start;
>  	regs_size = resource_size(&res);
>  
> -	if (!devm_request_mem_region(dev, regs_paddr, regs_size, DRV_NAME)) {
> -		dev_err(dev, "Error requesting memory region!\n");
> -		return -EBUSY;
> -	}
> -
> -	prv->regs = devm_ioremap(dev, regs_paddr, regs_size);
> +	prv->regs = devm_request_and_ioremap(dev, &res);
>  	if (!prv->regs) {
>  		dev_err(dev, "Error mapping memory region!\n");
>  		return -ENOMEM;
> @@ -752,11 +732,10 @@ static int __devinit mpc5121_nfc_probe(struct platform_device *op)
>  	of_node_put(rootnode);
>  
>  	/* Enable NFC clock */
> -	prv->clk = clk_get(dev, "nfc_clk");
> +	prv->clk = devm_clk_get(dev, "nfc_clk");
>  	if (IS_ERR(prv->clk)) {
>  		dev_err(dev, "Unable to acquire NFC clock!\n");
> -		retval = PTR_ERR(prv->clk);
> -		goto error;
> +		return PTR_ERR(prv->clk);
>  	}
>  
>  	clk_enable(prv->clk);
> @@ -803,7 +782,6 @@ static int __devinit mpc5121_nfc_probe(struct platform_device *op)
>  	/* Detect NAND chips */
>  	if (nand_scan(mtd, be32_to_cpup(chips_no))) {
>  		dev_err(dev, "NAND Flash not found !\n");
> -		devm_free_irq(dev, prv->irq, mtd);
>  		retval = -ENXIO;
>  		goto error;
>  	}
> @@ -828,7 +806,6 @@ static int __devinit mpc5121_nfc_probe(struct platform_device *op)
>  
>  	default:
>  		dev_err(dev, "Unsupported NAND flash!\n");
> -		devm_free_irq(dev, prv->irq, mtd);
>  		retval = -ENXIO;
>  		goto error;
>  	}
> @@ -839,13 +816,12 @@ static int __devinit mpc5121_nfc_probe(struct platform_device *op)
>  	retval = mtd_device_parse_register(mtd, NULL, &ppdata, NULL, 0);
>  	if (retval) {
>  		dev_err(dev, "Error adding MTD device!\n");
> -		devm_free_irq(dev, prv->irq, mtd);
>  		goto error;
>  	}
>  
>  	return 0;
>  error:
> -	mpc5121_nfc_free(dev, mtd);
> +	clk_disable(prv->clk);
>  	return retval;
>  }
>  
> @@ -857,8 +833,7 @@ static int __devexit mpc5121_nfc_remove(struct platform_device *op)
>  	struct mpc5121_nfc_prv *prv = chip->priv;
>  
>  	nand_release(mtd);
> -	devm_free_irq(dev, prv->irq, mtd);
> -	mpc5121_nfc_free(dev, mtd);
> +	clk_disable(prv->clk);
>  
>  	return 0;
>  }
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

-- 
Best Regards,
Artem Bityutskiy

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* [PATCH -V7 0/12] arch/powerpc: Add 64TB support to ppc64
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev

Hi,

This patchset includes patches for supporting 64TB with ppc64. I haven't booted
this on hardware with 64TB memory yet, but they boot fine on real hardware with
less memory. The changes extend VSID bits to 38 bits for a 256MB segment
and 26 bits for 1TB segments.

Changes from V6:
 * rebase to latest upstream (5b716ac728bcc01b1f2a7ed6e437196602237c27)

Changes from v5:
 * Address review feedback

Changes from v4:
 * Drop patch "arch/powerpc: properly offset the context bits for 1T segemnts"
   based on review feedback
 * split CONTEXT_BITS related changes from patch 12
 * Add a new doc update patch

Changes from v3:
 * Address review comments.
 * Added new patch to ensure proto-VSID isolation between kernel and user space

Changes from V2:
 * Fix a few FIXMEs in the patchset. I have added them as separate patches for
   easier review. That should help us to drop those changes if we don't agree.

Changes from V1:
* Drop the usage of structure (struct virt_addr) to carry virtual address.
  We now represent virtual address via vpn which is virtual address shifted
  right 12 bits.

Thanks,
-aneesh

^ permalink raw reply

* [PATCH -V7 01/12] arch/powerpc: Replace open coded CONTEXT_BITS value
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

To clarify the meaning for future readers, replace the open coded
19 with CONTEXT_BITS

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/mmu_context_hash64.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 40677aa..daa076c 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -34,7 +34,7 @@ static DEFINE_IDA(mmu_context_ida);
  * Each segment contains 2^28 bytes.  Each context maps 2^44 bytes,
  * so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
  */
-#define MAX_CONTEXT	((1UL << 19) - 1)
+#define MAX_CONTEXT	((1UL << CONTEXT_BITS) - 1)
 
 int __init_new_context(void)
 {
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V7 02/12] arch/powerpc: Use hpt_va to compute virtual address
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Use hpt_va() instead of open-coding the virtual address computation.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/cell/beat_htab.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 943c9d3..b83077e 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -259,7 +259,7 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 	u64 dummy0, dummy1;
 
 	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
-	va = (vsid << 28) | (ea & 0x0fffffff);
+	va = hpt_va(ea, vsid, MMU_SEGSIZE_256M);
 
 	raw_spin_lock(&beat_htab_lock);
 	slot = beat_lpar_hpte_find(va, psize);
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V7 04/12] arch/powerpc: Convert virtual address to vpn
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch converts various functions to take a virtual page number
instead of a virtual address. The virtual page number is the virtual
address shifted right by VPN_SHIFT (12) bits. This enables us to have
an address range of up to 76 bits.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h     |   71 +++++++++++++++++----
 arch/powerpc/include/asm/pte-hash64-64k.h |   18 +++---
 arch/powerpc/kvm/book3s_32_mmu_host.c     |    2 +-
 arch/powerpc/kvm/book3s_64_mmu_host.c     |    2 +-
 arch/powerpc/mm/hash_low_64.S             |   97 ++++++++++++++++++-----------
 arch/powerpc/mm/hash_native_64.c          |   45 +++++++++----
 arch/powerpc/mm/hash_utils_64.c           |    6 +-
 arch/powerpc/mm/hugetlbpage-hash64.c      |    2 +-
 arch/powerpc/mm/tlb_hash64.c              |    2 +-
 arch/powerpc/platforms/cell/beat_htab.c   |    2 +-
 arch/powerpc/platforms/pseries/lpar.c     |   20 +-----
 11 files changed, 173 insertions(+), 94 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 1c65a59..d3a1139 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -15,6 +15,10 @@
 #include <asm/asm-compat.h>
 #include <asm/page.h>
 
+#ifndef __ASSEMBLY__
+#include <linux/bug.h>
+#endif
+
 /*
  * Segment table
  */
@@ -154,9 +158,25 @@ struct mmu_psize_def
 #define MMU_SEGSIZE_256M	0
 #define MMU_SEGSIZE_1T		1
 
+/*
+ * encode page number shift.
+ * in order to fit the 78 bit va in a 64 bit variable we shift the va by
+ * 12 bits. This enable us to address upto 76 bit va.
+ * For hpt hash from a va we can ignore the page size bits of va and for
+ * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure
+ * we work in all cases including 4k page size.
+ */
+#define VPN_SHIFT	12
 
 #ifndef __ASSEMBLY__
 
+static inline int segment_shift(int ssize)
+{
+	if (ssize == MMU_SEGSIZE_256M)
+		return SID_SHIFT;
+	return SID_SHIFT_1T;
+}
+
 /*
  * The current system page and segment sizes
  */
@@ -180,6 +200,29 @@ extern unsigned long tce_alloc_start, tce_alloc_end;
 extern int mmu_ci_restrictions;
 
 /*
+ * This computes the AVPN and B fields of the first dword of a HPTE,
+ * for use when we want to match an existing PTE.  The bottom 7 bits
+ * of the returned value are zero.
+ */
+static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
+					     int ssize)
+{
+	unsigned long v;
+	/*
+	 * The AVA field omits the low-order 23 bits of the 78 bits VA.
+	 * These bits are not needed in the PTE, because the
+	 * low-order b of these bits are part of the byte offset
+	 * into the virtual page and, if b < 23, the high-order
+	 * 23-b of these bits are always used in selecting the
+	 * PTEGs to be searched
+	 */
+	v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
+	v <<= HPTE_V_AVPN_SHIFT;
+	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+	return v;
+}
+
+/*
  * This function sets the AVPN and L fields of the HPTE  appropriately
  * for the page size
  */
@@ -187,11 +230,9 @@ static inline unsigned long hpte_encode_v(unsigned long va, int psize,
 					  int ssize)
 {
 	unsigned long v;
-	v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
-	v <<= HPTE_V_AVPN_SHIFT;
+	v = hpte_encode_avpn(va, psize, ssize);
 	if (psize != MMU_PAGE_4K)
 		v |= HPTE_V_LARGE;
-	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
 	return v;
 }
 
@@ -216,14 +257,16 @@ static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
 }
 
 /*
- * Build a VA given VSID, EA and segment size
+ * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size.
  */
-static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
+static inline unsigned long hpt_vpn(unsigned long ea, unsigned long vsid,
 				   int ssize)
 {
-	if (ssize == MMU_SEGSIZE_256M)
-		return (vsid << 28) | (ea & 0xfffffffUL);
-	return (vsid << 40) | (ea & 0xffffffffffUL);
+	unsigned long mask;
+	int s_shift = segment_shift(ssize);
+
+	mask = (1ul << (s_shift - VPN_SHIFT)) - 1;
+	return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask);
 }
 
 /*
@@ -233,13 +276,19 @@ static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
 static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
 				     int ssize)
 {
+	int mask;
 	unsigned long hash, vsid;
 
+	/* VPN_SHIFT can be atmost 12 */
 	if (ssize == MMU_SEGSIZE_256M) {
-		hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift);
+		mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1;
+		hash = ((va >> (SID_SHIFT - VPN_SHIFT)) & 0x0000007fffffffff) ^
+			(((va & mask) >> (shift - VPN_SHIFT)) & 0xffff);
 	} else {
-		vsid = va >> 40;
-		hash = vsid ^ (vsid << 25) ^ ((va & 0xffffffffffUL) >> shift);
+		mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1;
+		vsid = va >> (SID_SHIFT_1T - VPN_SHIFT);
+		hash = (vsid & 0xffffff) ^ ((vsid << 25) & 0x7fffffffff) ^
+			(((va & mask) >> (shift - VPN_SHIFT)) & 0xfffffff);
 	}
 	return hash & 0x7fffffffffUL;
 }
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index 59247e8..eedf427 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -58,14 +58,16 @@
 /* Trick: we set __end to va + 64k, which happens works for
  * a 16M page as well as we want only one iteration
  */
-#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)	    \
-        do {                                                                \
-                unsigned long __end = va + PAGE_SIZE;                       \
-                unsigned __split = (psize == MMU_PAGE_4K ||                 \
-				    psize == MMU_PAGE_64K_AP);              \
-                shift = mmu_psize_defs[psize].shift;                        \
-		for (index = 0; va < __end; index++, va += (1L << shift)) { \
-		        if (!__split || __rpte_sub_valid(rpte, index)) do { \
+#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift)	\
+	do {								\
+		unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT));	\
+		unsigned __split = (psize == MMU_PAGE_4K ||		\
+				    psize == MMU_PAGE_64K_AP);		\
+		shift = mmu_psize_defs[psize].shift;			\
+		for (index = 0; vpn < __end; index++,			\
+			     vpn += (1L << (shift - VPN_SHIFT))) {	\
+			if (!__split || __rpte_sub_valid(rpte, index))	\
+				do {
 
 #define pte_iterate_hashed_end() } while(0); } } while(0)
 
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 837f13e..45db3b9 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -173,7 +173,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	BUG_ON(!map);
 
 	vsid = map->host_vsid;
-	va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK);
+	va = (vsid << (SID_SHIFT - VPN_SHIFT)) | ((eaddr & ~ESID_MASK) >> VPN_SHIFT)
 
 next_pteg:
 	if (rr == 16) {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 0688b6b..d97c65c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -117,7 +117,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	}
 
 	vsid = map->host_vsid;
-	va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+	va = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
 
 	if (!orig_pte->may_write)
 		rflags |= HPTE_R_PP;
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 602aeb0..5658508 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -63,7 +63,7 @@ _GLOBAL(__hash_page_4K)
 	/* Save non-volatile registers.
 	 * r31 will hold "old PTE"
 	 * r30 is "new PTE"
-	 * r29 is "va"
+	 * r29 is vpn
 	 * r28 is a hash value
 	 * r27 is hashtab mask (maybe dynamic patched instead ?)
 	 */
@@ -111,10 +111,10 @@ BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc va and put it in r29 */
-	rldicr	r29,r5,28,63-28
-	rldicl	r3,r3,0,36
-	or	r29,r3,r29
+	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+	or	r29,r28,r29
 
 	/* Calculate hash value for primary slot and store it in r28 */
 	rldicl	r5,r5,0,25		/* vsid & 0x0000007fffffffff */
@@ -122,14 +122,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	xor	r28,r5,r0
 	b	4f
 
-3:	/* Calc VA and hash in r29 and r28 for 1T segment */
-	sldi	r29,r5,40		/* vsid << 40 */
-	clrldi	r3,r3,24		/* ea & 0xffffffffff */
+3:	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+	or	r29,r28,r29
+
+	/*
+	 * calculate hash value for primary slot and
+	 * store it in r28 for 1T segment
+	 */
 	rldic	r28,r5,25,25		/* (vsid << 25) & 0x7fffffffff */
 	clrldi	r5,r5,40		/* vsid & 0xffffff */
 	rldicl	r0,r3,64-12,36		/* (ea >> 12) & 0xfffffff */
 	xor	r28,r28,r5
-	or	r29,r3,r29		/* VA */
 	xor	r28,r28,r0		/* hash */
 
 	/* Convert linux PTE bits into HW equivalents */
@@ -185,7 +190,7 @@ htab_insert_pte:
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,0			/* !bolted, !secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARAM(R9)(r1)	/* segment size */
@@ -208,7 +213,7 @@ _GLOBAL(htab_call_hpte_insert1)
 	
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARAM(R9)(r1)	/* segment size */
@@ -278,7 +283,7 @@ htab_modify_pte:
 	add	r3,r0,r3	/* add slot idx */
 
 	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* va */
+	mr	r5,r29			/* vpn */
 	li	r6,MMU_PAGE_4K		/* page size */
 	ld	r7,STK_PARAM(R9)(r1)	/* segment size */
 	ld	r8,STK_PARAM(R8)(r1)	/* get "local" param */
@@ -339,7 +344,7 @@ _GLOBAL(__hash_page_4K)
 	/* Save non-volatile registers.
 	 * r31 will hold "old PTE"
 	 * r30 is "new PTE"
-	 * r29 is "va"
+	 * r29 is vpn
 	 * r28 is a hash value
 	 * r27 is hashtab mask (maybe dynamic patched instead ?)
 	 * r26 is the hidx mask
@@ -394,10 +399,14 @@ BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc va and put it in r29 */
-	rldicr	r29,r5,28,63-28		/* r29 = (vsid << 28) */
-	rldicl	r3,r3,0,36		/* r3 = (ea & 0x0fffffff) */
-	or	r29,r3,r29		/* r29 = va */
+	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
+	/*
+	 * clrldi r3,r3,64 - SID_SHIFT -->  ea & 0xfffffff
+	 * srdi	 r28,r3,VPN_SHIFT
+	 */
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+	or	r29,r28,r29
 
 	/* Calculate hash value for primary slot and store it in r28 */
 	rldicl	r5,r5,0,25		/* vsid & 0x0000007fffffffff */
@@ -405,14 +414,23 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	xor	r28,r5,r0
 	b	4f
 
-3:	/* Calc VA and hash in r29 and r28 for 1T segment */
-	sldi	r29,r5,40		/* vsid << 40 */
-	clrldi	r3,r3,24		/* ea & 0xffffffffff */
+3:	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
+	/*
+	 * clrldi r3,r3,64 - SID_SHIFT_1T -->  ea & 0xffffffffff
+	 * srdi	r28,r3,VPN_SHIFT
+	 */
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+	or	r29,r28,r29
+
+	/*
+	 * Calculate hash value for primary slot and
+	 * store it in r28  for 1T segment
+	 */
 	rldic	r28,r5,25,25		/* (vsid << 25) & 0x7fffffffff */
 	clrldi	r5,r5,40		/* vsid & 0xffffff */
 	rldicl	r0,r3,64-12,36		/* (ea >> 12) & 0xfffffff */
 	xor	r28,r28,r5
-	or	r29,r3,r29		/* VA */
 	xor	r28,r28,r0		/* hash */
 
 	/* Convert linux PTE bits into HW equivalents */
@@ -488,7 +506,7 @@ htab_special_pfn:
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,0			/* !bolted, !secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARAM(R9)(r1)	/* segment size */
@@ -515,7 +533,7 @@ _GLOBAL(htab_call_hpte_insert1)
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
 	li	r8,MMU_PAGE_4K		/* page size */
 	ld	r9,STK_PARAM(R9)(r1)	/* segment size */
@@ -547,7 +565,7 @@ _GLOBAL(htab_call_hpte_remove)
 	 * useless now that the segment has been switched to 4k pages.
 	 */
 htab_inval_old_hpte:
-	mr	r3,r29			/* virtual addr */
+	mr	r3,r29			/* vpn */
 	mr	r4,r31			/* PTE.pte */
 	li	r5,0			/* PTE.hidx */
 	li	r6,MMU_PAGE_64K		/* psize */
@@ -620,7 +638,7 @@ htab_modify_pte:
 	add	r3,r0,r3	/* add slot idx */
 
 	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* va */
+	mr	r5,r29			/* vpn */
 	li	r6,MMU_PAGE_4K		/* page size */
 	ld	r7,STK_PARAM(R9)(r1)	/* segment size */
 	ld	r8,STK_PARAM(R8)(r1)	/* get "local" param */
@@ -676,7 +694,7 @@ _GLOBAL(__hash_page_64K)
 	/* Save non-volatile registers.
 	 * r31 will hold "old PTE"
 	 * r30 is "new PTE"
-	 * r29 is "va"
+	 * r29 is vpn
 	 * r28 is a hash value
 	 * r27 is hashtab mask (maybe dynamic patched instead ?)
 	 */
@@ -729,10 +747,10 @@ BEGIN_FTR_SECTION
 	cmpdi	r9,0			/* check segment size */
 	bne	3f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc va and put it in r29 */
-	rldicr	r29,r5,28,63-28
-	rldicl	r3,r3,0,36
-	or	r29,r3,r29
+	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
+	or	r29,r28,r29
 
 	/* Calculate hash value for primary slot and store it in r28 */
 	rldicl	r5,r5,0,25		/* vsid & 0x0000007fffffffff */
@@ -740,14 +758,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	xor	r28,r5,r0
 	b	4f
 
-3:	/* Calc VA and hash in r29 and r28 for 1T segment */
-	sldi	r29,r5,40		/* vsid << 40 */
-	clrldi	r3,r3,24		/* ea & 0xffffffffff */
+3:	/* Calc vpn and put it in r29 */
+	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
+	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
+	or	r29,r28,r29
+
+	/*
+	 * calculate hash value for primary slot and
+	 * store it in r28 for 1T segment
+	 */
 	rldic	r28,r5,25,25		/* (vsid << 25) & 0x7fffffffff */
 	clrldi	r5,r5,40		/* vsid & 0xffffff */
 	rldicl	r0,r3,64-16,40		/* (ea >> 16) & 0xffffff */
 	xor	r28,r28,r5
-	or	r29,r3,r29		/* VA */
 	xor	r28,r28,r0		/* hash */
 
 	/* Convert linux PTE bits into HW equivalents */
@@ -806,7 +829,7 @@ ht64_insert_pte:
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,0			/* !bolted, !secondary */
 	li	r8,MMU_PAGE_64K
 	ld	r9,STK_PARAM(R9)(r1)	/* segment size */
@@ -829,7 +852,7 @@ _GLOBAL(ht64_call_hpte_insert1)
 
 	/* Call ppc_md.hpte_insert */
 	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve va */
+	mr	r4,r29			/* Retrieve vpn */
 	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
 	li	r8,MMU_PAGE_64K
 	ld	r9,STK_PARAM(R9)(r1)	/* segment size */
@@ -899,7 +922,7 @@ ht64_modify_pte:
 	add	r3,r0,r3	/* add slot idx */
 
 	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* va */
+	mr	r5,r29			/* vpn */
 	li	r6,MMU_PAGE_64K
 	ld	r7,STK_PARAM(R9)(r1)	/* segment size */
 	ld	r8,STK_PARAM(R8)(r1)	/* get "local" param */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 660b8bb..a5c08c3 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -39,22 +39,35 @@
 
 DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
-static inline void __tlbie(unsigned long va, int psize, int ssize)
+static inline void __tlbie(unsigned long vpn, int psize, int ssize)
 {
+	unsigned long va;
 	unsigned int penc;
 
-	/* clear top 16 bits, non SLS segment */
+	/*
+	 * We need 14 to 65 bits of va for a tlibe of 4K page
+	 * With vpn we ignore the lower VPN_SHIFT bits already.
+	 * And top two bits are already ignored because we can
+	 * only accomadate 76 bits in a 64 bit vpn with a VPN_SHIFT
+	 * of 12.
+	 */
+	va = vpn << VPN_SHIFT;
+	/*
+	 * clear top 16 bits of 64bit va, non SLS segment
+	 * Older versions of the architecture (2.02 and earler) require the
+	 * masking of the top 16 bits.
+	 */
 	va &= ~(0xffffULL << 48);
 
 	switch (psize) {
 	case MMU_PAGE_4K:
-		va &= ~0xffful;
 		va |= ssize << 8;
 		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
 			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
 			     : "memory");
 		break;
 	default:
+		/* We need 14 to 14 + i bits of va */
 		penc = mmu_psize_defs[psize].penc;
 		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
 		va |= penc << 12;
@@ -67,21 +80,28 @@ static inline void __tlbie(unsigned long va, int psize, int ssize)
 	}
 }
 
-static inline void __tlbiel(unsigned long va, int psize, int ssize)
+static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
 {
+	unsigned long va;
 	unsigned int penc;
 
-	/* clear top 16 bits, non SLS segment */
+	/* VPN_SHIFT can be atmost 12 */
+	va = vpn << VPN_SHIFT;
+	/*
+	 * clear top 16 bits of 64 bit va, non SLS segment
+	 * Older versions of the architecture (2.02 and earler) require the
+	 * masking of the top 16 bits.
+	 */
 	va &= ~(0xffffULL << 48);
 
 	switch (psize) {
 	case MMU_PAGE_4K:
-		va &= ~0xffful;
 		va |= ssize << 8;
 		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
 			     : : "r"(va) : "memory");
 		break;
 	default:
+		/* We need 14 to 14 + i bits of va */
 		penc = mmu_psize_defs[psize].penc;
 		va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
 		va |= penc << 12;
@@ -234,7 +254,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 
 	want_v = hpte_encode_v(va, psize, ssize);
 
-	DBG_LOW("    update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
+	DBG_LOW("    update(va=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
 		va, want_v & HPTE_V_AVPN, slot, newpp);
 
 	native_lock_hpte(hptep);
@@ -300,7 +320,7 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 	struct hash_pte *hptep;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	slot = native_hpte_find(va, psize, ssize);
 	if (slot == -1)
@@ -325,7 +345,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 
 	local_irq_save(flags);
 
-	DBG_LOW("    invalidate(va=%016lx, hash: %x)\n", va, slot);
+	DBG_LOW("    invalidate(va=%016lx, hash: %lx)\n", va, slot);
 
 	want_v = hpte_encode_v(va, psize, ssize);
 	native_lock_hpte(hptep);
@@ -399,7 +419,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			vpi = (vsid ^ pteg) & htab_hash_mask;
 			seg_off |= vpi << shift;
 		}
-		*va = vsid << SID_SHIFT | seg_off;
+		*va = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
 	case MMU_SEGSIZE_1T:
 		/* We only have 40 - 23 bits of seg_off in avpn */
 		seg_off = (avpn & 0x1ffff) << 23;
@@ -408,7 +428,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
 			seg_off |= vpi << shift;
 		}
-		*va = vsid << SID_SHIFT_1T | seg_off;
+		*va = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
 	default:
 		*va = size = 0;
 	}
@@ -425,9 +445,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
  */
 static void native_hpte_clear(void)
 {
+	unsigned long va = 0;
 	unsigned long slot, slots, flags;
 	struct hash_pte *hptep = htab_address;
-	unsigned long hpte_v, va;
+	unsigned long hpte_v;
 	unsigned long pteg_count;
 	int psize, ssize;
 
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 377e5cb..975c7d1 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -192,7 +192,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 	     vaddr += step, paddr += step) {
 		unsigned long hash, hpteg;
 		unsigned long vsid = get_kernel_vsid(vaddr, ssize);
-		unsigned long va = hpt_va(vaddr, vsid, ssize);
+		unsigned long va  = hpt_vpn(vaddr, vsid, ssize);
 		unsigned long tprot = prot;
 
 		/* Make kernel text executable */
@@ -1208,7 +1208,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
 {
 	unsigned long hash, hpteg;
 	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long va = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
 	unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
 	int ret;
 
@@ -1229,7 +1229,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
 {
 	unsigned long hash, hidx, slot;
 	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long va = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
 
 	hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
 	spin_lock(&linear_map_hash_lock);
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index cc5c273..1331403 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -25,7 +25,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
 
 	/* Search the Linux page table for a match with va */
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	/* At this point, we have a pte (old_pte) which can be used to build
 	 * or update an HPTE. There are 2 cases:
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 31f1820..321c585 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -86,7 +86,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
 		ssize = mmu_kernel_ssize;
 	}
-	vaddr = hpt_va(addr, vsid, ssize);
+	vaddr = hpt_vpn(addr, vsid, ssize);
 	rpte = __real_pte(__pte(pte), ptep);
 
 	/*
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index b83077e..c8c7bf6 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -259,7 +259,7 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 	u64 dummy0, dummy1;
 
 	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
-	va = hpt_va(ea, vsid, MMU_SEGSIZE_256M);
+	va = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M);
 
 	raw_spin_lock(&beat_htab_lock);
 	slot = beat_lpar_hpte_find(va, psize);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 5f3ef87..2127529 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -227,22 +227,6 @@ static void pSeries_lpar_hptab_clear(void)
 }
 
 /*
- * This computes the AVPN and B fields of the first dword of a HPTE,
- * for use when we want to match an existing PTE.  The bottom 7 bits
- * of the returned value are zero.
- */
-static inline unsigned long hpte_encode_avpn(unsigned long va, int psize,
-					     int ssize)
-{
-	unsigned long v;
-
-	v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
-	v <<= HPTE_V_AVPN_SHIFT;
-	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
-	return v;
-}
-
-/*
  * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
  * the low 3 bits of flags happen to line up.  So no transform is needed.
  * We can probably optimize here and assume the high bits of newpp are
@@ -326,7 +310,7 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
 	unsigned long lpar_rc, slot, vsid, va, flags;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	slot = pSeries_lpar_hpte_find(va, psize, ssize);
 	BUG_ON(slot == -1);
@@ -361,7 +345,7 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
 	unsigned long slot, vsid, va;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_va(ea, vsid, ssize);
+	va = hpt_vpn(ea, vsid, ssize);
 
 	slot = pSeries_lpar_hpte_find(va, psize, ssize);
 	BUG_ON(slot == -1);
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V7 03/12] arch/powerpc: Simplify hpte_decode
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch simplifies hpte_decode to make it easier to switch from
virtual address to virtual page number in a later patch.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/hash_native_64.c |   49 ++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 90039bc..660b8bb 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -351,9 +351,10 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			int *psize, int *ssize, unsigned long *va)
 {
+	unsigned long avpn, pteg, vpi;
 	unsigned long hpte_r = hpte->r;
 	unsigned long hpte_v = hpte->v;
-	unsigned long avpn;
+	unsigned long vsid, seg_off;
 	int i, size, shift, penc;
 
 	if (!(hpte_v & HPTE_V_LARGE))
@@ -380,32 +381,38 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 	}
 
 	/* This works for all page sizes, and for 256M and 1T segments */
+	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
 	shift = mmu_psize_defs[size].shift;
-	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm) << 23;
-
-	if (shift < 23) {
-		unsigned long vpi, vsid, pteg;
 
-		pteg = slot / HPTES_PER_GROUP;
-		if (hpte_v & HPTE_V_SECONDARY)
-			pteg = ~pteg;
-		switch (hpte_v >> HPTE_V_SSIZE_SHIFT) {
-		case MMU_SEGSIZE_256M:
-			vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask;
-			break;
-		case MMU_SEGSIZE_1T:
-			vsid = avpn >> 40;
+	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
+	pteg = slot / HPTES_PER_GROUP;
+	if (hpte_v & HPTE_V_SECONDARY)
+		pteg = ~pteg;
+
+	switch (*ssize) {
+	case MMU_SEGSIZE_256M:
+		/* We only have 28 - 23 bits of seg_off in avpn */
+		seg_off = (avpn & 0x1f) << 23;
+		vsid    =  avpn >> 5;
+		/* We can find more bits from the pteg value */
+		if (shift < 23) {
+			vpi = (vsid ^ pteg) & htab_hash_mask;
+			seg_off |= vpi << shift;
+		}
+		*va = vsid << SID_SHIFT | seg_off;
+	case MMU_SEGSIZE_1T:
+		/* We only have 40 - 23 bits of seg_off in avpn */
+		seg_off = (avpn & 0x1ffff) << 23;
+		vsid    = avpn >> 17;
+		if (shift < 23) {
 			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
-			break;
-		default:
-			avpn = vpi = size = 0;
+			seg_off |= vpi << shift;
 		}
-		avpn |= (vpi << mmu_psize_defs[size].shift);
+		*va = vsid << SID_SHIFT_1T | seg_off;
+	default:
+		*va = size = 0;
 	}
-
-	*va = avpn;
 	*psize = size;
-	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
 }
 
 /*
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V7 05/12] arch/powerpc: Rename va to vpn
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Rename the variable to better reflect the values. No functional change
in this patch.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s.h   |    2 +-
 arch/powerpc/include/asm/machdep.h      |    6 +--
 arch/powerpc/include/asm/mmu-hash64.h   |   23 ++++----
 arch/powerpc/include/asm/tlbflush.h     |    4 +-
 arch/powerpc/kvm/book3s_32_mmu_host.c   |    8 +--
 arch/powerpc/kvm/book3s_64_mmu_host.c   |   17 +++---
 arch/powerpc/kvm/trace.h                |   14 ++---
 arch/powerpc/mm/hash_native_64.c        |   88 ++++++++++++++++---------------
 arch/powerpc/mm/hash_utils_64.c         |   30 +++++------
 arch/powerpc/mm/hugetlbpage-hash64.c    |   15 +++---
 arch/powerpc/mm/tlb_hash64.c            |   11 ++--
 arch/powerpc/platforms/cell/beat_htab.c |   45 ++++++++--------
 arch/powerpc/platforms/ps3/htab.c       |   22 ++++----
 arch/powerpc/platforms/pseries/lpar.c   |   60 +++++++++++----------
 14 files changed, 177 insertions(+), 168 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index f0e0c6a..7aefdb3 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -59,7 +59,7 @@ struct hpte_cache {
 	struct hlist_node list_vpte;
 	struct hlist_node list_vpte_long;
 	struct rcu_head rcu_head;
-	u64 host_va;
+	u64 host_vpn;
 	u64 pfn;
 	ulong slot;
 	struct kvmppc_pte pte;
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 42ce570..cd63f1a 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -34,19 +34,19 @@ struct machdep_calls {
 	char		*name;
 #ifdef CONFIG_PPC64
 	void            (*hpte_invalidate)(unsigned long slot,
-					   unsigned long va,
+					   unsigned long vpn,
 					   int psize, int ssize,
 					   int local);
 	long		(*hpte_updatepp)(unsigned long slot, 
 					 unsigned long newpp, 
-					 unsigned long va,
+					 unsigned long vpn,
 					 int psize, int ssize,
 					 int local);
 	void            (*hpte_updateboltedpp)(unsigned long newpp, 
 					       unsigned long ea,
 					       int psize, int ssize);
 	long		(*hpte_insert)(unsigned long hpte_group,
-				       unsigned long va,
+				       unsigned long vpn,
 				       unsigned long prpn,
 				       unsigned long rflags,
 				       unsigned long vflags,
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index d3a1139..d848c56 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -226,11 +226,11 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
  * This function sets the AVPN and L fields of the HPTE  appropriately
  * for the page size
  */
-static inline unsigned long hpte_encode_v(unsigned long va, int psize,
-					  int ssize)
+static inline unsigned long hpte_encode_v(unsigned long vpn,
+					  int psize, int ssize)
 {
 	unsigned long v;
-	v = hpte_encode_avpn(va, psize, ssize);
+	v = hpte_encode_avpn(vpn, psize, ssize);
 	if (psize != MMU_PAGE_4K)
 		v |= HPTE_V_LARGE;
 	return v;
@@ -259,8 +259,8 @@ static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
 /*
  * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size.
  */
-static inline unsigned long hpt_vpn(unsigned long ea, unsigned long vsid,
-				   int ssize)
+static inline unsigned long hpt_vpn(unsigned long ea,
+				    unsigned long vsid, int ssize)
 {
 	unsigned long mask;
 	int s_shift = segment_shift(ssize);
@@ -272,9 +272,8 @@ static inline unsigned long hpt_vpn(unsigned long ea, unsigned long vsid,
 /*
  * This hashes a virtual address
  */
-
-static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
-				     int ssize)
+static inline unsigned long hpt_hash(unsigned long vpn,
+				     unsigned int shift, int ssize)
 {
 	int mask;
 	unsigned long hash, vsid;
@@ -282,13 +281,13 @@ static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
 	/* VPN_SHIFT can be atmost 12 */
 	if (ssize == MMU_SEGSIZE_256M) {
 		mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1;
-		hash = ((va >> (SID_SHIFT - VPN_SHIFT)) & 0x0000007fffffffff) ^
-			(((va & mask) >> (shift - VPN_SHIFT)) & 0xffff);
+		hash = ((vpn >> (SID_SHIFT - VPN_SHIFT)) & 0x0000007fffffffff) ^
+			(((vpn & mask) >> (shift - VPN_SHIFT)) & 0xffff);
 	} else {
 		mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1;
-		vsid = va >> (SID_SHIFT_1T - VPN_SHIFT);
+		vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT);
 		hash = (vsid & 0xffffff) ^ ((vsid << 25) & 0x7fffffffff) ^
-			(((va & mask) >> (shift - VPN_SHIFT)) & 0xfffffff);
+			(((vpn & mask) >> (shift - VPN_SHIFT)) & 0xfffffff);
 	}
 	return hash & 0x7fffffffffUL;
 }
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 81143fc..fc02d1d 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -95,7 +95,7 @@ struct ppc64_tlb_batch {
 	unsigned long		index;
 	struct mm_struct	*mm;
 	real_pte_t		pte[PPC64_TLB_BATCH_NR];
-	unsigned long		vaddr[PPC64_TLB_BATCH_NR];
+	unsigned long		vpn[PPC64_TLB_BATCH_NR];
 	unsigned int		psize;
 	int			ssize;
 };
@@ -127,7 +127,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
 #define arch_flush_lazy_mmu_mode()      do {} while (0)
 
 
-extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize,
+extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
 			    int ssize, int local);
 extern void flush_hash_range(unsigned long number, int local);
 
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 45db3b9..00aa612 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -141,7 +141,7 @@ extern char etext[];
 int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 {
 	pfn_t hpaddr;
-	u64 va;
+	u64 vpn;
 	u64 vsid;
 	struct kvmppc_sid_map *map;
 	volatile u32 *pteg;
@@ -173,7 +173,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	BUG_ON(!map);
 
 	vsid = map->host_vsid;
-	va = (vsid << (SID_SHIFT - VPN_SHIFT)) | ((eaddr & ~ESID_MASK) >> VPN_SHIFT)
+	vpn = (vsid << (SID_SHIFT - VPN_SHIFT)) | ((eaddr & ~ESID_MASK) >> VPN_SHIFT);
 
 next_pteg:
 	if (rr == 16) {
@@ -244,11 +244,11 @@ next_pteg:
 	dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n",
 		    orig_pte->may_write ? 'w' : '-',
 		    orig_pte->may_execute ? 'x' : '-',
-		    orig_pte->eaddr, (ulong)pteg, va,
+		    orig_pte->eaddr, (ulong)pteg, vpn,
 		    orig_pte->vpage, hpaddr);
 
 	pte->slot = (ulong)&pteg[rr];
-	pte->host_va = va;
+	pte->host_vpn = vpn;
 	pte->pte = *orig_pte;
 	pte->pfn = hpaddr >> PAGE_SHIFT;
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index d97c65c..4d72f9e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -33,7 +33,7 @@
 
 void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 {
-	ppc_md.hpte_invalidate(pte->slot, pte->host_va,
+	ppc_md.hpte_invalidate(pte->slot, pte->host_vpn,
 			       MMU_PAGE_4K, MMU_SEGSIZE_256M,
 			       false);
 }
@@ -80,8 +80,9 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
 
 int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 {
+	unsigned long vpn;
 	pfn_t hpaddr;
-	ulong hash, hpteg, va;
+	ulong hash, hpteg;
 	u64 vsid;
 	int ret;
 	int rflags = 0x192;
@@ -117,7 +118,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	}
 
 	vsid = map->host_vsid;
-	va = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+	vpn = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
 
 	if (!orig_pte->may_write)
 		rflags |= HPTE_R_PP;
@@ -129,7 +130,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 	else
 		kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT);
 
-	hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M);
+	hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M);
 
 map_again:
 	hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -141,7 +142,8 @@ map_again:
 			goto out;
 		}
 
-	ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M);
+	ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
+				 MMU_PAGE_4K, MMU_SEGSIZE_256M);
 
 	if (ret < 0) {
 		/* If we couldn't map a primary PTE, try a secondary */
@@ -152,7 +154,8 @@ map_again:
 	} else {
 		struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
 
-		trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte);
+		trace_kvm_book3s_64_mmu_map(rflags, hpteg,
+					    vpn, hpaddr, orig_pte);
 
 		/* The ppc_md code may give us a secondary entry even though we
 		   asked for a primary. Fix up. */
@@ -162,7 +165,7 @@ map_again:
 		}
 
 		pte->slot = hpteg + (ret & 7);
-		pte->host_va = va;
+		pte->host_vpn = vpn;
 		pte->pte = *orig_pte;
 		pte->pfn = hpaddr >> PAGE_SHIFT;
 
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 877186b..ddb6a21 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -189,7 +189,7 @@ TRACE_EVENT(kvm_book3s_mmu_map,
 	TP_ARGS(pte),
 
 	TP_STRUCT__entry(
-		__field(	u64,		host_va		)
+		__field(	u64,		host_vpn	)
 		__field(	u64,		pfn		)
 		__field(	ulong,		eaddr		)
 		__field(	u64,		vpage		)
@@ -198,7 +198,7 @@ TRACE_EVENT(kvm_book3s_mmu_map,
 	),
 
 	TP_fast_assign(
-		__entry->host_va	= pte->host_va;
+		__entry->host_vpn	= pte->host_vpn;
 		__entry->pfn		= pte->pfn;
 		__entry->eaddr		= pte->pte.eaddr;
 		__entry->vpage		= pte->pte.vpage;
@@ -208,8 +208,8 @@ TRACE_EVENT(kvm_book3s_mmu_map,
 					  (pte->pte.may_execute ? 0x1 : 0);
 	),
 
-	TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
-		  __entry->host_va, __entry->pfn, __entry->eaddr,
+	TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+		  __entry->host_vpn, __entry->pfn, __entry->eaddr,
 		  __entry->vpage, __entry->raddr, __entry->flags)
 );
 
@@ -218,7 +218,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
 	TP_ARGS(pte),
 
 	TP_STRUCT__entry(
-		__field(	u64,		host_va		)
+		__field(	u64,		host_vpn	)
 		__field(	u64,		pfn		)
 		__field(	ulong,		eaddr		)
 		__field(	u64,		vpage		)
@@ -227,7 +227,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
 	),
 
 	TP_fast_assign(
-		__entry->host_va	= pte->host_va;
+		__entry->host_vpn	= pte->host_vpn;
 		__entry->pfn		= pte->pfn;
 		__entry->eaddr		= pte->pte.eaddr;
 		__entry->vpage		= pte->pte.vpage;
@@ -238,7 +238,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate,
 	),
 
-	TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
-		  __entry->host_va, __entry->pfn, __entry->eaddr,
+	TP_printk("Flush: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+		  __entry->host_vpn, __entry->pfn, __entry->eaddr,
 		  __entry->vpage, __entry->raddr, __entry->flags)
 );
 
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index a5c08c3..36b212b 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -114,7 +114,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
 
 }
 
-static inline void tlbie(unsigned long va, int psize, int ssize, int local)
+static inline void tlbie(unsigned long vpn, int psize, int ssize, int local)
 {
 	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
 	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
@@ -125,10 +125,10 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local)
 		raw_spin_lock(&native_tlbie_lock);
 	asm volatile("ptesync": : :"memory");
 	if (use_local) {
-		__tlbiel(va, psize, ssize);
+		__tlbiel(vpn, psize, ssize);
 		asm volatile("ptesync": : :"memory");
 	} else {
-		__tlbie(va, psize, ssize);
+		__tlbie(vpn, psize, ssize);
 		asm volatile("eieio; tlbsync; ptesync": : :"memory");
 	}
 	if (lock_tlbie && !use_local)
@@ -154,7 +154,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
 	clear_bit_unlock(HPTE_LOCK_BIT, word);
 }
 
-static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
+static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 			unsigned long pa, unsigned long rflags,
 			unsigned long vflags, int psize, int ssize)
 {
@@ -163,9 +163,9 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
 	int i;
 
 	if (!(vflags & HPTE_V_BOLTED)) {
-		DBG_LOW("    insert(group=%lx, va=%016lx, pa=%016lx,"
+		DBG_LOW("    insert(group=%lx, vpn=%016lx, pa=%016lx,"
 			" rflags=%lx, vflags=%lx, psize=%d)\n",
-			hpte_group, va, pa, rflags, vflags, psize);
+			hpte_group, vpn, pa, rflags, vflags, psize);
 	}
 
 	for (i = 0; i < HPTES_PER_GROUP; i++) {
@@ -183,7 +183,7 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
 	if (i == HPTES_PER_GROUP)
 		return -1;
 
-	hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
+	hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
 	hpte_r = hpte_encode_r(pa, psize) | rflags;
 
 	if (!(vflags & HPTE_V_BOLTED)) {
@@ -245,17 +245,17 @@ static long native_hpte_remove(unsigned long hpte_group)
 }
 
 static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
-				 unsigned long va, int psize, int ssize,
+				 unsigned long vpn, int psize, int ssize,
 				 int local)
 {
 	struct hash_pte *hptep = htab_address + slot;
 	unsigned long hpte_v, want_v;
 	int ret = 0;
 
-	want_v = hpte_encode_v(va, psize, ssize);
+	want_v = hpte_encode_v(vpn, psize, ssize);
 
-	DBG_LOW("    update(va=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
-		va, want_v & HPTE_V_AVPN, slot, newpp);
+	DBG_LOW("    update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
+		vpn, want_v & HPTE_V_AVPN, slot, newpp);
 
 	native_lock_hpte(hptep);
 
@@ -274,12 +274,12 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 	native_unlock_hpte(hptep);
 
 	/* Ensure it is out of the tlb too. */
-	tlbie(va, psize, ssize, local);
+	tlbie(vpn, psize, ssize, local);
 
 	return ret;
 }
 
-static long native_hpte_find(unsigned long va, int psize, int ssize)
+static long native_hpte_find(unsigned long vpn, int psize, int ssize)
 {
 	struct hash_pte *hptep;
 	unsigned long hash;
@@ -287,8 +287,8 @@ static long native_hpte_find(unsigned long va, int psize, int ssize)
 	long slot;
 	unsigned long want_v, hpte_v;
 
-	hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
-	want_v = hpte_encode_v(va, psize, ssize);
+	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+	want_v = hpte_encode_v(vpn, psize, ssize);
 
 	/* Bolted mappings are only ever in the primary group */
 	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -315,14 +315,15 @@ static long native_hpte_find(unsigned long va, int psize, int ssize)
 static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 				       int psize, int ssize)
 {
-	unsigned long vsid, va;
+	unsigned long vpn;
+	unsigned long vsid;
 	long slot;
 	struct hash_pte *hptep;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_vpn(ea, vsid, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
 
-	slot = native_hpte_find(va, psize, ssize);
+	slot = native_hpte_find(vpn, psize, ssize);
 	if (slot == -1)
 		panic("could not find page to bolt\n");
 	hptep = htab_address + slot;
@@ -332,10 +333,10 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 		(newpp & (HPTE_R_PP | HPTE_R_N));
 
 	/* Ensure it is out of the tlb too. */
-	tlbie(va, psize, ssize, 0);
+	tlbie(vpn, psize, ssize, 0);
 }
 
-static void native_hpte_invalidate(unsigned long slot, unsigned long va,
+static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 				   int psize, int ssize, int local)
 {
 	struct hash_pte *hptep = htab_address + slot;
@@ -345,9 +346,9 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 
 	local_irq_save(flags);
 
-	DBG_LOW("    invalidate(va=%016lx, hash: %lx)\n", va, slot);
+	DBG_LOW("    invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
 
-	want_v = hpte_encode_v(va, psize, ssize);
+	want_v = hpte_encode_v(vpn, psize, ssize);
 	native_lock_hpte(hptep);
 	hpte_v = hptep->v;
 
@@ -359,7 +360,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 		hptep->v = 0;
 
 	/* Invalidate the TLB */
-	tlbie(va, psize, ssize, local);
+	tlbie(vpn, psize, ssize, local);
 
 	local_irq_restore(flags);
 }
@@ -369,7 +370,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
 #define LP_MASK(i)	((0xFF >> (i)) << LP_SHIFT)
 
 static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
-			int *psize, int *ssize, unsigned long *va)
+			int *psize, int *ssize, unsigned long *vpn)
 {
 	unsigned long avpn, pteg, vpi;
 	unsigned long hpte_r = hpte->r;
@@ -419,7 +420,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			vpi = (vsid ^ pteg) & htab_hash_mask;
 			seg_off |= vpi << shift;
 		}
-		*va = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
 	case MMU_SEGSIZE_1T:
 		/* We only have 40 - 23 bits of seg_off in avpn */
 		seg_off = (avpn & 0x1ffff) << 23;
@@ -428,9 +429,9 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
 			seg_off |= vpi << shift;
 		}
-		*va = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
 	default:
-		*va = size = 0;
+		*vpn = size = 0;
 	}
 	*psize = size;
 }
@@ -445,7 +446,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
  */
 static void native_hpte_clear(void)
 {
-	unsigned long va = 0;
+	unsigned long vpn = 0;
 	unsigned long slot, slots, flags;
 	struct hash_pte *hptep = htab_address;
 	unsigned long hpte_v;
@@ -476,9 +477,9 @@ static void native_hpte_clear(void)
 		 * already hold the native_tlbie_lock.
 		 */
 		if (hpte_v & HPTE_V_VALID) {
-			hpte_decode(hptep, slot, &psize, &ssize, &va);
+			hpte_decode(hptep, slot, &psize, &ssize, &vpn);
 			hptep->v = 0;
-			__tlbie(va, psize, ssize);
+			__tlbie(vpn, psize, ssize);
 		}
 	}
 
@@ -493,7 +494,8 @@ static void native_hpte_clear(void)
  */
 static void native_flush_hash_range(unsigned long number, int local)
 {
-	unsigned long va, hash, index, hidx, shift, slot;
+	unsigned long vpn;
+	unsigned long hash, index, hidx, shift, slot;
 	struct hash_pte *hptep;
 	unsigned long hpte_v;
 	unsigned long want_v;
@@ -507,18 +509,18 @@ static void native_flush_hash_range(unsigned long number, int local)
 	local_irq_save(flags);
 
 	for (i = 0; i < number; i++) {
-		va = batch->vaddr[i];
+		vpn = batch->vpn[i];
 		pte = batch->pte[i];
 
-		pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
-			hash = hpt_hash(va, shift, ssize);
+		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+			hash = hpt_hash(vpn, shift, ssize);
 			hidx = __rpte_to_hidx(pte, index);
 			if (hidx & _PTEIDX_SECONDARY)
 				hash = ~hash;
 			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 			slot += hidx & _PTEIDX_GROUP_IX;
 			hptep = htab_address + slot;
-			want_v = hpte_encode_v(va, psize, ssize);
+			want_v = hpte_encode_v(vpn, psize, ssize);
 			native_lock_hpte(hptep);
 			hpte_v = hptep->v;
 			if (!HPTE_V_COMPARE(hpte_v, want_v) ||
@@ -533,12 +535,12 @@ static void native_flush_hash_range(unsigned long number, int local)
 	    mmu_psize_defs[psize].tlbiel && local) {
 		asm volatile("ptesync":::"memory");
 		for (i = 0; i < number; i++) {
-			va = batch->vaddr[i];
+			vpn = batch->vpn[i];
 			pte = batch->pte[i];
 
-			pte_iterate_hashed_subpages(pte, psize, va, index,
-						    shift) {
-				__tlbiel(va, psize, ssize);
+			pte_iterate_hashed_subpages(pte, psize,
+						    vpn, index, shift) {
+				__tlbiel(vpn, psize, ssize);
 			} pte_iterate_hashed_end();
 		}
 		asm volatile("ptesync":::"memory");
@@ -550,12 +552,12 @@ static void native_flush_hash_range(unsigned long number, int local)
 
 		asm volatile("ptesync":::"memory");
 		for (i = 0; i < number; i++) {
-			va = batch->vaddr[i];
+			vpn = batch->vpn[i];
 			pte = batch->pte[i];
 
-			pte_iterate_hashed_subpages(pte, psize, va, index,
-						    shift) {
-				__tlbie(va, psize, ssize);
+			pte_iterate_hashed_subpages(pte, psize,
+						    vpn, index, shift) {
+				__tlbie(vpn, psize, ssize);
 			} pte_iterate_hashed_end();
 		}
 		asm volatile("eieio; tlbsync; ptesync":::"memory");
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 975c7d1..74c5479 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -192,18 +192,18 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 	     vaddr += step, paddr += step) {
 		unsigned long hash, hpteg;
 		unsigned long vsid = get_kernel_vsid(vaddr, ssize);
-		unsigned long va  = hpt_vpn(vaddr, vsid, ssize);
+		unsigned long vpn  = hpt_vpn(vaddr, vsid, ssize);
 		unsigned long tprot = prot;
 
 		/* Make kernel text executable */
 		if (overlaps_kernel_text(vaddr, vaddr + step))
 			tprot &= ~HPTE_R_N;
 
-		hash = hpt_hash(va, shift, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
 		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 
 		BUG_ON(!ppc_md.hpte_insert);
-		ret = ppc_md.hpte_insert(hpteg, va, paddr, tprot,
+		ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot,
 					 HPTE_V_BOLTED, psize, ssize);
 
 		if (ret < 0)
@@ -1153,21 +1153,21 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 /* WARNING: This is called from hash_low_64.S, if you change this prototype,
  *          do not forget to update the assembly call site !
  */
-void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize,
+void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
 		     int local)
 {
 	unsigned long hash, index, shift, hidx, slot;
 
-	DBG_LOW("flush_hash_page(va=%016lx)\n", va);
-	pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
-		hash = hpt_hash(va, shift, ssize);
+	DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
+	pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+		hash = hpt_hash(vpn, shift, ssize);
 		hidx = __rpte_to_hidx(pte, index);
 		if (hidx & _PTEIDX_SECONDARY)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		slot += hidx & _PTEIDX_GROUP_IX;
 		DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
-		ppc_md.hpte_invalidate(slot, va, psize, ssize, local);
+		ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local);
 	} pte_iterate_hashed_end();
 }
 
@@ -1181,7 +1181,7 @@ void flush_hash_range(unsigned long number, int local)
 			&__get_cpu_var(ppc64_tlb_batch);
 
 		for (i = 0; i < number; i++)
-			flush_hash_page(batch->vaddr[i], batch->pte[i],
+			flush_hash_page(batch->vpn[i], batch->pte[i],
 					batch->psize, batch->ssize, local);
 	}
 }
@@ -1208,14 +1208,14 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
 {
 	unsigned long hash, hpteg;
 	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long va = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
 	unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
 	int ret;
 
-	hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
+	hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
 	hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 
-	ret = ppc_md.hpte_insert(hpteg, va, __pa(vaddr),
+	ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr),
 				 mode, HPTE_V_BOLTED,
 				 mmu_linear_psize, mmu_kernel_ssize);
 	BUG_ON (ret < 0);
@@ -1229,9 +1229,9 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
 {
 	unsigned long hash, hidx, slot;
 	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
-	unsigned long va = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
 
-	hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize);
+	hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
 	spin_lock(&linear_map_hash_lock);
 	BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
 	hidx = linear_map_hash_slots[lmi] & 0x7f;
@@ -1241,7 +1241,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
 		hash = ~hash;
 	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 	slot += hidx & _PTEIDX_GROUP_IX;
-	ppc_md.hpte_invalidate(slot, va, mmu_linear_psize, mmu_kernel_ssize, 0);
+	ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_kernel_ssize, 0);
 }
 
 void kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 1331403..cecad34 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -18,14 +18,15 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		     pte_t *ptep, unsigned long trap, int local, int ssize,
 		     unsigned int shift, unsigned int mmu_psize)
 {
+	unsigned long vpn;
 	unsigned long old_pte, new_pte;
-	unsigned long va, rflags, pa, sz;
+	unsigned long rflags, pa, sz;
 	long slot;
 
 	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
 
 	/* Search the Linux page table for a match with va */
-	va = hpt_vpn(ea, vsid, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
 
 	/* At this point, we have a pte (old_pte) which can be used to build
 	 * or update an HPTE. There are 2 cases:
@@ -69,19 +70,19 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		/* There MIGHT be an HPTE for this pte */
 		unsigned long hash, slot;
 
-		hash = hpt_hash(va, shift, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
 		if (old_pte & _PAGE_F_SECOND)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		slot += (old_pte & _PAGE_F_GIX) >> 12;
 
-		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
+		if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
 					 ssize, local) == -1)
 			old_pte &= ~_PAGE_HPTEFLAGS;
 	}
 
 	if (likely(!(old_pte & _PAGE_HASHPTE))) {
-		unsigned long hash = hpt_hash(va, shift, ssize);
+		unsigned long hash = hpt_hash(vpn, shift, ssize);
 		unsigned long hpte_group;
 
 		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
@@ -101,14 +102,14 @@ repeat:
 				      _PAGE_COHERENT | _PAGE_GUARDED));
 
 		/* Insert into the hash table, primary slot */
-		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
+		slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
 					  mmu_psize, ssize);
 
 		/* Primary is full, try the secondary */
 		if (unlikely(slot == -1)) {
 			hpte_group = ((~hash & htab_hash_mask) *
 				      HPTES_PER_GROUP) & ~0x7UL;
-			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
+			slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
 						  HPTE_V_SECONDARY,
 						  mmu_psize, ssize);
 			if (slot == -1) {
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 321c585..ae758b3 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -42,8 +42,9 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
 void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, unsigned long pte, int huge)
 {
+	unsigned long vpn;
 	struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
-	unsigned long vsid, vaddr;
+	unsigned long vsid;
 	unsigned int psize;
 	int ssize;
 	real_pte_t rpte;
@@ -86,7 +87,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
 		ssize = mmu_kernel_ssize;
 	}
-	vaddr = hpt_vpn(addr, vsid, ssize);
+	vpn = hpt_vpn(addr, vsid, ssize);
 	rpte = __real_pte(__pte(pte), ptep);
 
 	/*
@@ -96,7 +97,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 	 * and decide to use local invalidates instead...
 	 */
 	if (!batch->active) {
-		flush_hash_page(vaddr, rpte, psize, ssize, 0);
+		flush_hash_page(vpn, rpte, psize, ssize, 0);
 		put_cpu_var(ppc64_tlb_batch);
 		return;
 	}
@@ -122,7 +123,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 		batch->ssize = ssize;
 	}
 	batch->pte[i] = rpte;
-	batch->vaddr[i] = vaddr;
+	batch->vpn[i] = vpn;
 	batch->index = ++i;
 	if (i >= PPC64_TLB_BATCH_NR)
 		__flush_tlb_pending(batch);
@@ -146,7 +147,7 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
 	if (cpumask_equal(mm_cpumask(batch->mm), tmp))
 		local = 1;
 	if (i == 1)
-		flush_hash_page(batch->vaddr[0], batch->pte[0],
+		flush_hash_page(batch->vpn[0], batch->pte[0],
 				batch->psize, batch->ssize, local);
 	else
 		flush_hash_range(i, local);
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index c8c7bf6..0f6f839 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -88,7 +88,7 @@ static inline unsigned int beat_read_mask(unsigned hpte_group)
 }
 
 static long beat_lpar_hpte_insert(unsigned long hpte_group,
-				  unsigned long va, unsigned long pa,
+				  unsigned long vpn, unsigned long pa,
 				  unsigned long rflags, unsigned long vflags,
 				  int psize, int ssize)
 {
@@ -103,7 +103,7 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group,
 			"rflags=%lx, vflags=%lx, psize=%d)\n",
-		hpte_group, va, pa, rflags, vflags, psize);
+		hpte_group, vpn, pa, rflags, vflags, psize);
 
-	hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) |
+	hpte_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M) |
 		vflags | HPTE_V_VALID;
 	hpte_r = hpte_encode_r(pa, psize) | rflags;
 
@@ -184,14 +184,14 @@ static void beat_lpar_hptab_clear(void)
  */
 static long beat_lpar_hpte_updatepp(unsigned long slot,
 				    unsigned long newpp,
-				    unsigned long va,
+				    unsigned long vpn,
 				    int psize, int ssize, int local)
 {
 	unsigned long lpar_rc;
 	u64 dummy0, dummy1;
 	unsigned long want_v;
 
-	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
 
 	DBG_LOW("    update: "
 		"avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
@@ -220,15 +220,15 @@ static long beat_lpar_hpte_updatepp(unsigned long slot,
 	return 0;
 }
 
-static long beat_lpar_hpte_find(unsigned long va, int psize)
+static long beat_lpar_hpte_find(unsigned long vpn, int psize)
 {
 	unsigned long hash;
 	unsigned long i, j;
 	long slot;
 	unsigned long want_v, hpte_v;
 
-	hash = hpt_hash(va, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M);
-	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
 
 	for (j = 0; j < 2; j++) {
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -255,14 +255,15 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 					  unsigned long ea,
 					  int psize, int ssize)
 {
-	unsigned long lpar_rc, slot, vsid, va;
+	unsigned long vpn;
+	unsigned long lpar_rc, slot, vsid;
 	u64 dummy0, dummy1;
 
 	vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
-	va = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M);
+	vpn = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M);
 
 	raw_spin_lock(&beat_htab_lock);
-	slot = beat_lpar_hpte_find(va, psize);
+	slot = beat_lpar_hpte_find(vpn, psize);
 	BUG_ON(slot == -1);
 
 	lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7,
@@ -272,7 +273,7 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 	BUG_ON(lpar_rc != 0);
 }
 
-static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
+static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
 					 int psize, int ssize, int local)
 {
 	unsigned long want_v;
@@ -282,7 +283,7 @@ static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
 
-	DBG_LOW("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
-		slot, va, psize, local);
-	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	DBG_LOW("    inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
+		slot, vpn, psize, local);
+	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
 
 	raw_spin_lock_irqsave(&beat_htab_lock, flags);
 	dummy1 = beat_lpar_hpte_getword0(slot);
@@ -311,7 +312,7 @@ void __init hpte_init_beat(void)
 }
 
 static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
-				  unsigned long va, unsigned long pa,
+				  unsigned long vpn, unsigned long pa,
 				  unsigned long rflags, unsigned long vflags,
 				  int psize, int ssize)
 {
@@ -322,11 +323,11 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
 		return -1;
 
 	if (!(vflags & HPTE_V_BOLTED))
-		DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
+		DBG_LOW("hpte_insert(group=%lx, vpn=%016lx, pa=%016lx, "
 			"rflags=%lx, vflags=%lx, psize=%d)\n",
-		hpte_group, va, pa, rflags, vflags, psize);
+		hpte_group, vpn, pa, rflags, vflags, psize);
 
-	hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) |
+	hpte_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M) |
 		vflags | HPTE_V_VALID;
 	hpte_r = hpte_encode_r(pa, psize) | rflags;
 
@@ -364,14 +365,14 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
  */
 static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
 				    unsigned long newpp,
-				    unsigned long va,
+				    unsigned long vpn,
 				    int psize, int ssize, int local)
 {
 	unsigned long lpar_rc;
 	unsigned long want_v;
 	unsigned long pss;
 
-	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
 	pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
 
 	DBG_LOW("    update: "
@@ -392,16 +393,16 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
 	return 0;
 }
 
-static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long va,
+static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn,
 					 int psize, int ssize, int local)
 {
 	unsigned long want_v;
 	unsigned long lpar_rc;
 	unsigned long pss;
 
-	DBG_LOW("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
-		slot, va, psize, local);
-	want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M);
+	DBG_LOW("    inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
+		slot, vpn, psize, local);
+	want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M);
 	pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
 
 	lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss);
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 3124cf7..d00d7b0 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -43,7 +43,7 @@ enum ps3_lpar_vas_id {
 
 static DEFINE_SPINLOCK(ps3_htab_lock);
 
-static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va,
+static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 	unsigned long pa, unsigned long rflags, unsigned long vflags,
 	int psize, int ssize)
 {
@@ -61,7 +61,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va,
 	 */
 	vflags &= ~HPTE_V_SECONDARY;
 
-	hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
+	hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
 	hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize) | rflags;
 
 	spin_lock_irqsave(&ps3_htab_lock, flags);
@@ -75,8 +75,8 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va,
 
 	if (result) {
 		/* all entries bolted !*/
-		pr_info("%s:result=%d va=%lx pa=%lx ix=%lx v=%llx r=%llx\n",
-			__func__, result, va, pa, hpte_group, hpte_v, hpte_r);
+		pr_info("%s:result=%d vpn=%lx pa=%lx ix=%lx v=%llx r=%llx\n",
+			__func__, result, vpn, pa, hpte_group, hpte_v, hpte_r);
 		BUG();
 	}
 
@@ -107,7 +107,7 @@ static long ps3_hpte_remove(unsigned long hpte_group)
 }
 
 static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
-	unsigned long va, int psize, int ssize, int local)
+	unsigned long vpn, int psize, int ssize, int local)
 {
 	int result;
 	u64 hpte_v, want_v, hpte_rs;
@@ -115,7 +115,7 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
 	unsigned long flags;
 	long ret;
 
-	want_v = hpte_encode_v(va, psize, ssize);
+	want_v = hpte_encode_v(vpn, psize, ssize);
 
 	spin_lock_irqsave(&ps3_htab_lock, flags);
 
@@ -125,8 +125,8 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
 				       &hpte_rs);
 
 	if (result) {
-		pr_info("%s: res=%d read va=%lx slot=%lx psize=%d\n",
-			__func__, result, va, slot, psize);
+		pr_info("%s: res=%d read vpn=%lx slot=%lx psize=%d\n",
+			__func__, result, vpn, slot, psize);
 		BUG();
 	}
 
@@ -159,7 +159,7 @@ static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 	panic("ps3_hpte_updateboltedpp() not implemented");
 }
 
-static void ps3_hpte_invalidate(unsigned long slot, unsigned long va,
+static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	int psize, int ssize, int local)
 {
 	unsigned long flags;
@@ -170,8 +170,8 @@ static void ps3_hpte_invalidate(unsigned long slot, unsigned long va,
 	result = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0);
 
 	if (result) {
-		pr_info("%s: res=%d va=%lx slot=%lx psize=%d\n",
-			__func__, result, va, slot, psize);
+		pr_info("%s: res=%d vpn=%lx slot=%lx psize=%d\n",
+			__func__, result, vpn, slot, psize);
 		BUG();
 	}
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 2127529..8308b25 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -108,9 +108,9 @@ void vpa_init(int cpu)
 }
 
 static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
- 			      unsigned long va, unsigned long pa,
- 			      unsigned long rflags, unsigned long vflags,
-			      int psize, int ssize)
+				     unsigned long vpn, unsigned long pa,
+				     unsigned long rflags, unsigned long vflags,
+				     int psize, int ssize)
 {
 	unsigned long lpar_rc;
 	unsigned long flags;
@@ -118,11 +118,11 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	unsigned long hpte_v, hpte_r;
 
 	if (!(vflags & HPTE_V_BOLTED))
-		pr_devel("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
-			 "rflags=%lx, vflags=%lx, psize=%d)\n",
-			 hpte_group, va, pa, rflags, vflags, psize);
+		pr_devel("hpte_insert(group=%lx, vpn=%016lx, "
+			 "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n",
+			 hpte_group, vpn,  pa, rflags, vflags, psize);
 
-	hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
+	hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
 	hpte_r = hpte_encode_r(pa, psize) | rflags;
 
 	if (!(vflags & HPTE_V_BOLTED))
@@ -234,14 +234,14 @@ static void pSeries_lpar_hptab_clear(void)
  */
 static long pSeries_lpar_hpte_updatepp(unsigned long slot,
 				       unsigned long newpp,
-				       unsigned long va,
+				       unsigned long vpn,
 				       int psize, int ssize, int local)
 {
 	unsigned long lpar_rc;
 	unsigned long flags = (newpp & 7) | H_AVPN;
 	unsigned long want_v;
 
-	want_v = hpte_encode_avpn(va, psize, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
 
 	pr_devel("    update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
 		 want_v, slot, flags, psize);
@@ -279,15 +279,15 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
 	return dword0;
 }
 
-static long pSeries_lpar_hpte_find(unsigned long va, int psize, int ssize)
+static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
 {
 	unsigned long hash;
 	unsigned long i;
 	long slot;
 	unsigned long want_v, hpte_v;
 
-	hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize);
-	want_v = hpte_encode_avpn(va, psize, ssize);
+	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
 
 	/* Bolted entries are always in the primary group */
 	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -307,12 +307,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
 					     unsigned long ea,
 					     int psize, int ssize)
 {
-	unsigned long lpar_rc, slot, vsid, va, flags;
+	unsigned long vpn;
+	unsigned long lpar_rc, slot, vsid, flags;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_vpn(ea, vsid, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
 
-	slot = pSeries_lpar_hpte_find(va, psize, ssize);
+	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
 	BUG_ON(slot == -1);
 
 	flags = newpp & 7;
@@ -321,17 +322,17 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
 	BUG_ON(lpar_rc != H_SUCCESS);
 }
 
-static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
+static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
 					 int psize, int ssize, int local)
 {
 	unsigned long want_v;
 	unsigned long lpar_rc;
 	unsigned long dummy1, dummy2;
 
-	pr_devel("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
-		 slot, va, psize, local);
+	pr_devel("    inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
+		 slot, vpn, psize, local);
 
-	want_v = hpte_encode_avpn(va, psize, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
 	lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2);
 	if (lpar_rc == H_NOT_FOUND)
 		return;
@@ -342,15 +343,16 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
 static void pSeries_lpar_hpte_removebolted(unsigned long ea,
 					   int psize, int ssize)
 {
-	unsigned long slot, vsid, va;
+	unsigned long vpn;
+	unsigned long slot, vsid;
 
 	vsid = get_kernel_vsid(ea, ssize);
-	va = hpt_vpn(ea, vsid, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
 
-	slot = pSeries_lpar_hpte_find(va, psize, ssize);
+	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
 	BUG_ON(slot == -1);
 
-	pSeries_lpar_hpte_invalidate(slot, va, psize, ssize, 0);
+	pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0);
 }
 
 /* Flag bits for H_BULK_REMOVE */
@@ -366,12 +368,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
  */
 static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
 {
+	unsigned long vpn;
 	unsigned long i, pix, rc;
 	unsigned long flags = 0;
 	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
 	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 	unsigned long param[9];
-	unsigned long va;
 	unsigned long hash, index, shift, hidx, slot;
 	real_pte_t pte;
 	int psize, ssize;
@@ -383,21 +385,21 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
 	ssize = batch->ssize;
 	pix = 0;
 	for (i = 0; i < number; i++) {
-		va = batch->vaddr[i];
+		vpn = batch->vpn[i];
 		pte = batch->pte[i];
-		pte_iterate_hashed_subpages(pte, psize, va, index, shift) {
-			hash = hpt_hash(va, shift, ssize);
+		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+			hash = hpt_hash(vpn, shift, ssize);
 			hidx = __rpte_to_hidx(pte, index);
 			if (hidx & _PTEIDX_SECONDARY)
 				hash = ~hash;
 			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 			slot += hidx & _PTEIDX_GROUP_IX;
 			if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
-				pSeries_lpar_hpte_invalidate(slot, va, psize,
+				pSeries_lpar_hpte_invalidate(slot, vpn, psize,
 							     ssize, local);
 			} else {
 				param[pix] = HBR_REQUEST | HBR_AVPN | slot;
-				param[pix+1] = hpte_encode_avpn(va, psize,
+				param[pix+1] = hpte_encode_avpn(vpn, psize,
 								ssize);
 				pix += 2;
 				if (pix == 8) {
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V7 06/12] arch/powerpc: Make KERN_VIRT_SIZE not dependend on PGTABLE_RANGE
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

As we keep increasing PGTABLE_RANGE, we need not increase the virtual
map area for the kernel.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/pgtable-ppc64.h |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index c420561..8af1cf2 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -41,7 +41,7 @@
 #else
 #define KERN_VIRT_START ASM_CONST(0xD000000000000000)
 #endif
-#define KERN_VIRT_SIZE	PGTABLE_RANGE
+#define KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
 
 /*
  * The vmalloc space starts at the beginning of that region, and
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V7 07/12] arch/powerpc: Increase the slice range to 64TB
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch makes the high psizes mask an unsigned char array
so that we can have more than 16TB. Currently we support up to
64TB.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h |    6 +-
 arch/powerpc/include/asm/page_64.h    |    6 +-
 arch/powerpc/mm/hash_utils_64.c       |   15 +++--
 arch/powerpc/mm/slb_low.S             |   30 ++++++---
 arch/powerpc/mm/slice.c               |  107 +++++++++++++++++++++------------
 5 files changed, 109 insertions(+), 55 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index d848c56..8f05eec 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -464,7 +464,11 @@ typedef struct {
 
 #ifdef CONFIG_PPC_MM_SLICES
 	u64 low_slices_psize;	/* SLB page size encodings */
-	u64 high_slices_psize;  /* 4 bits per slice for now */
+	/*
+	 * Right now we support 64TB and 4 bits for each
+	 * 1TB slice we need 32 bytes for 64TB.
+	 */
+	unsigned char high_slices_psize[32];  /* 4 bits per slice for now */
 #else
 	u16 sllp;		/* SLB page size encoding */
 #endif
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index fed85e6..6c9bef4 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -82,7 +82,11 @@ extern u64 ppc64_pft_size;
 
 struct slice_mask {
 	u16 low_slices;
-	u16 high_slices;
+	/*
+	 * This should be derived out of PGTABLE_RANGE. For the current
+	 * max 64TB, u64 should be ok.
+	 */
+	u64 high_slices;
 };
 
 struct mm_struct;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 74c5479..13e0ccf 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -804,16 +804,19 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 #ifdef CONFIG_PPC_MM_SLICES
 unsigned int get_paca_psize(unsigned long addr)
 {
-	unsigned long index, slices;
+	u64 lpsizes;
+	unsigned char *hpsizes;
+	unsigned long index, mask_index;
 
 	if (addr < SLICE_LOW_TOP) {
-		slices = get_paca()->context.low_slices_psize;
+		lpsizes = get_paca()->context.low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
-	} else {
-		slices = get_paca()->context.high_slices_psize;
-		index = GET_HIGH_SLICE_INDEX(addr);
+		return (lpsizes >> (index * 4)) & 0xF;
 	}
-	return (slices >> (index * 4)) & 0xF;
+	hpsizes = get_paca()->context.high_slices_psize;
+	index = GET_HIGH_SLICE_INDEX(addr);
+	mask_index = index & 0x1;
+	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
 }
 
 #else
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index b9ee79ce..e132dc6 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -108,17 +108,31 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 	 * between 4k and 64k standard page size
 	 */
 #ifdef CONFIG_PPC_MM_SLICES
+	/* r10 have esid */
 	cmpldi	r10,16
-
-	/* Get the slice index * 4 in r11 and matching slice size mask in r9 */
-	ld	r9,PACALOWSLICESPSIZE(r13)
-	sldi	r11,r10,2
+	/* below SLICE_LOW_TOP */
 	blt	5f
-	ld	r9,PACAHIGHSLICEPSIZE(r13)
-	srdi	r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
-	andi.	r11,r11,0x3c
+	/*
+	 * Handle hpsizes,
+	 * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
+	 */
+	srdi    r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
+	addi	r9,r11,PACAHIGHSLICEPSIZE
+	lbzx	r9,r13,r9		/* r9 is hpsizes[r11] */
+	/* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */
+	rldicl	r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
+	b	6f
 
-5:	/* Extract the psize and multiply to get an array offset */
+5:
+	/*
+	 * Handle lpsizes
+	 * r9 is get_paca()->context.low_slices_psize, r11 is index
+	 */
+	ld	r9,PACALOWSLICESPSIZE(r13)
+	mr	r11,r10
+6:
+	sldi	r11,r11,2  /* index * 4 */
+	/* Extract the psize and multiply to get an array offset */
 	srd	r9,r9,r11
 	andi.	r9,r9,0xf
 	mulli	r9,r9,MMUPSIZEDEFSIZE
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 73709f7..b4e996a 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -42,7 +42,7 @@ int _slice_debug = 1;
 
 static void slice_print_mask(const char *label, struct slice_mask mask)
 {
-	char	*p, buf[16 + 3 + 16 + 1];
+	char	*p, buf[16 + 3 + 64 + 1];
 	int	i;
 
 	if (!_slice_debug)
@@ -54,7 +54,7 @@ static void slice_print_mask(const char *label, struct slice_mask mask)
 	*(p++) = '-';
 	*(p++) = ' ';
 	for (i = 0; i < SLICE_NUM_HIGH; i++)
-		*(p++) = (mask.high_slices & (1 << i)) ? '1' : '0';
+		*(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
 	*(p++) = 0;
 
 	printk(KERN_DEBUG "%s:%s\n", label, buf);
@@ -84,8 +84,8 @@ static struct slice_mask slice_range_to_mask(unsigned long start,
 	}
 
 	if ((start + len) > SLICE_LOW_TOP)
-		ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1))
-			- (1u << GET_HIGH_SLICE_INDEX(start));
+		ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1))
+			- (1ul << GET_HIGH_SLICE_INDEX(start));
 
 	return ret;
 }
@@ -135,26 +135,31 @@ static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
 
 	for (i = 0; i < SLICE_NUM_HIGH; i++)
 		if (!slice_high_has_vma(mm, i))
-			ret.high_slices |= 1u << i;
+			ret.high_slices |= 1ul << i;
 
 	return ret;
 }
 
 static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
 {
+	unsigned char *hpsizes;
+	int index, mask_index;
 	struct slice_mask ret = { 0, 0 };
 	unsigned long i;
-	u64 psizes;
+	u64 lpsizes;
 
-	psizes = mm->context.low_slices_psize;
+	lpsizes = mm->context.low_slices_psize;
 	for (i = 0; i < SLICE_NUM_LOW; i++)
-		if (((psizes >> (i * 4)) & 0xf) == psize)
+		if (((lpsizes >> (i * 4)) & 0xf) == psize)
 			ret.low_slices |= 1u << i;
 
-	psizes = mm->context.high_slices_psize;
-	for (i = 0; i < SLICE_NUM_HIGH; i++)
-		if (((psizes >> (i * 4)) & 0xf) == psize)
-			ret.high_slices |= 1u << i;
+	hpsizes = mm->context.high_slices_psize;
+	for (i = 0; i < SLICE_NUM_HIGH; i++) {
+		mask_index = i & 0x1;
+		index = i >> 1;
+		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
+			ret.high_slices |= 1ul << i;
+	}
 
 	return ret;
 }
@@ -183,8 +188,10 @@ static void slice_flush_segments(void *parm)
 
 static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
 {
+	int index, mask_index;
 	/* Write the new slice psize bits */
-	u64 lpsizes, hpsizes;
+	unsigned char *hpsizes;
+	u64 lpsizes;
 	unsigned long i, flags;
 
 	slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
@@ -201,14 +208,18 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
 				(((unsigned long)psize) << (i * 4));
 
-	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < SLICE_NUM_HIGH; i++)
-		if (mask.high_slices & (1u << i))
-			hpsizes = (hpsizes & ~(0xful << (i * 4))) |
-				(((unsigned long)psize) << (i * 4));
-
+	/* Assign the value back */
 	mm->context.low_slices_psize = lpsizes;
-	mm->context.high_slices_psize = hpsizes;
+
+	hpsizes = mm->context.high_slices_psize;
+	for (i = 0; i < SLICE_NUM_HIGH; i++) {
+		mask_index = i & 0x1;
+		index = i >> 1;
+		if (mask.high_slices & (1ul << i))
+			hpsizes[index] = (hpsizes[index] &
+					  ~(0xf << (mask_index * 4))) |
+				(((unsigned long)psize) << (mask_index * 4));
+	}
 
 	slice_dbg(" lsps=%lx, hsps=%lx\n",
 		  mm->context.low_slices_psize,
@@ -587,18 +598,19 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 
 unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
 {
-	u64 psizes;
-	int index;
+	unsigned char *hpsizes;
+	int index, mask_index;
 
 	if (addr < SLICE_LOW_TOP) {
-		psizes = mm->context.low_slices_psize;
+		u64 lpsizes;
+		lpsizes = mm->context.low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
-	} else {
-		psizes = mm->context.high_slices_psize;
-		index = GET_HIGH_SLICE_INDEX(addr);
+		return (lpsizes >> (index * 4)) & 0xf;
 	}
-
-	return (psizes >> (index * 4)) & 0xf;
+	hpsizes = mm->context.high_slices_psize;
+	index = GET_HIGH_SLICE_INDEX(addr);
+	mask_index = index & 0x1;
+	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
 }
 EXPORT_SYMBOL_GPL(get_slice_psize);
 
@@ -618,7 +630,9 @@ EXPORT_SYMBOL_GPL(get_slice_psize);
  */
 void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 {
-	unsigned long flags, lpsizes, hpsizes;
+	int index, mask_index;
+	unsigned char *hpsizes;
+	unsigned long flags, lpsizes;
 	unsigned int old_psize;
 	int i;
 
@@ -639,15 +653,21 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 		if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
 			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
 				(((unsigned long)psize) << (i * 4));
+	/* Assign the value back */
+	mm->context.low_slices_psize = lpsizes;
 
 	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < SLICE_NUM_HIGH; i++)
-		if (((hpsizes >> (i * 4)) & 0xf) == old_psize)
-			hpsizes = (hpsizes & ~(0xful << (i * 4))) |
-				(((unsigned long)psize) << (i * 4));
+	for (i = 0; i < SLICE_NUM_HIGH; i++) {
+		mask_index = i & 0x1;
+		index = i >> 1;
+		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
+			hpsizes[index] = (hpsizes[index] &
+					  ~(0xf << (mask_index * 4))) |
+				(((unsigned long)psize) << (mask_index * 4));
+	}
+
+
 
-	mm->context.low_slices_psize = lpsizes;
-	mm->context.high_slices_psize = hpsizes;
 
 	slice_dbg(" lsps=%lx, hsps=%lx\n",
 		  mm->context.low_slices_psize,
@@ -660,18 +680,27 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 void slice_set_psize(struct mm_struct *mm, unsigned long address,
 		     unsigned int psize)
 {
+	unsigned char *hpsizes;
 	unsigned long i, flags;
-	u64 *p;
+	u64 *lpsizes;
 
 	spin_lock_irqsave(&slice_convert_lock, flags);
 	if (address < SLICE_LOW_TOP) {
 		i = GET_LOW_SLICE_INDEX(address);
-		p = &mm->context.low_slices_psize;
+		lpsizes = &mm->context.low_slices_psize;
+		*lpsizes = (*lpsizes & ~(0xful << (i * 4))) |
+			((unsigned long) psize << (i * 4));
 	} else {
+		int index, mask_index;
 		i = GET_HIGH_SLICE_INDEX(address);
-		p = &mm->context.high_slices_psize;
+		hpsizes = mm->context.high_slices_psize;
+		mask_index = i & 0x1;
+		index = i >> 1;
+		hpsizes[index] = (hpsizes[index] &
+				  ~(0xf << (mask_index * 4))) |
+			(((unsigned long)psize) << (mask_index * 4));
 	}
-	*p = (*p & ~(0xful << (i * 4))) | ((unsigned long) psize << (i * 4));
+
 	spin_unlock_irqrestore(&slice_convert_lock, flags);
 
 #ifdef CONFIG_SPU_BASE
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V7 08/12] arch/powerpc: Make some of the PGTABLE_RANGE dependency explicit
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

The slice array size and slice mask size depend on PGTABLE_RANGE. We
can't directly include pgtable.h in these headers because there is
a circular dependency. So add compile-time checks for these values.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h    |   13 ++++++++-----
 arch/powerpc/include/asm/page_64.h       |   16 ++++++++++++----
 arch/powerpc/include/asm/pgtable-ppc64.h |    8 ++++++++
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 8f05eec..8c5c5a4 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -420,6 +420,13 @@ extern void slb_set_size(u16 size);
 	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
 	add	rt,rt,rx
 
+/* 4 bits per slice and we have one slice per 1TB */
+#if 0 /* We can't directly include pgtable.h hence this hack */
+#define SLICE_ARRAY_SIZE  (PGTABLE_RANGE >> 41)
+#else
+/* Right now we only support 64TB */
+#define SLICE_ARRAY_SIZE  32
+#endif
 
 #ifndef __ASSEMBLY__
 
@@ -464,11 +471,7 @@ typedef struct {
 
 #ifdef CONFIG_PPC_MM_SLICES
 	u64 low_slices_psize;	/* SLB page size encodings */
-	/*
-	 * Right now we support 64TB and 4 bits for each
-	 * 1TB slice we need 32 bytes for 64TB.
-	 */
-	unsigned char high_slices_psize[32];  /* 4 bits per slice for now */
+	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
 #else
 	u16 sllp;		/* SLB page size encoding */
 #endif
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 6c9bef4..b55beb4 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -78,14 +78,22 @@ extern u64 ppc64_pft_size;
 #define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
 #define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
 
+/* 1 bit per slice and we have one slice per 1TB */
+#if 0 /* We can't directly include pgtable.h hence this hack */
+#define SLICE_MASK_SIZE (PGTABLE_RANGE >> 43)
+#else
+/*
+ * Right now we support only 64TB.
+ * IF we change this we will have to change the type
+ * of high_slices
+ */
+#define SLICE_MASK_SIZE 8
+#endif
+
 #ifndef __ASSEMBLY__
 
 struct slice_mask {
 	u16 low_slices;
-	/*
-	 * This should be derived out of PGTABLE_RANGE. For the current
-	 * max 64TB, u64 should be ok.
-	 */
 	u64 high_slices;
 };
 
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 8af1cf2..dea953f 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -32,6 +32,14 @@
 #endif
 #endif
 
+#if (PGTABLE_RANGE >> 41) > SLICE_ARRAY_SIZE
+#error PGTABLE_RANGE exceeds SLICE_ARRAY_SIZE
+#endif
+
+#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
+#error PGTABLE_RANGE exceeds slice_mask high_slices size
+#endif
+
 /*
  * Define the address range of the kernel non-linear virtual area
  */
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V7 10/12] arch/powerpc: Use 32bit array for slb cache
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

With a larger VSID we need to track more bits of the ESID in the SLB
cache for SLB invalidation.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/paca.h |    2 +-
 arch/powerpc/mm/slb_low.S       |    8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index daf813f..3e7abba 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -100,7 +100,7 @@ struct paca_struct {
 	/* SLB related definitions */
 	u16 vmalloc_sllp;
 	u16 slb_cache_ptr;
-	u16 slb_cache[SLB_CACHE_ENTRIES];
+	u32 slb_cache[SLB_CACHE_ENTRIES];
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 #ifdef CONFIG_PPC_BOOK3E
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 3b75f19..f6a2625 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -270,10 +270,10 @@ _GLOBAL(slb_compare_rr_to_size)
 	bge	1f
 
 	/* still room in the slb cache */
-	sldi	r11,r3,1		/* r11 = offset * sizeof(u16) */
-	rldicl	r10,r10,36,28		/* get low 16 bits of the ESID */
-	add	r11,r11,r13		/* r11 = (u16 *)paca + offset */
-	sth	r10,PACASLBCACHE(r11)	/* paca->slb_cache[offset] = esid */
+	sldi	r11,r3,2		/* r11 = offset * sizeof(u32) */
+	srdi    r10,r10,28		/* get the 36 bits of the ESID */
+	add	r11,r11,r13		/* r11 = (u32 *)paca + offset */
+	stw	r10,PACASLBCACHE(r11)	/* paca->slb_cache[offset] = esid */
 	addi	r3,r3,1			/* offset++ */
 	b	2f
 1:					/* offset >= SLB_CACHE_ENTRIES */
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V7 12/12] arch/powerpc: Update VSID allocation documentation
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This updates the proto-VSID and VSID scramble related information
to be more generic by using names instead of current values.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h |   40 ++++++++++++++-------------------
 arch/powerpc/mm/mmu_context_hash64.c  |    8 ++++---
 2 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 5c52691..8dfe31a 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -328,51 +328,45 @@ extern void slb_set_size(u16 size);
 #endif /* __ASSEMBLY__ */
 
 /*
- * VSID allocation
+ * VSID allocation (256MB segment)
  *
- * We first generate a 36-bit "proto-VSID".  For kernel addresses this
- * is equal to the ESID, for user addresses it is:
- *	(context << 15) | (esid & 0x7fff)
+ * We first generate a 38-bit "proto-VSID".  For kernel addresses this
+ * is equal to the ESID | 1 << 37, for user addresses it is:
+ *	(context << USER_ESID_BITS) | (esid & ((1U << USER_ESID_BITS) - 1)
  *
- * The two forms are distinguishable because the top bit is 0 for user
- * addresses, whereas the top two bits are 1 for kernel addresses.
- * Proto-VSIDs with the top two bits equal to 0b10 are reserved for
- * now.
+ * This splits the proto-VSID into the below range
+ *  0 - (2^(CONTEXT_BITS + USER_ESID_BITS) - 1) : User proto-VSID range
+ *  2^(CONTEXT_BITS + USER_ESID_BITS) - 2^(VSID_BITS) : Kernel proto-VSID range
+ *
+ * We also have CONTEXT_BITS + USER_ESID_BITS = VSID_BITS - 1
+ * That is, we assign half of the space to user processes and half
+ * to the kernel.
  *
  * The proto-VSIDs are then scrambled into real VSIDs with the
  * multiplicative hash:
  *
  *	VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS
- *	where	VSID_MULTIPLIER = 268435399 = 0xFFFFFC7
- *		VSID_MODULUS = 2^36-1 = 0xFFFFFFFFF
  *
- * This scramble is only well defined for proto-VSIDs below
- * 0xFFFFFFFFF, so both proto-VSID and actual VSID 0xFFFFFFFFF are
- * reserved.  VSID_MULTIPLIER is prime, so in particular it is
+ * VSID_MULTIPLIER is prime, so in particular it is
  * co-prime to VSID_MODULUS, making this a 1:1 scrambling function.
  * Because the modulus is 2^n-1 we can compute it efficiently without
  * a divide or extra multiply (see below).
  *
  * This scheme has several advantages over older methods:
  *
- * 	- We have VSIDs allocated for every kernel address
+ *	- We have VSIDs allocated for every kernel address
  * (i.e. everything above 0xC000000000000000), except the very top
  * segment, which simplifies several things.
  *
- *	- We allow for 16 significant bits of ESID and 19 bits of
- * context for user addresses.  i.e. 16T (44 bits) of address space for
- * up to half a million contexts.
+ *	- We allow for USER_ESID_BITS significant bits of ESID and
+ * CONTEXT_BITS  bits of context for user addresses.
+ *  i.e. 64T (46 bits) of address space for up to half a million contexts.
  *
- * 	- The scramble function gives robust scattering in the hash
+ *	- The scramble function gives robust scattering in the hash
  * table (at least based on some initial results).  The previous
  * method was more susceptible to pathological cases giving excessive
  * hash collisions.
  */
-/*
- * WARNING - If you change these you must make sure the asm
- * implementations in slb_allocate (slb_low.S), do_stab_bolted
- * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly.
- */
 
 /*
  * This should be computed such that protovosid * vsid_mulitplier
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index daa076c..40bc5b0 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -30,9 +30,11 @@ static DEFINE_SPINLOCK(mmu_context_lock);
 static DEFINE_IDA(mmu_context_ida);
 
 /*
- * The proto-VSID space has 2^35 - 1 segments available for user mappings.
- * Each segment contains 2^28 bytes.  Each context maps 2^44 bytes,
- * so we can support 2^19-1 contexts (19 == 35 + 28 - 44).
+ * 256MB segment
+ * The proto-VSID space has 2^(CONTEXT_BITS + USER_ESID_BITS) - 1 segments
+ * available for user mappings. Each segment contains 2^28 bytes. Each
+ * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
+ * (19 == 37 + 28 - 46).
  */
 #define MAX_CONTEXT	((1UL << CONTEXT_BITS) - 1)
 
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V7 11/12] arch/powerpc: Add 64TB support
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Increase max addressable range to 64TB. This is not tested on
real hardware yet.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mmu-hash64.h        |   42 ++++++++++++++++++++------
 arch/powerpc/include/asm/pgtable-ppc64-4k.h  |    2 +-
 arch/powerpc/include/asm/pgtable-ppc64-64k.h |    2 +-
 arch/powerpc/include/asm/processor.h         |    4 +--
 arch/powerpc/include/asm/sparsemem.h         |    4 +--
 arch/powerpc/kernel/exceptions-64s.S         |    4 ++-
 arch/powerpc/mm/slb_low.S                    |   12 ++++++++
 7 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 8c5c5a4..5c52691 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -374,17 +374,21 @@ extern void slb_set_size(u16 size);
  * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly.
  */
 
-#define VSID_MULTIPLIER_256M	ASM_CONST(200730139)	/* 28-bit prime */
-#define VSID_BITS_256M		36
+/*
+ * This should be computed such that protovosid * vsid_mulitplier
+ * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
+ */
+#define VSID_MULTIPLIER_256M	ASM_CONST(12538073)	/* 24-bit prime */
+#define VSID_BITS_256M		38
 #define VSID_MODULUS_256M	((1UL<<VSID_BITS_256M)-1)
 
 #define VSID_MULTIPLIER_1T	ASM_CONST(12538073)	/* 24-bit prime */
-#define VSID_BITS_1T		24
+#define VSID_BITS_1T		26
 #define VSID_MODULUS_1T		((1UL<<VSID_BITS_1T)-1)
 
 #define CONTEXT_BITS		19
-#define USER_ESID_BITS		16
-#define USER_ESID_BITS_1T	4
+#define USER_ESID_BITS		18
+#define USER_ESID_BITS_1T	6
 
 #define USER_VSID_RANGE	(1UL << (USER_ESID_BITS + SID_SHIFT))
 
@@ -507,12 +511,32 @@ typedef struct {
 	})
 #endif /* 1 */
 
-/* This is only valid for addresses >= PAGE_OFFSET */
+/*
+ * This is only valid for addresses >= PAGE_OFFSET
+ * The proto-VSID space is divided into two classes
+ * User:   0 to 2^(CONTEXT_BITS + USER_ESID_BITS) -1
+ * kernel: 2^(CONTEXT_BITS + USER_ESID_BITS) to 2^(VSID_BITS) - 1
+ *
+ * With KERNEL_START at 0xc000000000000000, the proto vsid for
+ * the kernel ends up with 0xc00000000 (36 bits). With 64TB
+ * support we need to have kernel proto-VSID in the
+ * [2^37 to 2^38 - 1] range due to the increased USER_ESID_BITS.
+ */
 static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 {
-	if (ssize == MMU_SEGSIZE_256M)
-		return vsid_scramble(ea >> SID_SHIFT, 256M);
-	return vsid_scramble(ea >> SID_SHIFT_1T, 1T);
+	unsigned long proto_vsid;
+	/*
+	 * We need to make sure proto_vsid for the kernel is
+	 * >= 2^(CONTEXT_BITS + USER_ESID_BITS[_1T])
+	 */
+	if (ssize == MMU_SEGSIZE_256M) {
+		proto_vsid = ea >> SID_SHIFT;
+		proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS));
+		return vsid_scramble(proto_vsid, 256M);
+	}
+	proto_vsid = ea >> SID_SHIFT_1T;
+	proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS_1T));
+	return vsid_scramble(proto_vsid, 1T);
 }
 
 /* Returns the segment size indicator for a user address */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h
index 6eefdcf..b3eccf2 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h
@@ -7,7 +7,7 @@
  */
 #define PTE_INDEX_SIZE  9
 #define PMD_INDEX_SIZE  7
-#define PUD_INDEX_SIZE  7
+#define PUD_INDEX_SIZE  9
 #define PGD_INDEX_SIZE  9
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
index 90533dd..be4e287 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
@@ -7,7 +7,7 @@
 #define PTE_INDEX_SIZE  12
 #define PMD_INDEX_SIZE  12
 #define PUD_INDEX_SIZE	0
-#define PGD_INDEX_SIZE  4
+#define PGD_INDEX_SIZE  6
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE	(sizeof(real_pte_t) << PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 53b6dfa..00bda2f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -97,8 +97,8 @@ extern struct task_struct *last_task_used_spe;
 #endif
 
 #ifdef CONFIG_PPC64
-/* 64-bit user address space is 44-bits (16TB user VM) */
-#define TASK_SIZE_USER64 (0x0000100000000000UL)
+/* 64-bit user address space is 46-bits (64TB user VM) */
+#define TASK_SIZE_USER64 (0x0000400000000000UL)
 
 /* 
  * 32-bit user address space is 4GB - 1 page 
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index 0c5fa31..f6fc0ee 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -10,8 +10,8 @@
  */
 #define SECTION_SIZE_BITS       24
 
-#define MAX_PHYSADDR_BITS       44
-#define MAX_PHYSMEM_BITS        44
+#define MAX_PHYSADDR_BITS       46
+#define MAX_PHYSMEM_BITS        46
 
 #endif /* CONFIG_SPARSEMEM */
 
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e894515..d980818 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -959,7 +959,9 @@ _GLOBAL(do_stab_bolted)
 	rldimi	r10,r11,7,52	/* r10 = first ste of the group */
 
 	/* Calculate VSID */
-	/* This is a kernel address, so protovsid = ESID */
+	/* This is a kernel address, so protovsid = ESID | 1 << 37 */
+	li	r9,0x1
+	rldimi  r11,r9,(CONTEXT_BITS + USER_ESID_BITS),0
 	ASM_VSID_SCRAMBLE(r11, r9, 256M)
 	rldic	r9,r11,12,16	/* r9 = vsid << 12 */
 
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index f6a2625..1a16ca2 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -56,6 +56,12 @@ _GLOBAL(slb_allocate_realmode)
 	 */
 _GLOBAL(slb_miss_kernel_load_linear)
 	li	r11,0
+	li	r9,0x1
+	/*
+	 * for 1T we shift 12 bits more.  slb_finish_load_1T will do
+	 * the necessary adjustment
+	 */
+	rldimi  r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0
 BEGIN_FTR_SECTION
 	b	slb_finish_load
 END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
@@ -85,6 +91,12 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
 	_GLOBAL(slb_miss_kernel_load_io)
 	li	r11,0
 6:
+	li	r9,0x1
+	/*
+	 * for 1T we shift 12 bits more.  slb_finish_load_1T will do
+	 * the necessary adjustment
+	 */
+	rldimi  r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0
 BEGIN_FTR_SECTION
 	b	slb_finish_load
 END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
-- 
1.7.10

^ permalink raw reply related

* [PATCH -V7 09/12] arch/powerpc: Use the required number of VSID bits in slbmte
From: Aneesh Kumar K.V @ 2012-09-04  9:01 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev, Aneesh Kumar K.V
In-Reply-To: <1346749289-16986-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

ASM_VSID_SCRAMBLE can leave non-zero bits in the high 28 bits of the result
for a 256MB segment (40 bits for a 1T segment). Properly mask them before using
the values in slbmte.

Reviewed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/mm/slb_low.S |   12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index e132dc6..3b75f19 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -223,7 +223,11 @@ _GLOBAL(slb_allocate_user)
  */
 slb_finish_load:
 	ASM_VSID_SCRAMBLE(r10,r9,256M)
-	rldimi	r11,r10,SLB_VSID_SHIFT,16	/* combine VSID and flags */
+	/*
+	 * bits above VSID_BITS_256M need to be ignored from r10
+	 * also combine VSID and flags
+	 */
+	rldimi	r11,r10,SLB_VSID_SHIFT,(64 - (SLB_VSID_SHIFT + VSID_BITS_256M))
 
 	/* r3 = EA, r11 = VSID data */
 	/*
@@ -287,7 +291,11 @@ _GLOBAL(slb_compare_rr_to_size)
 slb_finish_load_1T:
 	srdi	r10,r10,40-28		/* get 1T ESID */
 	ASM_VSID_SCRAMBLE(r10,r9,1T)
-	rldimi	r11,r10,SLB_VSID_SHIFT_1T,16	/* combine VSID and flags */
+	/*
+	 * bits above VSID_BITS_1T need to be ignored from r10
+	 * also combine VSID and flags
+	 */
+	rldimi	r11,r10,SLB_VSID_SHIFT_1T,(64 - (SLB_VSID_SHIFT_1T + VSID_BITS_1T))
 	li	r10,MMU_SEGSIZE_1T
 	rldimi	r11,r10,SLB_VSID_SSIZE_SHIFT,0	/* insert segment size */
 
-- 
1.7.10

^ permalink raw reply related

* Re: 3.5+: yaboot, Invalid memory access
From: Christian Kujau @ 2012-09-04  9:32 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Steven Rostedt, linuxppc-dev
In-Reply-To: <1346741491.7619.12.camel@concordia>

On Tue, 4 Sep 2012 at 16:51, Michael Ellerman wrote:
> My guess would be we're calling that quite early and the __put_user()
> check is getting confused and failing. That means we'll have left some
> code unpatched, which then fails.
> 
> Can you try with the patch applied, but instead of returning if the
> __put_user() fails, just continue on anyway.

You mean, like this?

------
diff --git a/arch/powerpc/lib/code-patching.c 
b/arch/powerpc/lib/code-patching.c
index dd223b3..755b623 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -21,8 +21,8 @@ int patch_instruction(unsigned int *addr, unsigned int 
instr)
        int err;
 
        err = __put_user(instr, addr);
-       if (err)
-               return err;
+//     if (err)
+//             return err;
        asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
        return 0;
 }
------


Thanks,
Christian.

> 
> That will isolate if it's something in the __put_user() (I doubt it), or
> just that the __put_user() is failing and leaving the code unpatched.
> 
> cheers
-- 
BOFH excuse #361:

Communist revolutionaries taking over the server room and demanding all the computers in the building or they shoot the sysadmin. Poor misguided fools.

^ permalink raw reply related

* Re: [PATCH 4/4] drivers/mtd/nand/mpc5121_nfc.c: some devm_ cleanups
From: Julia Lawall @ 2012-09-04  9:44 UTC (permalink / raw)
  To: Lars-Peter Clausen
  Cc: dedekind1, David Woodhouse, kernel-janitors, linux-kernel,
	Julia Lawall, linux-mtd, linuxppc-dev
In-Reply-To: <5045C19A.7000705@metafoo.de>

On Tue, 4 Sep 2012, Lars-Peter Clausen wrote:

> On 09/04/2012 10:42 AM, Artem Bityutskiy wrote:
> > Aiaiai! :-) [1] [2]
> >
> > I've build-tested this using aiaiai and it reports that this change breaks the build:
> >
> > dedekind@blue:~/git/maintaining$ ./verify ../l2-mtd/ mpc5121_nfc < ~/tmp/julia2.mbox
> > Tested the patch(es) on top of the following commits:
> > ba64756 Quick fixes - applied by aiaiai
> > 651c6fa JFFS2: don't fail on bitflips in OOB
> > e22ac84 mtd: autcpu12-nvram: drop frees of devm_ alloc'd data
> > ea9d312 mtd: cmdlinepart: minor cleanups
> >
> > --------------------------------------------------------------------------------
> > Failed to build the following commit for configuration "powerpc-mpc512x_defconfig" (architecture powerpc)":
> >
> > 0fe13ab drivers/mtd/nand/mpc5121_nfc.c: some devm_ cleanups
> >
> > ...
> > drivers/mtd/nand/mpc5121_nfc.c: In function 'mpc5121_nfc_probe':
> > drivers/mtd/nand/mpc5121_nfc.c:622:28: warning: variable 'regs_size' set but not used [-Wunused-but-set-variable]
> > drivers/mtd/nand/mpc5121_nfc.c:622:16: warning: variable 'regs_paddr' set but not used [-Wunused-but-set-variable]
> > drivers/built-in.o: In function `mpc5121_nfc_probe':
> > mpc5121_nfc.c:(.devinit.text+0x2a14): undefined reference to `devm_clk_get'
> > make[1]: *** [vmlinux] Error 1
> >
> > --------------------------------------------------------------------------------
> >
> > I do not really know why, but it seems that clock framework is not supported for powerpc. CCing the PPC mailing list. Preserved the context below for the PPC people.
> >
>
> I've been bitten by the same issue recently, also cause by one of these
> cocci devm patches. devm_clk_get is only available if the generic
> clk_get/clk_put implementation is used. Not all architectures do this and
> some implement their own clk_get/clk_put, etc functions. Since devm_clk_get
> is merely a wrapper around clk_get/clk_put there is no reason why it should
> depend CLKDEV_LOOKUP. I've prepared a patch which makes them generically
> available if the clk_get/clk_put are implemented (i.e. if HAVE_CLK is set),
> but it is on a different machine right now, will try to submit it later today.

Sorry about this.  I wasn't aware that devm_clk_get wasn't supported by
all architectures, and I have no way of compiling code for these
architectures...  But I wonder why it is not, since devm-ness doesn't seem
to have anything to do with architecture-specific details?  It would be
really nice to have it for all architectures, because the clock functions
are just as (or at least almost as) common as kzalloc, ioremap, etc.

thanks,
julia

^ permalink raw reply

* Re: [PATCH 4/4] drivers/mtd/nand/mpc5121_nfc.c: some devm_ cleanups
From: Artem Bityutskiy @ 2012-09-04  9:54 UTC (permalink / raw)
  To: Julia Lawall
  Cc: Lars-Peter Clausen, linuxppc-dev, kernel-janitors, linux-kernel,
	linux-mtd, David Woodhouse
In-Reply-To: <alpine.DEB.2.02.1209041142130.2759@hadrien>

[-- Attachment #1: Type: text/plain, Size: 1464 bytes --]

On Tue, 2012-09-04 at 11:44 +0200, Julia Lawall wrote:
> > I've been bitten by the same issue recently, also cause by one of these
> > cocci devm patches. devm_clk_get is only available if the generic
> > clk_get/clk_put implementation is used. Not all architectures do this and
> > some implement their own clk_get/clk_put, etc functions. Since devm_clk_get
> > is merely a wrapper around clk_get/clk_put there is no reason why it should
> > depend CLKDEV_LOOKUP. I've prepared a patch which makes them generically
> > available if the clk_get/clk_put are implemented (i.e. if HAVE_CLK is set),
> > but it is on a different machine right now, will try to submit it later today.
> 
> Sorry about this.  I wasn't aware that devm_clk_get wasn't supported by
> all architectures, and I have no way of compiling code for these
> architectures...  But I wonder why it is not, since devm-ness doesn't seem
> to have anything to do with architecture-specific details?  It would be
> really nice to have it for all architectures, because the clock functions
> are just as (or at least almost as) common as kzalloc, ioremap, etc.

It looks like Lars is going to fix this.

I am personally fine if you send patches without build-testing them.
Your patches are generally of good quality and you send many of them, so
build-testing each would be too much for you. And at least for MTD, I
can build-test myself.


-- 
Best Regards,
Artem Bityutskiy

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH 4/4] drivers/mtd/nand/mpc5121_nfc.c: some devm_ cleanups
From: Lars-Peter Clausen @ 2012-09-04  8:53 UTC (permalink / raw)
  To: dedekind1
  Cc: David Woodhouse, kernel-janitors, linux-kernel, Julia Lawall,
	linux-mtd, linuxppc-dev
In-Reply-To: <1346748171.12610.22.camel@sauron.fi.intel.com>

On 09/04/2012 10:42 AM, Artem Bityutskiy wrote:
> Aiaiai! :-) [1] [2]
> 
> I've build-tested this using aiaiai and it reports that this change breaks the build:
> 
> dedekind@blue:~/git/maintaining$ ./verify ../l2-mtd/ mpc5121_nfc < ~/tmp/julia2.mbox 
> Tested the patch(es) on top of the following commits:
> ba64756 Quick fixes - applied by aiaiai
> 651c6fa JFFS2: don't fail on bitflips in OOB
> e22ac84 mtd: autcpu12-nvram: drop frees of devm_ alloc'd data
> ea9d312 mtd: cmdlinepart: minor cleanups
> 
> --------------------------------------------------------------------------------
> Failed to build the following commit for configuration "powerpc-mpc512x_defconfig" (architecture powerpc)":
> 
> 0fe13ab drivers/mtd/nand/mpc5121_nfc.c: some devm_ cleanups
> 
> ...
> drivers/mtd/nand/mpc5121_nfc.c: In function 'mpc5121_nfc_probe':
> drivers/mtd/nand/mpc5121_nfc.c:622:28: warning: variable 'regs_size' set but not used [-Wunused-but-set-variable]
> drivers/mtd/nand/mpc5121_nfc.c:622:16: warning: variable 'regs_paddr' set but not used [-Wunused-but-set-variable]
> drivers/built-in.o: In function `mpc5121_nfc_probe':
> mpc5121_nfc.c:(.devinit.text+0x2a14): undefined reference to `devm_clk_get'
> make[1]: *** [vmlinux] Error 1
> 
> --------------------------------------------------------------------------------
> 
> I do not really know why, but it seems that clock framework is not supported for powerpc. CCing the PPC mailing list. Preserved the context below for the PPC people.
> 

I've been bitten by the same issue recently, also caused by one of these
cocci devm patches. devm_clk_get is only available if the generic
clk_get/clk_put implementation is used. Not all architectures do this and
some implement their own clk_get/clk_put, etc. functions. Since devm_clk_get
is merely a wrapper around clk_get/clk_put there is no reason why it should
depend on CLKDEV_LOOKUP. I've prepared a patch which makes them generically
available if clk_get/clk_put are implemented (i.e. if HAVE_CLK is set),
but it is on a different machine right now; I will try to submit it later today.

- Lars

^ permalink raw reply

* Re: [RFC v8 PATCH 13/20] memory-hotplug: check page type in get_page_bootmem
From: Yasuaki Ishimatsu @ 2012-09-04  9:54 UTC (permalink / raw)
  To: Wen Congyang
  Cc: linux-s390, linux-ia64, len.brown, linux-acpi, linux-sh, x86,
	linux-kernel, cmetcalf, linux-mm, paulus, minchan.kim,
	kosaki.motohiro, rientjes, sparclinux, Andrew Morton,
	linuxppc-dev, cl, liuj97
In-Reply-To: <50457983.1050304@cn.fujitsu.com>

Hi Wen,

2012/09/04 12:46, Wen Congyang wrote:
> Hi, isimatu-san
>
> At 09/01/2012 05:30 AM, Andrew Morton Wrote:
>> On Tue, 28 Aug 2012 18:00:20 +0800
>> wency@cn.fujitsu.com wrote:
>>
>>> From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
>>>
>>> There is a possibility that get_page_bootmem() is called to the same page many
>>> times. So when get_page_bootmem is called to the same page, the function only
>>> increments page->_count.
>>
>> I really don't understand this explanation, even after having looked at
>> the code.  Can you please have another attempt at the changelog?
>
> What is the problem that you want to fix? The function get_page_bootmem()
> may be called to the same page more than once, but I don't find any problem
> about current implementation.

The patch is just an optimization; it does not fix a problem.
As you know, the function may be called many times for the same page.
I think that once a page has had its page_type, PagePrivate flag and
page->private set, they need not be set to the same values again. So we check
page_type when get_page_bootmem() is called, and if the page has already been
set up, we only increment page->_count.

Thanks,
Yasuaki Ishimatsu

>
> Thanks
> Wen Congyang
>
>>
>>> --- a/mm/memory_hotplug.c
>>> +++ b/mm/memory_hotplug.c
>>> @@ -95,10 +95,17 @@ static void release_memory_resource(struct resource *res)
>>>   static void get_page_bootmem(unsigned long info,  struct page *page,
>>>   			     unsigned long type)
>>>   {
>>> -	page->lru.next = (struct list_head *) type;
>>> -	SetPagePrivate(page);
>>> -	set_page_private(page, info);
>>> -	atomic_inc(&page->_count);
>>> +	unsigned long page_type;
>>> +
>>> +	page_type = (unsigned long) page->lru.next;
>>> +	if (page_type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
>>> +	    page_type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE){
>>> +		page->lru.next = (struct list_head *) type;
>>> +		SetPagePrivate(page);
>>> +		set_page_private(page, info);
>>> +		atomic_inc(&page->_count);
>>> +	} else
>>> +		atomic_inc(&page->_count);
>>>   }
>>
>> And a code comment which explains what is going on would be good.  As
>> is always the case ;)
>>
>>
>

^ permalink raw reply

* [PATCH] sata_fsl: add workaround for data length mismatch on freescale V2 controller
From: Shaohui Xie @ 2012-09-04 11:08 UTC (permalink / raw)
  To: jgarzik, linux-ide; +Cc: linuxppc-dev, linux-kernel, Anju Bhartiya, Shaohui Xie

The Freescale V2 SATA controller checks whether the received data length matches
the programmed length 'ttl'; if it does not, the controller treats this as an error.
In ATAPI, 'ttl' is based on the maximum allocation length and not the actual
data transfer length, so the controller raises the 'DLM' (Data Length Mismatch)
error bit in the Hstatus register. Along with 'DLM', the DE (Device Error) and
FE (Fatal Error) bits are also set in the Hstatus register, the 'E' (Internal
Error) bit is set in the Serror register, and the CE (Command Error) and DE
(Device Error) registers have the corresponding bits set. In this condition, we
need to clear the errors in the following way: in the service routine, based on
the 'DLM' flag, set and then clear HCONTROL[27], which clears the Hstatus, CE
and DE registers, and then clear the Serror register.

Signed-off-by: Shaohui Xie <Shaohui.Xie@freescale.com>
Signed-off-by: Anju Bhartiya <Anju.Bhartiya@freescale.com>
---
 drivers/ata/sata_fsl.c |   48 +++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index d6577b9..00a00cc 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -143,6 +143,7 @@ enum {
 	    FATAL_ERR_CRC_ERR_RX |
 	    FATAL_ERR_FIFO_OVRFL_TX | FATAL_ERR_FIFO_OVRFL_RX,
 
+	INT_ON_DATA_LENGTH_MISMATCH = (1 << 12),
 	INT_ON_FATAL_ERR = (1 << 5),
 	INT_ON_PHYRDY_CHG = (1 << 4),
 
@@ -283,6 +284,8 @@ struct sata_fsl_host_priv {
 	int irq;
 	int data_snoop;
 	struct device_attribute intr_coalescing;
+	u32 quirks;
+#define SATA_FSL_QUIRK_V2_ERRATA        (1 << 1)
 };
 
 static void fsl_sata_set_irq_coalescing(struct ata_host *host,
@@ -1180,26 +1183,58 @@ static void sata_fsl_host_intr(struct ata_port *ap)
 	void __iomem *hcr_base = host_priv->hcr_base;
 	u32 hstatus, done_mask = 0;
 	struct ata_queued_cmd *qc;
-	u32 SError;
+	u32 SError, tag, atapi_flag = 0;
+	u32 status_mask = INT_ON_ERROR;
 
 	hstatus = ioread32(hcr_base + HSTATUS);
 
 	sata_fsl_scr_read(&ap->link, SCR_ERROR, &SError);
 
+	/* Read command completed register */
+	done_mask = ioread32(hcr_base + CC);
+
+	if (host_priv->quirks & SATA_FSL_QUIRK_V2_ERRATA) {
+		if (unlikely(hstatus & INT_ON_DATA_LENGTH_MISMATCH)) {
+			for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
+				qc = ata_qc_from_tag(ap, tag);
+				if (qc && ata_is_atapi(qc->tf.protocol)) {
+					atapi_flag = 1;
+					break;
+				}
+			}
+		}
+	}
+
+	/* Workaround for data length mismatch errata */
+	if (atapi_flag) {
+		u32 Hcontrol;
+#define HCONTROL_CLEAR_ERROR	(1 << 27)
+		/* Set HControl[27] to clear error registers */
+		Hcontrol = ioread32(hcr_base + HCONTROL);
+		iowrite32(Hcontrol | HCONTROL_CLEAR_ERROR, hcr_base + HCONTROL);
+
+		/* Clear HControl[27] */
+		iowrite32(Hcontrol & (~HCONTROL_CLEAR_ERROR),
+						hcr_base + HCONTROL);
+
+		/* Clear SError[E] bit */
+		sata_fsl_scr_write(&ap->link, SCR_ERROR, SError);
+
+		/* Ignore fatal error and device error */
+		status_mask &= ~(INT_ON_SINGL_DEVICE_ERR | INT_ON_FATAL_ERR);
+	}
+
 	if (unlikely(SError & 0xFFFF0000)) {
 		DPRINTK("serror @host_intr : 0x%x\n", SError);
 		sata_fsl_error_intr(ap);
 	}
 
-	if (unlikely(hstatus & INT_ON_ERROR)) {
+	if (unlikely(hstatus & status_mask)) {
 		DPRINTK("error interrupt!!\n");
 		sata_fsl_error_intr(ap);
 		return;
 	}
 
-	/* Read command completed register */
-	done_mask = ioread32(hcr_base + CC);
-
 	VPRINTK("Status of all queues :\n");
 	VPRINTK("done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x,CQ=0x%x,apqa=0x%x\n",
 		done_mask,
@@ -1437,6 +1472,9 @@ static int sata_fsl_probe(struct platform_device *ofdev)
 	else
 		host_priv->data_snoop = DATA_SNOOP_ENABLE_V1;
 
+	if (of_device_is_compatible(ofdev->dev.of_node, "fsl,pq-sata-v2"))
+		host_priv->quirks |= SATA_FSL_QUIRK_V2_ERRATA;
+
 	/* allocate host structure */
 	host = ata_host_alloc_pinfo(&ofdev->dev, ppi, SATA_FSL_MAX_PORTS);
 	if (!host) {
-- 
1.6.4

^ permalink raw reply related

* RE: [PATCH] sata_fsl: add workaround for data length mismatch on freescale V2 controller
From: Jenkins, Clive @ 2012-09-04 12:00 UTC (permalink / raw)
  To: Shaohui Xie, jgarzik, linux-ide; +Cc: linuxppc-dev, linux-kernel, Anju Bhartiya
In-Reply-To: <1346756920-19128-1-git-send-email-Shaohui.Xie@freescale.com>

> The freescale V2 SATA controller checks
> if the received data length matches
> the programmed length 'ttl', if not,
> it assumes that this is an error.
...

Can you tell us exactly what
"The freescale V2 SATA controller" is,
and what versions of what devices contain it?

Thanks,
Clive

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox