LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v2 5/5] libnvdimm: Add prctl control for disabling synchronous fault support
From: Aneesh Kumar K.V @ 2020-06-02  7:49 UTC (permalink / raw)
  To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams
  Cc: Jan Kara, Jeff Moyer, msuchanek, oohall, Aneesh Kumar K.V
In-Reply-To: <20200602074909.36738-1-aneesh.kumar@linux.ibm.com>

With POWER10, architecture is adding new pmem flush and sync instructions.
The kernel should prevent the usage of MAP_SYNC if applications are not using
the new instructions on newer hardware.

This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable
the usage of MAP_SYNC. This is in addition to the namespace specific control
already added (/sys/bus/nd/devices/region0/pfn0.1/block/pmem0/dax/sync_fault)

With this patch, if the device supports synchronous fault, then an application
can enable the synchronous fault support using the prctl() interface even if
the platform disabled it for the namespace.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 include/linux/dax.h            |  5 +++--
 include/linux/sched/coredump.h | 13 ++++++++++---
 include/uapi/linux/prctl.h     |  3 +++
 kernel/fork.c                  |  8 +++++++-
 kernel/sys.c                   | 18 ++++++++++++++++++
 5 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/include/linux/dax.h b/include/linux/dax.h
index c4a3551557de..0733aae23828 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -80,9 +80,10 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 	if (!IS_DAX(file_inode(vma->vm_file)))
 		return false;
 	/*
-	 * check MAP_SYNC is disabled by platform for this device.
+	 * MAP_SYNC is disabled by platform for this device.
+	 * check for prctl.
 	 */
-	if (!dax_synchronous_enabled(dax_dev))
+	if (!dax_synchronous_enabled(dax_dev) && !map_sync_enabled(vma->vm_mm))
 		return false;
 
 	return dax_synchronous(dax_dev);
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index ecdc6542070f..35698adc3d13 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -72,9 +72,16 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_DISABLE_THP		24	/* disable THP for all VMAs */
 #define MMF_OOM_VICTIM		25	/* mm is the oom victim */
 #define MMF_OOM_REAP_QUEUED	26	/* mm was queued for oom_reaper */
-#define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
+#define MMF_ENABLE_MAP_SYNC	27	/* disable THP for all VMAs */
+#define MMF_DISABLE_THP_MASK		(1 << MMF_DISABLE_THP)
+#define MMF_ENABLE_MAP_SYNC_MASK	(1 << MMF_ENABLE_MAP_SYNC)
 
-#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
-				 MMF_DISABLE_THP_MASK)
+#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK | \
+			MMF_DISABLE_THP_MASK | MMF_ENABLE_MAP_SYNC_MASK)
+
+static inline bool map_sync_enabled(struct mm_struct *mm)
+{
+	return !!(mm->flags & MMF_ENABLE_MAP_SYNC_MASK);
+}
 
 #endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f8131e36..ee4cde32d5cf 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -238,4 +238,7 @@ struct prctl_mm_map {
 #define PR_SET_IO_FLUSHER		57
 #define PR_GET_IO_FLUSHER		58
 
+#define PR_SET_MAP_SYNC_ENABLE		59
+#define PR_GET_MAP_SYNC_ENABLE		60
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 8c700f881d92..d50cac15ef41 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
 
 static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
 
+#ifndef CONFIG_ARCH_MAP_SYNC_DISABLE
+unsigned long default_map_sync_mask = MMF_ENABLE_MAP_SYNC_MASK;
+#else
+unsigned long default_map_sync_mask = 0;
+#endif
+
 static int __init coredump_filter_setup(char *s)
 {
 	default_dump_filter =
@@ -1039,7 +1045,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 		mm->flags = current->mm->flags & MMF_INIT_MASK;
 		mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
 	} else {
-		mm->flags = default_dump_filter;
+		mm->flags = default_dump_filter | default_map_sync_mask;
 		mm->def_flags = 0;
 	}
 
diff --git a/kernel/sys.c b/kernel/sys.c
index d325f3ab624a..5011912831b0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2450,6 +2450,24 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			clear_bit(MMF_DISABLE_THP, &me->mm->flags);
 		up_write(&me->mm->mmap_sem);
 		break;
+
+	case PR_GET_MAP_SYNC_ENABLE:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = !!test_bit(MMF_ENABLE_MAP_SYNC, &me->mm->flags);
+		break;
+	case PR_SET_MAP_SYNC_ENABLE:
+		if (arg3 || arg4 || arg5)
+			return -EINVAL;
+		if (down_write_killable(&me->mm->mmap_sem))
+			return -EINTR;
+		if (arg2)
+			set_bit(MMF_ENABLE_MAP_SYNC, &me->mm->flags);
+		else
+			clear_bit(MMF_ENABLE_MAP_SYNC, &me->mm->flags);
+		up_write(&me->mm->mmap_sem);
+		break;
+
 	case PR_MPX_ENABLE_MANAGEMENT:
 	case PR_MPX_DISABLE_MANAGEMENT:
 		/* No longer implemented: */
-- 
2.26.2


^ permalink raw reply related

* [RFC PATCH v2 4/5] powerpc/papr_scm: disable MAP_SYNC for newer hardware
From: Aneesh Kumar K.V @ 2020-06-02  7:49 UTC (permalink / raw)
  To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams
  Cc: Jan Kara, Jeff Moyer, msuchanek, oohall, Aneesh Kumar K.V
In-Reply-To: <20200602074909.36738-1-aneesh.kumar@linux.ibm.com>

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/platforms/pseries/papr_scm.c | 17 ++++++++++++++++-
 drivers/nvdimm/of_pmem.c                  |  7 +++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index f35592423380..30474d4cd109 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -30,6 +30,7 @@ struct papr_scm_priv {
 	uint64_t block_size;
 	int metadata_size;
 	bool is_volatile;
+	bool disable_map_sync;
 
 	uint64_t bound_addr;
 
@@ -340,11 +341,18 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	ndr_desc.mapping = &mapping;
 	ndr_desc.num_mappings = 1;
 	ndr_desc.nd_set = &p->nd_set;
+	set_bit(ND_REGION_SYNC_ENABLED, &ndr_desc.flags);
 
 	if (p->is_volatile)
 		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
 	else {
 		set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
+		/*
+		 * for a persistent region, check if the platform needs to
+		 * force MAP_SYNC disable.
+		 */
+		if (p->disable_map_sync)
+			clear_bit(ND_REGION_SYNC_ENABLED, &ndr_desc.flags);
 		p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
 	}
 	if (!p->region) {
@@ -365,7 +373,7 @@ err:	nvdimm_bus_unregister(p->bus);
 
 static int papr_scm_probe(struct platform_device *pdev)
 {
-	struct device_node *dn = pdev->dev.of_node;
+	struct device_node *dn;
 	u32 drc_index, metadata_size;
 	u64 blocks, block_size;
 	struct papr_scm_priv *p;
@@ -373,6 +381,10 @@ static int papr_scm_probe(struct platform_device *pdev)
 	u64 uuid[2];
 	int rc;
 
+	dn = dev_of_node(&pdev->dev);
+	if (!dn)
+		return -ENXIO;
+
 	/* check we have all the required DT properties */
 	if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) {
 		dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn);
@@ -402,6 +414,9 @@ static int papr_scm_probe(struct platform_device *pdev)
 	/* optional DT properties */
 	of_property_read_u32(dn, "ibm,metadata-size", &metadata_size);
 
+	if (of_device_is_compatible(dn, "ibm,pmemory-v2"))
+		p->disable_map_sync = true;
+
 	p->dn = dn;
 	p->drc_index = drc_index;
 	p->block_size = block_size;
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index 6826a274a1f1..a6cc3488e552 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -59,12 +59,19 @@ static int of_pmem_region_probe(struct platform_device *pdev)
 		ndr_desc.res = &pdev->resource[i];
 		ndr_desc.of_node = np;
 		set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+		set_bit(ND_REGION_SYNC_ENABLED, &ndr_desc.flags);
 
 		if (is_volatile)
 			region = nvdimm_volatile_region_create(bus, &ndr_desc);
 		else {
 			set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
+			/*
+			 * for a persistent region, check for newer device
+			 */
+			if (of_device_is_compatible(np, "pmem-region-v2"))
+				clear_bit(ND_REGION_SYNC_ENABLED, &ndr_desc.flags);
 			region = nvdimm_pmem_region_create(bus, &ndr_desc);
+
 		}
 
 		if (!region)
-- 
2.26.2


^ permalink raw reply related

* [RFC PATCH v2 3/5] libnvdimm/dax: Make DAXDEV_SYNC_ENABLED flag region-specific
From: Aneesh Kumar K.V @ 2020-06-02  7:49 UTC (permalink / raw)
  To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams
  Cc: Jan Kara, Jeff Moyer, msuchanek, oohall, Aneesh Kumar K.V
In-Reply-To: <20200602074909.36738-1-aneesh.kumar@linux.ibm.com>

This patch makes sync fault enable/disable feature more fine-grained
by allowing region-wise control of the same.

In a followup patch on ppc64 only device with compat string "ibm,pmemory-v2"
will disable the sync fault feature.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/dax/bus.c            |  2 +-
 drivers/dax/super.c          | 16 +++++++++-------
 drivers/nvdimm/pmem.c        |  4 ++++
 drivers/nvdimm/region_devs.c | 16 ++++++++++++++++
 include/linux/dax.h          | 16 ++++++++++++++++
 include/linux/libnvdimm.h    |  4 ++++
 6 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index df238c8b6ef2..8a825ecff49b 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -420,7 +420,7 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
 	 * No 'host' or dax_operations since there is no access to this
 	 * device outside of mmap of the resulting character device.
 	 */
-	dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
+	dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC | DAXDEV_F_SYNC_ENABLED);
 	if (IS_ERR(dax_dev)) {
 		rc = PTR_ERR(dax_dev);
 		goto err;
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 980f7be7e56d..f93e6649d452 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -260,10 +260,11 @@ static ssize_t write_cache_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(write_cache);
 
-static bool dax_synchronous_enabled(struct dax_device *dax_dev)
+bool __dax_synchronous_enabled(struct dax_device *dax_dev)
 {
 	return test_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
 }
+EXPORT_SYMBOL_GPL(__dax_synchronous_enabled);
 
 static void set_dax_synchronous_enable(struct dax_device *dax_dev, bool enable)
 {
@@ -280,6 +281,7 @@ static void set_dax_synchronous_enable(struct dax_device *dax_dev, bool enable)
 static ssize_t sync_fault_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
+	int enabled;
 	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
 	ssize_t rc;
 
@@ -287,7 +289,8 @@ static ssize_t sync_fault_show(struct device *dev,
 	if (!dax_dev)
 		return -ENXIO;
 
-	rc = sprintf(buf, "%d\n", !!__dax_synchronous(dax_dev));
+	enabled = (dax_synchronous(dax_dev) && dax_synchronous_enabled(dax_dev));
+	rc = sprintf(buf, "%d\n", enabled);
 	put_dax(dax_dev);
 	return rc;
 }
@@ -461,17 +464,13 @@ EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
 
 bool __dax_synchronous(struct dax_device *dax_dev)
 {
-	return test_bit(DAXDEV_SYNC, &dax_dev->flags) &&
-		test_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
+	return test_bit(DAXDEV_SYNC, &dax_dev->flags);
 }
 EXPORT_SYMBOL_GPL(__dax_synchronous);
 
 void __set_dax_synchronous(struct dax_device *dax_dev)
 {
 	set_bit(DAXDEV_SYNC, &dax_dev->flags);
-#ifndef CONFIG_ARCH_MAP_SYNC_DISABLE
-	set_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
-#endif
 }
 EXPORT_SYMBOL_GPL(__set_dax_synchronous);
 
@@ -665,6 +664,9 @@ struct dax_device *alloc_dax(void *private, const char *__host,
 	if (flags & DAXDEV_F_SYNC)
 		set_dax_synchronous(dax_dev);
 
+	if (flags & DAXDEV_F_SYNC_ENABLED)
+		set_dax_synchronous_enable(dax_dev, true);
+
 	return dax_dev;
 
  err_dev:
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 2df6994acf83..dc9c269eb50d 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -485,6 +485,10 @@ static int pmem_attach_disk(struct device *dev,
 
 	if (is_nvdimm_sync(nd_region))
 		flags = DAXDEV_F_SYNC;
+
+	if (is_nvdimm_sync_enabled(nd_region))
+		flags |= DAXDEV_F_SYNC_ENABLED;
+
 	dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
 	if (IS_ERR(dax_dev)) {
 		put_disk(disk);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index ccbb5b43b8b2..2181560cf655 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1283,6 +1283,22 @@ bool is_nvdimm_sync(struct nd_region *nd_region)
 }
 EXPORT_SYMBOL_GPL(is_nvdimm_sync);
 
+bool is_nvdimm_sync_enabled(struct nd_region *nd_region)
+{
+#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE
+	if (is_nd_volatile(&nd_region->dev))
+		return true;
+
+	return is_nd_pmem(&nd_region->dev) &&
+		test_bit(ND_REGION_SYNC_ENABLED, &nd_region->flags);
+#else
+	return true;
+#endif
+
+}
+EXPORT_SYMBOL_GPL(is_nvdimm_sync_enabled);
+
+
 struct conflict_context {
 	struct nd_region *nd_region;
 	resource_size_t start, size;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index d7af5d243f24..c4a3551557de 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -10,6 +10,9 @@
 /* Flag for synchronous flush */
 #define DAXDEV_F_SYNC (1UL << 0)
 
+/* flag for platform forcing synchronous flush disable */
+#define DAXDEV_F_SYNC_ENABLED (1UL << 1)
+
 typedef unsigned long dax_entry_t;
 
 struct iomap_ops;
@@ -59,6 +62,13 @@ static inline void set_dax_synchronous(struct dax_device *dax_dev)
 {
 	__set_dax_synchronous(dax_dev);
 }
+
+bool __dax_synchronous_enabled(struct dax_device *dax_dev);
+static inline bool dax_synchronous_enabled(struct dax_device *dax_dev)
+{
+	return  __dax_synchronous_enabled(dax_dev);
+}
+
 /*
  * Check if given mapping is supported by the file / underlying device.
  */
@@ -69,6 +79,12 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 		return true;
 	if (!IS_DAX(file_inode(vma->vm_file)))
 		return false;
+	/*
+	 * check MAP_SYNC is disabled by platform for this device.
+	 */
+	if (!dax_synchronous_enabled(dax_dev))
+		return false;
+
 	return dax_synchronous(dax_dev);
 }
 #else
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 18da4059be09..cc3962c4978f 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -63,6 +63,9 @@ enum {
 	/* Platform provides asynchronous flush mechanism */
 	ND_REGION_ASYNC = 3,
 
+	/* Platform wants to disable synchronous flush mechanism */
+	ND_REGION_SYNC_ENABLED= 4,
+
 	/* mark newly adjusted resources as requiring a label update */
 	DPA_RESOURCE_ADJUSTED = 1 << 0,
 };
@@ -262,6 +265,7 @@ int nvdimm_has_flush(struct nd_region *nd_region);
 int nvdimm_has_cache(struct nd_region *nd_region);
 int nvdimm_in_overwrite(struct nvdimm *nvdimm);
 bool is_nvdimm_sync(struct nd_region *nd_region);
+bool is_nvdimm_sync_enabled(struct nd_region *nd_region);
 
 static inline int nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len, int *cmd_rc)
-- 
2.26.2


^ permalink raw reply related

* [RFC PATCH v2 1/5] libnvdimm/dax: Add a dax flag to control synchronous fault support
From: Aneesh Kumar K.V @ 2020-06-02  7:49 UTC (permalink / raw)
  To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams
  Cc: Jan Kara, Jeff Moyer, msuchanek, oohall, Aneesh Kumar K.V

With POWER10, architecture is adding new pmem flush and sync instructions.
The kernel should prevent the usage of MAP_SYNC if applications are not using
the new instructions on newer hardware

This patch adds a dax attribute (/sys/bus/nd/devices/region0/pfn0.1/block/pmem0/dax/sync_fault)
which can be used to control this flag. If the device supports synchronous flush
then userspace can update this attribute to enable/disable the synchronous
fault. The attribute is only visible if there is write cache enabled on the device.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/dax/super.c | 73 ++++++++++++++++++++++++++++++++++++++++++++-
 mm/Kconfig          |  3 ++
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 8e32345be0f7..980f7be7e56d 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -198,6 +198,12 @@ enum dax_device_flags {
 	DAXDEV_WRITE_CACHE,
 	/* flag to check if device supports synchronous flush */
 	DAXDEV_SYNC,
+	/*
+	 * flag to indicate whether synchronous flush is enabled.
+	 * Some platform may want to disable synchronous flush support
+	 * even though device supports the same.
+	 */
+	DAXDEV_SYNC_ENABLED,
 };
 
 /**
@@ -254,6 +260,60 @@ static ssize_t write_cache_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(write_cache);
 
+static bool dax_synchronous_enabled(struct dax_device *dax_dev)
+{
+	return test_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
+}
+
+static void set_dax_synchronous_enable(struct dax_device *dax_dev, bool enable)
+{
+	if (!test_bit(DAXDEV_SYNC, &dax_dev->flags))
+		return;
+
+	if (enable)
+		set_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
+	else
+		clear_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
+}
+
+
+static ssize_t sync_fault_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
+	ssize_t rc;
+
+	WARN_ON_ONCE(!dax_dev);
+	if (!dax_dev)
+		return -ENXIO;
+
+	rc = sprintf(buf, "%d\n", !!__dax_synchronous(dax_dev));
+	put_dax(dax_dev);
+	return rc;
+}
+
+static ssize_t sync_fault_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	bool enable_sync;
+	int rc = strtobool(buf, &enable_sync);
+	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
+
+	WARN_ON_ONCE(!dax_dev);
+	if (!dax_dev)
+		return -ENXIO;
+
+	if (rc)
+		len = rc;
+	else
+		set_dax_synchronous_enable(dax_dev, enable_sync);
+
+	put_dax(dax_dev);
+	return len;
+}
+
+static DEVICE_ATTR_RW(sync_fault);
+
 static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, typeof(*dev), kobj);
@@ -267,11 +327,18 @@ static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
 	if (a == &dev_attr_write_cache.attr)
 		return 0;
 #endif
+	if (a == &dev_attr_sync_fault.attr) {
+		if (dax_write_cache_enabled(dax_dev))
+			return a->mode;
+		return 0;
+	}
+
 	return a->mode;
 }
 
 static struct attribute *dax_attributes[] = {
 	&dev_attr_write_cache.attr,
+	&dev_attr_sync_fault.attr,
 	NULL,
 };
 
@@ -394,13 +461,17 @@ EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
 
 bool __dax_synchronous(struct dax_device *dax_dev)
 {
-	return test_bit(DAXDEV_SYNC, &dax_dev->flags);
+	return test_bit(DAXDEV_SYNC, &dax_dev->flags) &&
+		test_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
 }
 EXPORT_SYMBOL_GPL(__dax_synchronous);
 
 void __set_dax_synchronous(struct dax_device *dax_dev)
 {
 	set_bit(DAXDEV_SYNC, &dax_dev->flags);
+#ifndef CONFIG_ARCH_MAP_SYNC_DISABLE
+	set_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
+#endif
 }
 EXPORT_SYMBOL_GPL(__set_dax_synchronous);
 
diff --git a/mm/Kconfig b/mm/Kconfig
index c1acc34c1c35..38fd7cfbfca8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -867,4 +867,7 @@ config ARCH_HAS_HUGEPD
 config MAPPING_DIRTY_HELPERS
         bool
 
+config ARCH_MAP_SYNC_DISABLE
+	bool
+
 endmenu
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH 1/4] powerpc: Add a ppc_inst_as_str() helper
From: Joel Stanley @ 2020-06-02  7:43 UTC (permalink / raw)
  To: Jordan Niethe; +Cc: linuxppc-dev, Alistair Popple
In-Reply-To: <20200602052728.18227-1-jniethe5@gmail.com>

On Tue, 2 Jun 2020 at 05:31, Jordan Niethe <jniethe5@gmail.com> wrote:
>
> There are quite a few places where instructions are printed, this is
> done using a '%x' format specifier. With the introduction of prefixed
> instructions, this does not work well. Currently in these places,
> ppc_inst_val() is used for the value for %x so only the first word of
> prefixed instructions are printed.
>
> When the instructions are word instructions, only a single word should
> be printed. For prefixed instructions both the prefix and suffix should
> be printed. To accommodate both of these situations, instead of a '%x'
> specifier use '%s' and introduce a helper, __ppc_inst_as_str() which
> returns a char *. The char * __ppc_inst_as_str() returns is buffer that
> is passed to it by the caller.
>
> It is cumbersome to require every caller of __ppc_inst_as_str() to now
> declare a buffer. To make it more convenient to use __ppc_inst_as_str(),
> wrap it in a macro that uses a compound statement to allocate a buffer
> on the caller's stack before calling it.
>
> Signed-off-by: Jordan Niethe <jniethe5@gmail.com>

Reviewed-by: Joel Stanley <joel@jms.id.au>

> ---
>  arch/powerpc/include/asm/inst.h      | 19 +++++++++++++++++++
>  arch/powerpc/kernel/kprobes.c        |  2 +-
>  arch/powerpc/kernel/trace/ftrace.c   | 26 +++++++++++++-------------
>  arch/powerpc/lib/test_emulate_step.c |  4 ++--
>  arch/powerpc/xmon/xmon.c             |  2 +-
>  5 files changed, 36 insertions(+), 17 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
> index 45f3ec868258..3df7806e6dc3 100644
> --- a/arch/powerpc/include/asm/inst.h
> +++ b/arch/powerpc/include/asm/inst.h
> @@ -122,6 +122,25 @@ static inline u64 ppc_inst_as_u64(struct ppc_inst x)
>  #endif
>  }
>
> +#define PPC_INST_STR_LEN sizeof("0x00000000 0x00000000")
> +
> +static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_inst x)
> +{
> +       if (ppc_inst_prefixed(x))
> +               sprintf(str, "0x%08x 0x%08x", ppc_inst_val(x), ppc_inst_suffix(x));
> +       else
> +               sprintf(str, "0x%08x", ppc_inst_val(x));
> +
> +       return str;
> +}
> +
> +#define ppc_inst_as_str(x)             \
> +({                                     \
> +       char __str[PPC_INST_STR_LEN];   \
> +       __ppc_inst_as_str(__str, x);    \
> +       __str;                          \
> +})
> +
>  int probe_user_read_inst(struct ppc_inst *inst,
>                          struct ppc_inst __user *nip);
>
> diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
> index 227510df8c55..d0797171dba3 100644
> --- a/arch/powerpc/kernel/kprobes.c
> +++ b/arch/powerpc/kernel/kprobes.c
> @@ -244,7 +244,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
>                  * So, we should never get here... but, its still
>                  * good to catch them, just in case...
>                  */
> -               printk("Can't step on instruction %x\n", ppc_inst_val(insn));
> +               printk("Can't step on instruction %s\n", ppc_inst_as_str(insn));
>                 BUG();
>         } else {
>                 /*
> diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
> index 5e399628f51a..da11a26d8213 100644
> --- a/arch/powerpc/kernel/trace/ftrace.c
> +++ b/arch/powerpc/kernel/trace/ftrace.c
> @@ -73,8 +73,8 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
>
>         /* Make sure it is what we expect it to be */
>         if (!ppc_inst_equal(replaced, old)) {
> -               pr_err("%p: replaced (%#x) != old (%#x)",
> -               (void *)ip, ppc_inst_val(replaced), ppc_inst_val(old));
> +               pr_err("%p: replaced (%s) != old (%s)",
> +               (void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old));
>                 return -EINVAL;
>         }
>
> @@ -137,7 +137,7 @@ __ftrace_make_nop(struct module *mod,
>
>         /* Make sure that that this is still a 24bit jump */
>         if (!is_bl_op(op)) {
> -               pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
> +               pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
>                 return -EINVAL;
>         }
>
> @@ -172,8 +172,8 @@ __ftrace_make_nop(struct module *mod,
>         /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
>         if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) &&
>             !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
> -               pr_err("Unexpected instruction %08x around bl _mcount\n",
> -                      ppc_inst_val(op));
> +               pr_err("Unexpected instruction %s around bl _mcount\n",
> +                      ppc_inst_as_str(op));
>                 return -EINVAL;
>         }
>  #else
> @@ -203,7 +203,7 @@ __ftrace_make_nop(struct module *mod,
>         }
>
>         if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_LD_TOC))) {
> -               pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, ppc_inst_val(op));
> +               pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op));
>                 return -EINVAL;
>         }
>  #endif /* CONFIG_MPROFILE_KERNEL */
> @@ -231,7 +231,7 @@ __ftrace_make_nop(struct module *mod,
>
>         /* Make sure that that this is still a 24bit jump */
>         if (!is_bl_op(op)) {
> -               pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
> +               pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
>                 return -EINVAL;
>         }
>
> @@ -406,7 +406,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
>
>         /* Make sure that that this is still a 24bit jump */
>         if (!is_bl_op(op)) {
> -               pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
> +               pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
>                 return -EINVAL;
>         }
>
> @@ -533,8 +533,8 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>                 return -EFAULT;
>
>         if (!expected_nop_sequence(ip, op[0], op[1])) {
> -               pr_err("Unexpected call sequence at %p: %x %x\n",
> -               ip, ppc_inst_val(op[0]), ppc_inst_val(op[1]));
> +               pr_err("Unexpected call sequence at %p: %s %s\n",
> +               ip, ppc_inst_as_str(op[0]), ppc_inst_as_str(op[1]));
>                 return -EINVAL;
>         }
>
> @@ -597,7 +597,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>
>         /* It should be pointing to a nop */
>         if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_NOP))) {
> -               pr_err("Expected NOP but have %x\n", ppc_inst_val(op));
> +               pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op));
>                 return -EINVAL;
>         }
>
> @@ -654,7 +654,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
>         }
>
>         if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) {
> -               pr_err("Unexpected call sequence at %p: %x\n", ip, ppc_inst_val(op));
> +               pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op));
>                 return -EINVAL;
>         }
>
> @@ -733,7 +733,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
>
>         /* Make sure that that this is still a 24bit jump */
>         if (!is_bl_op(op)) {
> -               pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
> +               pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
>                 return -EINVAL;
>         }
>
> diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
> index 46af80279ebc..5fb6f5267d70 100644
> --- a/arch/powerpc/lib/test_emulate_step.c
> +++ b/arch/powerpc/lib/test_emulate_step.c
> @@ -852,7 +852,7 @@ static int __init emulate_compute_instr(struct pt_regs *regs,
>
>         if (analyse_instr(&op, regs, instr) != 1 ||
>             GETTYPE(op.type) != COMPUTE) {
> -               pr_info("emulation failed, instruction = 0x%08x\n", ppc_inst_val(instr));
> +               pr_info("execution failed, instruction = %s\n", ppc_inst_as_str(instr));
>                 return -EFAULT;
>         }
>
> @@ -872,7 +872,7 @@ static int __init execute_compute_instr(struct pt_regs *regs,
>         /* Patch the NOP with the actual instruction */
>         patch_instruction_site(&patch__exec_instr, instr);
>         if (exec_instr(regs)) {
> -               pr_info("execution failed, instruction = 0x%08x\n", ppc_inst_val(instr));
> +               pr_info("execution failed, instruction = %s\n", ppc_inst_as_str(instr));
>                 return -EFAULT;
>         }
>
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index 16ee6639a60c..1dd3bf02021b 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -2958,7 +2958,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr,
>                 dotted = 0;
>                 last_inst = inst;
>                 if (praddr)
> -                       printf(REG"  %.8x", adr, ppc_inst_val(inst));
> +                       printf(REG"  %s", adr, ppc_inst_as_str(inst));
>                 printf("\t");
>                 dump_func(ppc_inst_val(inst), adr);
>                 printf("\n");
> --
> 2.17.1
>

^ permalink raw reply

* [PATCH v2] cxl: Remove dead Kconfig option
From: Andrew Donnellan @ 2020-06-02  7:05 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: fbarrat

The CXL_AFU_DRIVER_OPS Kconfig option was added to coordinate merging of
new features. It no longer serves any purpose, so remove it.

Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>

---
v1->v2:
- keep CXL_LIB for now to avoid breaking a driver that is currently out of
tree
---
 drivers/misc/cxl/Kconfig | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index 39eec9031487..984114f7cee9 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -7,9 +7,6 @@ config CXL_BASE
 	bool
 	select PPC_COPRO_BASE
 
-config CXL_AFU_DRIVER_OPS
-	bool
-
 config CXL_LIB
 	bool
 
@@ -17,7 +14,6 @@ config CXL
 	tristate "Support for IBM Coherent Accelerators (CXL)"
 	depends on PPC_POWERNV && PCI_MSI && EEH
 	select CXL_BASE
-	select CXL_AFU_DRIVER_OPS
 	select CXL_LIB
 	default m
 	help
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH] powerpc/kvm: Enable support for ISA v3.1 guests
From: Jordan Niethe @ 2020-06-02  6:26 UTC (permalink / raw)
  To: Alistair Popple; +Cc: ravi.bangoria, mikey, linuxppc-dev, kvm-ppc
In-Reply-To: <20200602055325.6102-1-alistair@popple.id.au>

On Tue, Jun 2, 2020 at 3:55 PM Alistair Popple <alistair@popple.id.au> wrote:
>
> Adds support for emulating ISAv3.1 guests by adding the appropriate PCR
> and FSCR bits.
>
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> ---
>  arch/powerpc/include/asm/reg.h |  1 +
>  arch/powerpc/kvm/book3s_hv.c   | 11 ++++++++---
>  2 files changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index 773f76402392..d77040d0588a 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -1348,6 +1348,7 @@
>  #define PVR_ARCH_206p  0x0f100003
>  #define PVR_ARCH_207   0x0f000004
>  #define PVR_ARCH_300   0x0f000005
> +#define PVR_ARCH_31    0x0f000006
>
>  /* Macros for setting and retrieving special purpose registers */
>  #ifndef __ASSEMBLY__
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 93493f0cbfe8..359bb2ed43e1 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -345,7 +345,7 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
>  }
>
>  /* Dummy value used in computing PCR value below */
> -#define PCR_ARCH_300   (PCR_ARCH_207 << 1)
> +#define PCR_ARCH_31    (PCR_ARCH_300 << 1)
>
>  static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
>  {
> @@ -353,7 +353,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
>         struct kvmppc_vcore *vc = vcpu->arch.vcore;
>
>         /* We can (emulate) our own architecture version and anything older */
> -       if (cpu_has_feature(CPU_FTR_ARCH_300))
> +       if (cpu_has_feature(CPU_FTR_ARCH_31))
> +               host_pcr_bit = PCR_ARCH_31;
> +       else if (cpu_has_feature(CPU_FTR_ARCH_300))
>                 host_pcr_bit = PCR_ARCH_300;
>         else if (cpu_has_feature(CPU_FTR_ARCH_207S))
>                 host_pcr_bit = PCR_ARCH_207;
> @@ -379,6 +381,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
>                 case PVR_ARCH_300:
>                         guest_pcr_bit = PCR_ARCH_300;
>                         break;
> +               case PVR_ARCH_31:
> +                       guest_pcr_bit = PCR_ARCH_31;
> +                       break;
>                 default:
>                         return -EINVAL;
>                 }
> @@ -2318,7 +2323,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
>          * to trap and then we emulate them.
>          */
The comment above this:
"...
     * Set the default HFSCR for the guest from the host value.
     * This value is only used on POWER9..."
would need to be updated.
>         vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
> -               HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
> +               HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
>         if (cpu_has_feature(CPU_FTR_HVMODE)) {
>                 vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
>                 if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
> --
> 2.20.1
>

^ permalink raw reply

* [PATCH] powerpc/kvm: Enable support for ISA v3.1 guests
From: Alistair Popple @ 2020-06-02  5:53 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: ravi.bangoria, mikey, kvm-ppc, Alistair Popple

Adds support for emulating ISAv3.1 guests by adding the appropriate PCR
and FSCR bits.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
---
 arch/powerpc/include/asm/reg.h |  1 +
 arch/powerpc/kvm/book3s_hv.c   | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 773f76402392..d77040d0588a 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1348,6 +1348,7 @@
 #define PVR_ARCH_206p	0x0f100003
 #define PVR_ARCH_207	0x0f000004
 #define PVR_ARCH_300	0x0f000005
+#define PVR_ARCH_31	0x0f000006
 
 /* Macros for setting and retrieving special purpose registers */
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 93493f0cbfe8..359bb2ed43e1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -345,7 +345,7 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
 }
 
 /* Dummy value used in computing PCR value below */
-#define PCR_ARCH_300	(PCR_ARCH_207 << 1)
+#define PCR_ARCH_31    (PCR_ARCH_300 << 1)
 
 static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 {
@@ -353,7 +353,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
 	/* We can (emulate) our own architecture version and anything older */
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		host_pcr_bit = PCR_ARCH_31;
+	else if (cpu_has_feature(CPU_FTR_ARCH_300))
 		host_pcr_bit = PCR_ARCH_300;
 	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
 		host_pcr_bit = PCR_ARCH_207;
@@ -379,6 +381,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
 		case PVR_ARCH_300:
 			guest_pcr_bit = PCR_ARCH_300;
 			break;
+		case PVR_ARCH_31:
+			guest_pcr_bit = PCR_ARCH_31;
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -2318,7 +2323,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
 	 * to trap and then we emulate them.
 	 */
 	vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
-		HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
+		HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
 	if (cpu_has_feature(CPU_FTR_HVMODE)) {
 		vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
 		if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
-- 
2.20.1


^ permalink raw reply related

* [PATCH 4/4] powerpc: Handle prefixed instructions in show_instructions()
From: Jordan Niethe @ 2020-06-02  5:27 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Jordan Niethe, alistair
In-Reply-To: <20200602052728.18227-1-jniethe5@gmail.com>

Currently show_instructions() treats prefixed instructions as two
separate word instructions. '<' and '>' are placed around the
instruction at the NIP, but as a result they only wrap around the
prefix. Make '<' and '>' straddle the whole prefixed instruction.

Currently showing a prefixed instruction looks like:

Instruction dump:
60000000 60000000 60000000 60000000 60000000 60000000 60000000 60000000
60000000 60000000 60000000 60000000 <04000000> 00000000 60000000 60000000

Make it look like:
Instruction dump:
0x60000000 0x60000000 0x60000000 0x60000000 0x60000000 0x60000000 0x60000000 0x60000000
0x60000000 0x60000000 0x60000000 0x60000000 <0x04000000 0x00000000> 0x60000000 0x60000000 0x60000000

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
---
 arch/powerpc/kernel/process.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index b3f73e398d00..bcd7277a9395 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1258,7 +1258,7 @@ static void show_instructions(struct pt_regs *regs)
 	printk("Instruction dump:");
 
 	for (i = 0; i < NR_INSN_TO_PRINT; i++) {
-		int instr;
+		struct ppc_inst instr;
 
 		if (!(i % 8))
 			pr_cont("\n");
@@ -1272,16 +1272,17 @@ static void show_instructions(struct pt_regs *regs)
 #endif
 
 		if (!__kernel_text_address(pc) ||
-		    probe_kernel_address((const void *)pc, instr)) {
+		    probe_kernel_read_inst(&instr, (struct ppc_inst *)pc)) {
+			instr = ppc_inst(PPC_INST_NOP);
 			pr_cont("XXXXXXXX ");
 		} else {
 			if (regs->nip == pc)
-				pr_cont("<%08x> ", instr);
+				pr_cont("<%s> ", ppc_inst_as_str(instr));
 			else
-				pr_cont("%08x ", instr);
+				pr_cont("%s ", ppc_inst_as_str(instr));
 		}
 
-		pc += sizeof(int);
+		pc += ppc_inst_len(instr);
 	}
 
 	pr_cont("\n");
-- 
2.17.1


^ permalink raw reply related

* [PATCH 3/4] powerpc: Handle prefixed instructions in show_user_instructions()
From: Jordan Niethe @ 2020-06-02  5:27 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Jordan Niethe, alistair
In-Reply-To: <20200602052728.18227-1-jniethe5@gmail.com>

Currently prefixed instructions are treated as two word instructions by
show_user_instructions(), treat them as a single instruction. '<' and
'>' are placed around the instruction at the NIP, and for prefixed
instructions this is placed around the prefix only. Make the '<' and '>'
wrap the prefix and suffix.

Currently showing a prefixed instruction looks like:
fbe1fff8 39200000 06000000 a3e30000 <04000000> f7e40000 ebe1fff8 4e800020

Make it look like:
0xfbe1fff8 0x39200000 0x06000000 0xa3e30000 <0x04000000 0xf7e40000> 0xebe1fff8 0x4e800020 0x00000000 0x00000000

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
---
 arch/powerpc/kernel/process.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 048d64c4e115..b3f73e398d00 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1292,7 +1292,8 @@ void show_user_instructions(struct pt_regs *regs)
 	unsigned long pc;
 	int n = NR_INSN_TO_PRINT;
 	struct seq_buf s;
-	char buf[96]; /* enough for 8 times 9 + 2 chars */
+	char buf[8 * sizeof("0x00000000 0x00000000") + 2];
+	struct ppc_inst instr;
 
 	pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
 
@@ -1303,14 +1304,15 @@ void show_user_instructions(struct pt_regs *regs)
 
 		seq_buf_clear(&s);
 
-		for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
-			int instr;
+		for (i = 0; i < 8 && n; i++, n--, pc += ppc_inst_len(instr)) {
 
-			if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) {
+			if (probe_user_read_inst(&instr, (void __user *)pc)) {
 				seq_buf_printf(&s, "XXXXXXXX ");
+				instr = ppc_inst(PPC_INST_NOP);
 				continue;
 			}
-			seq_buf_printf(&s, regs->nip == pc ? "<%08x> " : "%08x ", instr);
+			seq_buf_printf(&s, regs->nip == pc ? "<%s> " : "%s ",
+				       ppc_inst_as_str(instr));
 		}
 
 		if (!seq_buf_has_overflowed(&s))
-- 
2.17.1


^ permalink raw reply related

* [PATCH 2/4] powerpc/xmon: Improve dumping prefixed instructions
From: Jordan Niethe @ 2020-06-02  5:27 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Jordan Niethe, alistair
In-Reply-To: <20200602052728.18227-1-jniethe5@gmail.com>

Currently prefixed instructions are dumped as two separate word
instructions. Use mread_instr() so that prefixed instructions are read
as such and update the incrementor in the loop to take this into
account.

'dump_func' is print_insn_powerpc() which comes from ppc-dis.c which is
taken from binutils. When this is updated prefixed instructions will be
disassembled.

Currently dumping prefixed instructions looks like this:
0:mon> di c000000000094168
c000000000094168  0x06000000    .long 0x6000000
c00000000009416c  0x392a0003    addi    r9,r10,3
c000000000094170  0x913f0028    stw     r9,40(r31)
c000000000094174  0xe93f002a    lwa     r9,40(r31)
c000000000094178  0x7d234b78    mr      r3,r9
c00000000009417c  0x383f0040    addi    r1,r31,64
c000000000094180  0xebe1fff8    ld      r31,-8(r1)
c000000000094184  0x4e800020    blr
c000000000094188  0x60000000    nop
 ...
c000000000094190  0x3c4c0121    addis   r2,r12,289
c000000000094194  0x38429670    addi    r2,r2,-27024
c000000000094198  0x7c0802a6    mflr    r0
c00000000009419c  0x60000000    nop
c0000000000941a0  0xe9240100    ld      r9,256(r4)
c0000000000941a4  0x39400001    li      r10,1

After this it looks like:
0:mon> di c000000000094168
c000000000094168  0x06000000 0x392a0003 .long 0x392a000306000000
c000000000094170  0x913f0028    stw     r9,40(r31)
c000000000094174  0xe93f002a    lwa     r9,40(r31)
c000000000094178  0x7d234b78    mr      r3,r9
c00000000009417c  0x383f0040    addi    r1,r31,64
c000000000094180  0xebe1fff8    ld      r31,-8(r1)
c000000000094184  0x4e800020    blr
c000000000094188  0x60000000    nop
 ...
c000000000094190  0x3c4c0121    addis   r2,r12,289
c000000000094194  0x38429570    addi    r2,r2,-27280
c000000000094198  0x7c0802a6    mflr    r0
c00000000009419c  0x60000000    nop
c0000000000941a0  0xe9240100    ld      r9,256(r4)
c0000000000941a4  0x39400001    li      r10,1
c0000000000941a8  0x3d02000b    addis   r8,r2,11

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
---
 arch/powerpc/xmon/xmon.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1dd3bf02021b..548571536bd1 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2935,11 +2935,10 @@ generic_inst_dump(unsigned long adr, long count, int praddr,
 	int nr, dotted;
 	unsigned long first_adr;
 	struct ppc_inst inst, last_inst = ppc_inst(0);
-	unsigned char val[4];
 
 	dotted = 0;
-	for (first_adr = adr; count > 0; --count, adr += 4) {
-		nr = mread(adr, val, 4);
+	for (first_adr = adr; count > 0; --count, adr += ppc_inst_len(inst)) {
+		nr = mread_instr(adr, &inst);
 		if (nr == 0) {
 			if (praddr) {
 				const char *x = fault_chars[fault_type];
@@ -2947,7 +2946,6 @@ generic_inst_dump(unsigned long adr, long count, int praddr,
 			}
 			break;
 		}
-		inst = ppc_inst(GETWORD(val));
 		if (adr > first_adr && ppc_inst_equal(inst, last_inst)) {
 			if (!dotted) {
 				printf(" ...\n");
@@ -2960,7 +2958,10 @@ generic_inst_dump(unsigned long adr, long count, int praddr,
 		if (praddr)
 			printf(REG"  %s", adr, ppc_inst_as_str(inst));
 		printf("\t");
-		dump_func(ppc_inst_val(inst), adr);
+		if (!ppc_inst_prefixed(inst))
+			dump_func(ppc_inst_val(inst), adr);
+		else
+			dump_func(ppc_inst_as_u64(inst), adr);
 		printf("\n");
 	}
 	return adr - first_adr;
-- 
2.17.1


^ permalink raw reply related

* [PATCH 1/4] powerpc: Add a ppc_inst_as_str() helper
From: Jordan Niethe @ 2020-06-02  5:27 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Jordan Niethe, alistair

There are quite a few places where instructions are printed, this is
done using a '%x' format specifier. With the introduction of prefixed
instructions, this does not work well. Currently in these places,
ppc_inst_val() is used for the value for %x so only the first word of
prefixed instructions are printed.

When the instructions are word instructions, only a single word should
be printed. For prefixed instructions both the prefix and suffix should
be printed. To accommodate both of these situations, instead of a '%x'
specifier use '%s' and introduce a helper, __ppc_inst_as_str() which
returns a char *. The char * __ppc_inst_as_str() returns is buffer that
is passed to it by the caller.

It is cumbersome to require every caller of __ppc_inst_as_str() to now
declare a buffer. To make it more convenient to use __ppc_inst_as_str(),
wrap it in a macro that uses a compound statement to allocate a buffer
on the caller's stack before calling it.

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
---
 arch/powerpc/include/asm/inst.h      | 19 +++++++++++++++++++
 arch/powerpc/kernel/kprobes.c        |  2 +-
 arch/powerpc/kernel/trace/ftrace.c   | 26 +++++++++++++-------------
 arch/powerpc/lib/test_emulate_step.c |  4 ++--
 arch/powerpc/xmon/xmon.c             |  2 +-
 5 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 45f3ec868258..3df7806e6dc3 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -122,6 +122,25 @@ static inline u64 ppc_inst_as_u64(struct ppc_inst x)
 #endif
 }
 
+#define PPC_INST_STR_LEN sizeof("0x00000000 0x00000000")
+
+static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_inst x)
+{
+	if (ppc_inst_prefixed(x))
+		sprintf(str, "0x%08x 0x%08x", ppc_inst_val(x), ppc_inst_suffix(x));
+	else
+		sprintf(str, "0x%08x", ppc_inst_val(x));
+
+	return str;
+}
+
+#define ppc_inst_as_str(x)		\
+({					\
+	char __str[PPC_INST_STR_LEN];	\
+	__ppc_inst_as_str(__str, x);	\
+	__str;				\
+})
+
 int probe_user_read_inst(struct ppc_inst *inst,
 			 struct ppc_inst __user *nip);
 
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 227510df8c55..d0797171dba3 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -244,7 +244,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
 		 * So, we should never get here... but, its still
 		 * good to catch them, just in case...
 		 */
-		printk("Can't step on instruction %x\n", ppc_inst_val(insn));
+		printk("Can't step on instruction %s\n", ppc_inst_as_str(insn));
 		BUG();
 	} else {
 		/*
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 5e399628f51a..da11a26d8213 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -73,8 +73,8 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
 
 	/* Make sure it is what we expect it to be */
 	if (!ppc_inst_equal(replaced, old)) {
-		pr_err("%p: replaced (%#x) != old (%#x)",
-		(void *)ip, ppc_inst_val(replaced), ppc_inst_val(old));
+		pr_err("%p: replaced (%s) != old (%s)",
+		(void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old));
 		return -EINVAL;
 	}
 
@@ -137,7 +137,7 @@ __ftrace_make_nop(struct module *mod,
 
 	/* Make sure that that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
+		pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -172,8 +172,8 @@ __ftrace_make_nop(struct module *mod,
 	/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
 	if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) &&
 	    !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
-		pr_err("Unexpected instruction %08x around bl _mcount\n",
-		       ppc_inst_val(op));
+		pr_err("Unexpected instruction %s around bl _mcount\n",
+		       ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 #else
@@ -203,7 +203,7 @@ __ftrace_make_nop(struct module *mod,
 	}
 
 	if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_LD_TOC))) {
-		pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, ppc_inst_val(op));
+		pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 #endif /* CONFIG_MPROFILE_KERNEL */
@@ -231,7 +231,7 @@ __ftrace_make_nop(struct module *mod,
 
 	/* Make sure that that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
+		pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -406,7 +406,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
 
 	/* Make sure that that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
+		pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -533,8 +533,8 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		return -EFAULT;
 
 	if (!expected_nop_sequence(ip, op[0], op[1])) {
-		pr_err("Unexpected call sequence at %p: %x %x\n",
-		ip, ppc_inst_val(op[0]), ppc_inst_val(op[1]));
+		pr_err("Unexpected call sequence at %p: %s %s\n",
+		ip, ppc_inst_as_str(op[0]), ppc_inst_as_str(op[1]));
 		return -EINVAL;
 	}
 
@@ -597,7 +597,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
 	/* It should be pointing to a nop */
 	if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_NOP))) {
-		pr_err("Expected NOP but have %x\n", ppc_inst_val(op));
+		pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -654,7 +654,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
 	}
 
 	if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) {
-		pr_err("Unexpected call sequence at %p: %x\n", ip, ppc_inst_val(op));
+		pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -733,7 +733,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 
 	/* Make sure that that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
+		pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 46af80279ebc..5fb6f5267d70 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -852,7 +852,7 @@ static int __init emulate_compute_instr(struct pt_regs *regs,
 
 	if (analyse_instr(&op, regs, instr) != 1 ||
 	    GETTYPE(op.type) != COMPUTE) {
-		pr_info("emulation failed, instruction = 0x%08x\n", ppc_inst_val(instr));
+		pr_info("execution failed, instruction = %s\n", ppc_inst_as_str(instr));
 		return -EFAULT;
 	}
 
@@ -872,7 +872,7 @@ static int __init execute_compute_instr(struct pt_regs *regs,
 	/* Patch the NOP with the actual instruction */
 	patch_instruction_site(&patch__exec_instr, instr);
 	if (exec_instr(regs)) {
-		pr_info("execution failed, instruction = 0x%08x\n", ppc_inst_val(instr));
+		pr_info("execution failed, instruction = %s\n", ppc_inst_as_str(instr));
 		return -EFAULT;
 	}
 
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 16ee6639a60c..1dd3bf02021b 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2958,7 +2958,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr,
 		dotted = 0;
 		last_inst = inst;
 		if (praddr)
-			printf(REG"  %.8x", adr, ppc_inst_val(inst));
+			printf(REG"  %s", adr, ppc_inst_as_str(inst));
 		printf("\t");
 		dump_func(ppc_inst_val(inst), adr);
 		printf("\n");
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH] powerpc/64/syscall: Disable sanitisers for C syscall entry/exit code
From: Michael Ellerman @ 2020-06-02  5:25 UTC (permalink / raw)
  To: Daniel Axtens, linuxppc-dev, npiggin; +Cc: ajd, Daniel Axtens
In-Reply-To: <20200529061446.2773-1-dja@axtens.net>

On Fri, 2020-05-29 at 06:14:46 UTC, Daniel Axtens wrote:
> syzkaller is picking up a bunch of crashes that look like this:
> 
> Unrecoverable exception 380 at c00000000037ed60 (msr=8000000000001031)
> Oops: Unrecoverable exception, sig: 6 [#1]
> LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
> Modules linked in:
> CPU: 0 PID: 874 Comm: syz-executor.0 Not tainted 5.7.0-rc7-syzkaller-00016-gb0c3ba31be3e #0
> NIP:  c00000000037ed60 LR: c00000000004bac8 CTR: c000000000030990
> REGS: c0000000555a7230 TRAP: 0380   Not tainted  (5.7.0-rc7-syzkaller-00016-gb0c3ba31be3e)
> MSR:  8000000000001031 <SF,ME,IR,DR,LE>  CR: 48222882  XER: 20000000
> CFAR: c00000000004bac4 IRQMASK: 0
> GPR00: c00000000004bb68 c0000000555a74c0 c0000000024b3500 0000000000000005
> GPR04: 0000000000000000 0000000000000000 c00000000004bb88 c008000000910000
> GPR08: 00000000000b0000 c00000000004bac8 0000000000016000 c000000002503500
> GPR12: c000000000030990 c000000003190000 00000000106a5898 00000000106a0000
> GPR16: 00000000106a5890 c000000007a92000 c000000008180e00 c000000007a8f700
> GPR20: c000000007a904b0 0000000010110000 c00000000259d318 5deadbeef0000100
> GPR24: 5deadbeef0000122 c000000078422700 c000000009ee88b8 c000000078422778
> GPR28: 0000000000000001 800000000280b033 0000000000000000 c0000000555a75a0
> NIP [c00000000037ed60] __sanitizer_cov_trace_pc+0x40/0x50
> LR [c00000000004bac8] interrupt_exit_kernel_prepare+0x118/0x310
> Call Trace:
> [c0000000555a74c0] [c00000000004bb68] interrupt_exit_kernel_prepare+0x1b8/0x310 (unreliable)
> [c0000000555a7530] [c00000000000f9a8] interrupt_return+0x118/0x1c0
> --- interrupt: 900 at __sanitizer_cov_trace_pc+0x0/0x50
> ...<random previous call chain>...
> 
> That looks like the KCOV helper accessing memory that's not safe to
> access in the interrupt handling context.
> 
> Do not instrument the new syscall entry/exit code with KCOV, GCOV or
> UBSAN.
> 
> Cc: Nicholas Piggin <npiggin@gmail.com>
> Fixes: 68b34588e202 ("powerpc/64/sycall: Implement syscall entry/exit logic in C")
> Signed-off-by: Daniel Axtens <dja@axtens.net>

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/2f26ed1764b42a8c40d9c48441c73a70d805decf

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/64s: Fix restore of NV GPRs after facility unavailable exception
From: Michael Ellerman @ 2020-06-02  5:25 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: npiggin
In-Reply-To: <20200526061808.2472279-1-mpe@ellerman.id.au>

On Tue, 2020-05-26 at 06:18:08 UTC, Michael Ellerman wrote:
> Commit 702f09805222 ("powerpc/64s/exception: Remove lite interrupt
> return") changed the interrupt return path to not restore non-volatile
> registers by default, and explicitly restore them in paths where it is
> required.
> 
> But it missed that the facility unavailable exception can sometimes
> modify user registers, ie. when it does emulation of move from DSCR.
> 
> This is seen as a failure of the dscr_sysfs_thread_test:
>   test: dscr_sysfs_thread_test
>   [cpu 0] User DSCR should be 1 but is 0
>   failure: dscr_sysfs_thread_test
> 
> So restore non-volatile GPRs after facility unavailable exceptions.
> 
> Currently the hypervisor facility unavailable exception is also wired
> up to call facility_unavailable_exception().
> 
> In practice we should never take a hypervisor facility unavailable
> exception for the DSCR. On older bare metal systems we set HFSCR_DSCR
> unconditionally in __init_HFSCR, or on newer systems it should be
> enabled via the "data-stream-control-register" device tree CPU
> feature.
> 
> Even if it's not, since commit f3c99f97a3cd ("KVM: PPC: Book3S HV:
> Don't access HFSCR, LPIDR or LPCR when running nested"), the KVM code
> has unconditionally set HFSCR_DSCR when running guests.
> 
> So we should only get a hypervisor facility unavailable for the DSCR
> if skiboot has disabled the "data-stream-control-register" feature,
> and we are somehow in guest context but not via KVM.
> 
> Given all that, it should be unnecessary to add a restore of
> non-volatile GPRs after the hypervisor facility exception, because we
> never expect to hit that path. But equally we may as well add the
> restore, because we never expect to hit that path, and if we ever did,
> at least we would correctly restore the registers to their post
> emulation state.
> 
> In future we can split the non-HV and HV facility unavailable handling
> so that there is no emulation in the HV handler, and then remove the
> restore for the HV case.
> 
> Fixes: 702f09805222 ("powerpc/64s/exception: Remove lite interrupt return")
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Applied to powerpc fixes.

https://git.kernel.org/powerpc/c/595d153dd1022392083ac93a1550382cbee127e0

cheers

^ permalink raw reply

* Re: [PATCH v6 0/2] Append new variables to vmcoreinfo (TCR_EL1.T1SZ for arm64 and MAX_PHYSMEM_BITS for all archs)
From: Bhupesh Sharma @ 2020-06-02  5:24 UTC (permalink / raw)
  To: linux-arm-kernel, x86
  Cc: Mark Rutland, Linux Doc Mailing List, Paul Mackerras,
	Amit Kachhap, Will Deacon, Ingo Molnar, Jonathan Corbet,
	Catalin Marinas, John Donnelly, scott.branden, Boris Petkov,
	Thomas Gleixner, Bhupesh SHARMA, Kazuhito Hagio, Ard Biesheuvel,
	Steve Capper, kexec mailing list, Linux Kernel Mailing List,
	James Morse, Dave Anderson, linuxppc-dev
In-Reply-To: <1589395957-24628-1-git-send-email-bhsharma@redhat.com>

Hello,

On Thu, May 14, 2020 at 12:22 AM Bhupesh Sharma <bhsharma@redhat.com> wrote:
>
> Apologies for the delayed update. Its been quite some time since I
> posted the last version (v5), but I have been really caught up in some
> other critical issues.
>
> Changes since v5:
> ----------------
> - v5 can be viewed here:
>   http://lists.infradead.org/pipermail/kexec/2019-November/024055.html
> - Addressed review comments from James Morse and Boris.
> - Added Tested-by received from John on v5 patchset.
> - Rebased against arm64 (for-next/ptr-auth) branch which has Amit's
>   patchset for ARMv8.3-A Pointer Authentication feature vmcoreinfo
>   applied.
>
> Changes since v4:
> ----------------
> - v4 can be seen here:
>   http://lists.infradead.org/pipermail/kexec/2019-November/023961.html
> - Addressed comments from Dave and added patches for documenting
>   new variables appended to vmcoreinfo documentation.
> - Added testing report shared by Akashi for PATCH 2/5.
>
> Changes since v3:
> ----------------
> - v3 can be seen here:
>   http://lists.infradead.org/pipermail/kexec/2019-March/022590.html
> - Addressed comments from James and exported TCR_EL1.T1SZ in vmcoreinfo
>   instead of PTRS_PER_PGD.
> - Added a new patch (via [PATCH 3/3]), which fixes a simple typo in
>   'Documentation/arm64/memory.rst'
>
> Changes since v2:
> ----------------
> - v2 can be seen here:
>   http://lists.infradead.org/pipermail/kexec/2019-March/022531.html
> - Protected 'MAX_PHYSMEM_BITS' vmcoreinfo variable under CONFIG_SPARSEMEM
>   ifdef sections, as suggested by Kazu.
> - Updated vmcoreinfo documentation to add description about
>   'MAX_PHYSMEM_BITS' variable (via [PATCH 3/3]).
>
> Changes since v1:
> ----------------
> - v1 was sent out as a single patch which can be seen here:
>   http://lists.infradead.org/pipermail/kexec/2019-February/022411.html
>
> - v2 breaks the single patch into two independent patches:
>   [PATCH 1/2] appends 'PTRS_PER_PGD' to vmcoreinfo for arm64 arch, whereas
>   [PATCH 2/2] appends 'MAX_PHYSMEM_BITS' to vmcoreinfo in core kernel code (all archs)
>
> This patchset primarily fixes the regression reported in user-space
> utilities like 'makedumpfile' and 'crash-utility' on arm64 architecture
> with the availability of 52-bit address space feature in underlying
> kernel. These regressions have been reported both on CPUs which don't
> support ARMv8.2 extensions (i.e. LVA, LPA) and are running newer kernels
> and also on prototype platforms (like ARMv8 FVP simulator model) which
> support ARMv8.2 extensions and are running newer kernels.
>
> The reason for these regressions is that right now user-space tools
> have no direct access to these values (since these are not exported
> from the kernel) and hence need to rely on a best-guess method of
> determining value of 'vabits_actual' and 'MAX_PHYSMEM_BITS' supported
> by underlying kernel.
>
> Exporting these values via vmcoreinfo will help user-land in such cases.
> In addition, as per suggestion from makedumpfile maintainer (Kazu),
> it makes more sense to append 'MAX_PHYSMEM_BITS' to
> vmcoreinfo in the core code itself rather than in arm64 arch-specific
> code, so that the user-space code for other archs can also benefit from
> this addition to the vmcoreinfo and use it as a standard way of
> determining 'SECTIONS_SHIFT' value in user-land.
>
> Cc: Boris Petkov <bp@alien8.de>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Jonathan Corbet <corbet@lwn.net>
> Cc: James Morse <james.morse@arm.com>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Steve Capper <steve.capper@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Dave Anderson <anderson@redhat.com>
> Cc: Kazuhito Hagio <k-hagio@ab.jp.nec.com>
> Cc: John Donnelly <john.p.donnelly@oracle.com>
> Cc: scott.branden@broadcom.com
> Cc: Amit Kachhap <amit.kachhap@arm.com>
> Cc: x86@kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org
> Cc: linux-doc@vger.kernel.org
> Cc: kexec@lists.infradead.org
>
> Bhupesh Sharma (2):
>   crash_core, vmcoreinfo: Append 'MAX_PHYSMEM_BITS' to vmcoreinfo
>   arm64/crash_core: Export TCR_EL1.T1SZ in vmcoreinfo
>
>  Documentation/admin-guide/kdump/vmcoreinfo.rst | 16 ++++++++++++++++
>  arch/arm64/include/asm/pgtable-hwdef.h         |  1 +
>  arch/arm64/kernel/crash_core.c                 | 10 ++++++++++
>  kernel/crash_core.c                            |  1 +
>  4 files changed, 28 insertions(+)

Ping. @James Morse , Others

Please share if you have some comments regarding this patchset.

Thanks,
Bhupesh


^ permalink raw reply

* Re: [PATCH] hw_breakpoint: Fix build warnings with clang
From: Christophe Leroy @ 2020-06-02  5:10 UTC (permalink / raw)
  To: Ravi Bangoria, mpe
  Cc: christophe.leroy, apopple, mikey, linux-kernel, npiggin, paulus,
	naveen.n.rao, linuxppc-dev
In-Reply-To: <20200602041208.128913-1-ravi.bangoria@linux.ibm.com>



Le 02/06/2020 à 06:12, Ravi Bangoria a écrit :
> kbuild test robot reported few build warnings with hw_breakpoint code
> when compiled with clang[1]. Fix those.
> 
> [1]: https://lore.kernel.org/linuxppc-dev/202005192233.oi9CjRtA%25lkp@intel.com/
> 
> Reported-by: kbuild test robot <lkp@intel.com>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> ---
> Note: Prepared on top of powerpc/next.
> 
>   arch/powerpc/include/asm/hw_breakpoint.h | 3 ---
>   include/linux/hw_breakpoint.h            | 4 ++++
>   2 files changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
> index f42a55eb77d2..cb424799da0d 100644
> --- a/arch/powerpc/include/asm/hw_breakpoint.h
> +++ b/arch/powerpc/include/asm/hw_breakpoint.h
> @@ -70,9 +70,6 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
>   						unsigned long val, void *data);
>   int arch_install_hw_breakpoint(struct perf_event *bp);
>   void arch_uninstall_hw_breakpoint(struct perf_event *bp);
> -int arch_reserve_bp_slot(struct perf_event *bp);
> -void arch_release_bp_slot(struct perf_event *bp);
> -void arch_unregister_hw_breakpoint(struct perf_event *bp);
>   void hw_breakpoint_pmu_read(struct perf_event *bp);
>   extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
>   
> diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
> index 6058c3844a76..521481f0d320 100644
> --- a/include/linux/hw_breakpoint.h
> +++ b/include/linux/hw_breakpoint.h
> @@ -80,6 +80,10 @@ extern int dbg_reserve_bp_slot(struct perf_event *bp);
>   extern int dbg_release_bp_slot(struct perf_event *bp);
>   extern int reserve_bp_slot(struct perf_event *bp);
>   extern void release_bp_slot(struct perf_event *bp);
> +extern int hw_breakpoint_weight(struct perf_event *bp);
> +extern int arch_reserve_bp_slot(struct perf_event *bp);
> +extern void arch_release_bp_slot(struct perf_event *bp);
> +extern void arch_unregister_hw_breakpoint(struct perf_event *bp);

Please no new 'extern'. In the old days 'extern' keyword was used, but 
new code shall not introduce new unnecessary usage of 'extern' keyword. 
See report from Checkpatch below:

WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description 
(prefer a maximum 75 chars per line)
#9:
[1]: 
https://lore.kernel.org/linuxppc-dev/202005192233.oi9CjRtA%25lkp@intel.com/

CHECK:AVOID_EXTERNS: extern prototypes should be avoided in .h files
#40: FILE: include/linux/hw_breakpoint.h:83:
+extern int hw_breakpoint_weight(struct perf_event *bp);

CHECK:AVOID_EXTERNS: extern prototypes should be avoided in .h files
#41: FILE: include/linux/hw_breakpoint.h:84:
+extern int arch_reserve_bp_slot(struct perf_event *bp);

CHECK:AVOID_EXTERNS: extern prototypes should be avoided in .h files
#42: FILE: include/linux/hw_breakpoint.h:85:
+extern void arch_release_bp_slot(struct perf_event *bp);

CHECK:AVOID_EXTERNS: extern prototypes should be avoided in .h files
#43: FILE: include/linux/hw_breakpoint.h:86:
+extern void arch_unregister_hw_breakpoint(struct perf_event *bp);

total: 0 errors, 1 warnings, 4 checks, 19 lines checked

NOTE: For some of the reported defects, checkpatch may be able to
       mechanically convert to the typical style using --fix or 
--fix-inplace.

the.patch has style problems, please review.

NOTE: Ignored message types: ARCH_INCLUDE_LINUX BIT_MACRO 
COMPARISON_TO_NULL DT_SPLIT_BINDING_PATCH EMAIL_SUBJECT 
FILE_PATH_CHANGES GLOBAL_INITIALISERS LINE_SPACING MULTIPLE_ASSIGNMENTS

NOTE: If any of the errors are false positives, please report
       them to the maintainer, see CHECKPATCH in MAINTAINERS.

Christophe

^ permalink raw reply

* Re: [PATCH] powerpc/nvram: Replace kmalloc with kzalloc in the error message
From: Markus Elfring @ 2020-06-02  5:01 UTC (permalink / raw)
  To: Michael Ellerman, Liao Pingfang, linuxppc-dev
  Cc: Yi Wang, Tony Luck, Kees Cook, Wang Liang, Anton Vorontsov,
	kernel-janitors, LKML, Colin Cross, Paul Mackerras, Xue Zhihong,
	Greg Kroah-Hartman, Thomas Gleixner, Allison Randal
In-Reply-To: <87imgai394.fsf@mpe.ellerman.id.au>

>>> Please just remove the message instead, it's a tiny allocation that's
>>> unlikely to ever fail, and the caller will print an error anyway.
>>
>> How do you think about to take another look at a previous update suggestion
>> like the following?
>>
>> powerpc/nvram: Delete three error messages for a failed memory allocation
>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/00845261-8528-d011-d3b8-e9355a231d3a@users.sourceforge.net/
>> https://lore.kernel.org/linuxppc-dev/00845261-8528-d011-d3b8-e9355a231d3a@users.sourceforge.net/
>> https://lore.kernel.org/patchwork/patch/752720/
>> https://lkml.org/lkml/2017/1/19/537
>
> That deleted the messages from nvram_scan_partitions(), but neither of
> the callers of nvram_scan_paritions() check its return value or print
> anything if it fails. So removing those messages would make those
> failures silent which is not what we want.

* How do you think about information like the following?
  https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/coding-style.rst?id=f359287765c04711ff54fbd11645271d8e5ff763#n883
“…
These generic allocation functions all emit a stack dump on failure when used
without __GFP_NOWARN so there is no use in emitting an additional failure
message when NULL is returned.
…”

* Would you like to clarify software development concerns around
  the Linux allocation failure report any more?

Regards,
Markus

^ permalink raw reply

* [PATCH 2/2] powerpc/rtas: don't online CPUs for partition suspend
From: Nathan Lynch @ 2020-06-02  4:31 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: svaidy, npiggin, ego
In-Reply-To: <20200602043140.397746-1-nathanl@linux.ibm.com>

Partition suspension, used for hibernation and migration, requires
that the OS place all but one of the LPAR's processor threads into one
of two states prior to calling the ibm,suspend-me RTAS function:

  * the architected offline state (via RTAS stop-self); or
  * the H_JOIN hcall, which does not return until the partition
    resumes execution

Using H_CEDE as the offline mode, introduced by
commit 3aa565f53c39 ("powerpc/pseries: Add hooks to put the CPU into
an appropriate offline state"), means that any threads which are
offline from Linux's point of view must be moved to one of those two
states before a partition suspension can proceed.

This was eventually addressed in commit 120496ac2d2d ("powerpc: Bring
all threads online prior to migration/hibernation"), which added code
to temporarily bring up any offline processor threads so they can call
H_JOIN. Conceptually this is fine, but the implementation has had
multiple races with cpu hotplug operations initiated from user
space[1][2][3], the error handling is fragile, and it generates
user-visible cpu hotplug events which is a lot of noise for a platform
feature that's supposed to minimize disruption to workloads.

With commit 3aa565f53c39 ("powerpc/pseries: Add hooks to put the CPU
into an appropriate offline state") reverted, this code becomes
unnecessary, so remove it. Since any offline CPUs now are truly
offline from the platform's point of view, it is no longer necessary
to bring up CPUs only to have them call H_JOIN and then go offline
again upon resuming. Only active threads are required to call H_JOIN;
stopped threads can be left alone.

[1] commit a6717c01ddc2 ("powerpc/rtas: use device model APIs and
    serialization during LPM")
[2] commit 9fb603050ffd ("powerpc/rtas: retry when cpu offline races
    with suspend/migration")
[3] commit dfd718a2ed1f ("powerpc/rtas: Fix a potential race between
    CPU-Offline & Migration")

Fixes: 120496ac2d2d ("powerpc: Bring all threads online prior to migration/hibernation")
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
---
 arch/powerpc/include/asm/rtas.h          |   2 -
 arch/powerpc/kernel/rtas.c               | 122 +----------------------
 arch/powerpc/platforms/pseries/suspend.c |  22 +---
 3 files changed, 3 insertions(+), 143 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 3c1887351c71..bd227e0eab07 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -368,8 +368,6 @@ extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
 extern void rtas_progress(char *s, unsigned short hex);
 extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
 extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
-extern int rtas_online_cpus_mask(cpumask_var_t cpus);
-extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
 extern int rtas_ibm_suspend_me(u64 handle);
 
 struct rtc_time;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index c5fa251b8950..01210593d60c 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -842,96 +842,6 @@ static void rtas_percpu_suspend_me(void *info)
 	__rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
 }
 
-enum rtas_cpu_state {
-	DOWN,
-	UP,
-};
-
-#ifndef CONFIG_SMP
-static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
-				cpumask_var_t cpus)
-{
-	if (!cpumask_empty(cpus)) {
-		cpumask_clear(cpus);
-		return -EINVAL;
-	} else
-		return 0;
-}
-#else
-/* On return cpumask will be altered to indicate CPUs changed.
- * CPUs with states changed will be set in the mask,
- * CPUs with status unchanged will be unset in the mask. */
-static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
-				cpumask_var_t cpus)
-{
-	int cpu;
-	int cpuret = 0;
-	int ret = 0;
-
-	if (cpumask_empty(cpus))
-		return 0;
-
-	for_each_cpu(cpu, cpus) {
-		struct device *dev = get_cpu_device(cpu);
-
-		switch (state) {
-		case DOWN:
-			cpuret = device_offline(dev);
-			break;
-		case UP:
-			cpuret = device_online(dev);
-			break;
-		}
-		if (cpuret < 0) {
-			pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
-					__func__,
-					((state == UP) ? "up" : "down"),
-					cpu, cpuret);
-			if (!ret)
-				ret = cpuret;
-			if (state == UP) {
-				/* clear bits for unchanged cpus, return */
-				cpumask_shift_right(cpus, cpus, cpu);
-				cpumask_shift_left(cpus, cpus, cpu);
-				break;
-			} else {
-				/* clear bit for unchanged cpu, continue */
-				cpumask_clear_cpu(cpu, cpus);
-			}
-		}
-		cond_resched();
-	}
-
-	return ret;
-}
-#endif
-
-int rtas_online_cpus_mask(cpumask_var_t cpus)
-{
-	int ret;
-
-	ret = rtas_cpu_state_change_mask(UP, cpus);
-
-	if (ret) {
-		cpumask_var_t tmp_mask;
-
-		if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL))
-			return ret;
-
-		/* Use tmp_mask to preserve cpus mask from first failure */
-		cpumask_copy(tmp_mask, cpus);
-		rtas_offline_cpus_mask(tmp_mask);
-		free_cpumask_var(tmp_mask);
-	}
-
-	return ret;
-}
-
-int rtas_offline_cpus_mask(cpumask_var_t cpus)
-{
-	return rtas_cpu_state_change_mask(DOWN, cpus);
-}
-
 int rtas_ibm_suspend_me(u64 handle)
 {
 	long state;
@@ -939,8 +849,6 @@ int rtas_ibm_suspend_me(u64 handle)
 	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
 	struct rtas_suspend_me_data data;
 	DECLARE_COMPLETION_ONSTACK(done);
-	cpumask_var_t offline_mask;
-	int cpuret;
 
 	if (!rtas_service_present("ibm,suspend-me"))
 		return -ENOSYS;
@@ -961,9 +869,6 @@ int rtas_ibm_suspend_me(u64 handle)
 		return -EIO;
 	}
 
-	if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL))
-		return -ENOMEM;
-
 	atomic_set(&data.working, 0);
 	atomic_set(&data.done, 0);
 	atomic_set(&data.error, 0);
@@ -972,24 +877,8 @@ int rtas_ibm_suspend_me(u64 handle)
 
 	lock_device_hotplug();
 
-	/* All present CPUs must be online */
-	cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
-	cpuret = rtas_online_cpus_mask(offline_mask);
-	if (cpuret) {
-		pr_err("%s: Could not bring present CPUs online.\n", __func__);
-		atomic_set(&data.error, cpuret);
-		goto out;
-	}
-
 	cpu_hotplug_disable();
 
-	/* Check if we raced with a CPU-Offline Operation */
-	if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) {
-		pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__);
-		atomic_set(&data.error, -EAGAIN);
-		goto out_hotplug_enable;
-	}
-
 	/* Call function on all CPUs.  One of us will make the
 	 * rtas call
 	 */
@@ -1000,18 +889,11 @@ int rtas_ibm_suspend_me(u64 handle)
 	if (atomic_read(&data.error) != 0)
 		printk(KERN_ERR "Error doing global join\n");
 
-out_hotplug_enable:
-	cpu_hotplug_enable();
 
-	/* Take down CPUs not online prior to suspend */
-	cpuret = rtas_offline_cpus_mask(offline_mask);
-	if (cpuret)
-		pr_warn("%s: Could not restore CPUs to offline state.\n",
-				__func__);
+	cpu_hotplug_enable();
 
-out:
 	unlock_device_hotplug();
-	free_cpumask_var(offline_mask);
+
 	return atomic_read(&data.error);
 }
 #else /* CONFIG_PPC_PSERIES */
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index 0a24a5a185f0..f789693f61f4 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -132,15 +132,11 @@ static ssize_t store_hibernate(struct device *dev,
 			       struct device_attribute *attr,
 			       const char *buf, size_t count)
 {
-	cpumask_var_t offline_mask;
 	int rc;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL))
-		return -ENOMEM;
-
 	stream_id = simple_strtoul(buf, NULL, 16);
 
 	do {
@@ -150,32 +146,16 @@ static ssize_t store_hibernate(struct device *dev,
 	} while (rc == -EAGAIN);
 
 	if (!rc) {
-		/* All present CPUs must be online */
-		cpumask_andnot(offline_mask, cpu_present_mask,
-				cpu_online_mask);
-		rc = rtas_online_cpus_mask(offline_mask);
-		if (rc) {
-			pr_err("%s: Could not bring present CPUs online.\n",
-					__func__);
-			goto out;
-		}
-
 		stop_topology_update();
 		rc = pm_suspend(PM_SUSPEND_MEM);
 		start_topology_update();
-
-		/* Take down CPUs not online prior to suspend */
-		if (!rtas_offline_cpus_mask(offline_mask))
-			pr_warn("%s: Could not restore CPUs to offline "
-					"state.\n", __func__);
 	}
 
 	stream_id = 0;
 
 	if (!rc)
 		rc = count;
-out:
-	free_cpumask_var(offline_mask);
+
 	return rc;
 }
 
-- 
2.25.4


^ permalink raw reply related

* [PATCH 0/2] pseries extended cede cpu offline mode removal
From: Nathan Lynch @ 2020-06-02  4:31 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: svaidy, npiggin, ego

I propose removing the "extended cede" offline mode for CPUs as well
as the partition suspend code which accommodates it by temporarily
onlining all CPUs prior to suspending the LPAR.

Detailed justifications are within the individual commit messages.

I'm hoping to move the pseries partition suspend implementation to
the Linux suspend framework, and I expect that having only one offline
mode for CPUs will make that task quite a bit less complex.

Nathan Lynch (2):
  powerpc/pseries: remove cede offline state for CPUs
  powerpc/rtas: don't online CPUs for partition suspend

 Documentation/core-api/cpu_hotplug.rst        |   7 -
 arch/powerpc/include/asm/rtas.h               |   2 -
 arch/powerpc/kernel/rtas.c                    | 122 +------------
 arch/powerpc/platforms/pseries/hotplug-cpu.c  | 170 ++----------------
 .../platforms/pseries/offline_states.h        |  38 ----
 arch/powerpc/platforms/pseries/pmem.c         |   1 -
 arch/powerpc/platforms/pseries/smp.c          |  28 +--
 arch/powerpc/platforms/pseries/suspend.c      |  22 +--
 8 files changed, 18 insertions(+), 372 deletions(-)
 delete mode 100644 arch/powerpc/platforms/pseries/offline_states.h

-- 
2.25.4


^ permalink raw reply

* [PATCH 1/2] powerpc/pseries: remove cede offline state for CPUs
From: Nathan Lynch @ 2020-06-02  4:31 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: svaidy, npiggin, ego
In-Reply-To: <20200602043140.397746-1-nathanl@linux.ibm.com>

This effectively reverts commit 3aa565f53c39 ("powerpc/pseries: Add
hooks to put the CPU into an appropriate offline state"), which added
an offline mode for CPUs which uses the H_CEDE hcall instead of the
architected stop-self RTAS function in order to facilitate "folding"
of dedicated mode processors on PowerVM platforms to achieve energy
savings. This has been the default offline mode since its
introduction.

There's nothing about stop-self that would prevent the hypervisor from
achieving the energy savings available via H_CEDE, so the original
premise of this change appears to be flawed.

I also have encountered the claim that the transition to and from
ceded state is much faster than stop-self/start-cpu. Certainly we
would not want to use stop-self as an *idle* mode. That is what H_CEDE
is for. However, this difference is insignificant in the context of
Linux CPU hotplug, where the latency of an offline or online operation
on current systems is on the order of 100ms, mainly attributable to
all the various subsystems' cpuhp callbacks.

The cede offline mode also prevents accurate accounting, as discussed
before:
https://lore.kernel.org/linuxppc-dev/1571740391-3251-1-git-send-email-ego@linux.vnet.ibm.com/

Unconditionally use stop-self to offline processor threads. This is
the architected method for offlining CPUs on PAPR systems.

The "cede_offline" boot parameter is rendered obsolete.

Removing this code enables the removal of the partition suspend code
which temporarily onlines all present CPUs.

Fixes: 3aa565f53c39 ("powerpc/pseries: Add hooks to put the CPU into an appropriate offline state")

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
---
 Documentation/core-api/cpu_hotplug.rst        |   7 -
 arch/powerpc/platforms/pseries/hotplug-cpu.c  | 170 ++----------------
 .../platforms/pseries/offline_states.h        |  38 ----
 arch/powerpc/platforms/pseries/pmem.c         |   1 -
 arch/powerpc/platforms/pseries/smp.c          |  28 +--
 5 files changed, 15 insertions(+), 229 deletions(-)
 delete mode 100644 arch/powerpc/platforms/pseries/offline_states.h

diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst
index 4a50ab7817f7..b1ae1ac159cf 100644
--- a/Documentation/core-api/cpu_hotplug.rst
+++ b/Documentation/core-api/cpu_hotplug.rst
@@ -50,13 +50,6 @@ Command Line Switches
 
   This option is limited to the X86 and S390 architecture.
 
-``cede_offline={"off","on"}``
-  Use this option to disable/enable putting offlined processors to an extended
-  ``H_CEDE`` state on supported pseries platforms. If nothing is specified,
-  ``cede_offline`` is set to "on".
-
-  This option is limited to the PowerPC architecture.
-
 ``cpu0_hotplug``
   Allow to shutdown CPU0.
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 3e8cbfe7a80f..d4b346355bb9 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -35,54 +35,10 @@
 #include <asm/topology.h>
 
 #include "pseries.h"
-#include "offline_states.h"
 
 /* This version can't take the spinlock, because it never returns */
 static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
 
-static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
-							CPU_STATE_OFFLINE;
-static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
-
-static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
-
-static bool cede_offline_enabled __read_mostly = true;
-
-/*
- * Enable/disable cede_offline when available.
- */
-static int __init setup_cede_offline(char *str)
-{
-	return (kstrtobool(str, &cede_offline_enabled) == 0);
-}
-
-__setup("cede_offline=", setup_cede_offline);
-
-enum cpu_state_vals get_cpu_current_state(int cpu)
-{
-	return per_cpu(current_state, cpu);
-}
-
-void set_cpu_current_state(int cpu, enum cpu_state_vals state)
-{
-	per_cpu(current_state, cpu) = state;
-}
-
-enum cpu_state_vals get_preferred_offline_state(int cpu)
-{
-	return per_cpu(preferred_offline_state, cpu);
-}
-
-void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
-{
-	per_cpu(preferred_offline_state, cpu) = state;
-}
-
-void set_default_offline_state(int cpu)
-{
-	per_cpu(preferred_offline_state, cpu) = default_offline_state;
-}
-
 static void rtas_stop_self(void)
 {
 	static struct rtas_args args;
@@ -101,9 +57,7 @@ static void rtas_stop_self(void)
 
 static void pseries_mach_cpu_die(void)
 {
-	unsigned int cpu = smp_processor_id();
 	unsigned int hwcpu = hard_smp_processor_id();
-	u8 cede_latency_hint = 0;
 
 	local_irq_disable();
 	idle_task_exit();
@@ -112,49 +66,6 @@ static void pseries_mach_cpu_die(void)
 	else
 		xics_teardown_cpu();
 
-	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
-		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
-		if (ppc_md.suspend_disable_cpu)
-			ppc_md.suspend_disable_cpu();
-
-		cede_latency_hint = 2;
-
-		get_lppaca()->idle = 1;
-		if (!lppaca_shared_proc(get_lppaca()))
-			get_lppaca()->donate_dedicated_cpu = 1;
-
-		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
-			while (!prep_irq_for_idle()) {
-				local_irq_enable();
-				local_irq_disable();
-			}
-
-			extended_cede_processor(cede_latency_hint);
-		}
-
-		local_irq_disable();
-
-		if (!lppaca_shared_proc(get_lppaca()))
-			get_lppaca()->donate_dedicated_cpu = 0;
-		get_lppaca()->idle = 0;
-
-		if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
-			unregister_slb_shadow(hwcpu);
-
-			hard_irq_disable();
-			/*
-			 * Call to start_secondary_resume() will not return.
-			 * Kernel stack will be reset and start_secondary()
-			 * will be called to continue the online operation.
-			 */
-			start_secondary_resume();
-		}
-	}
-
-	/* Requested state is CPU_STATE_OFFLINE at this point */
-	WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE);
-
-	set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
 	unregister_slb_shadow(hwcpu);
 	rtas_stop_self();
 
@@ -200,24 +111,13 @@ static void pseries_cpu_die(unsigned int cpu)
 	int cpu_status = 1;
 	unsigned int pcpu = get_hard_smp_processor_id(cpu);
 
-	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
-		cpu_status = 1;
-		for (tries = 0; tries < 5000; tries++) {
-			if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
-				cpu_status = 0;
-				break;
-			}
-			msleep(1);
-		}
-	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
+	for (tries = 0; tries < 25; tries++) {
+		cpu_status = smp_query_cpu_stopped(pcpu);
+		if (cpu_status == QCSS_STOPPED ||
+		    cpu_status == QCSS_HARDWARE_ERROR)
+			break;
+		cpu_relax();
 
-		for (tries = 0; tries < 25; tries++) {
-			cpu_status = smp_query_cpu_stopped(pcpu);
-			if (cpu_status == QCSS_STOPPED ||
-			    cpu_status == QCSS_HARDWARE_ERROR)
-				break;
-			cpu_relax();
-		}
 	}
 
 	if (cpu_status != 0) {
@@ -359,28 +259,15 @@ static int dlpar_offline_cpu(struct device_node *dn)
 			if (get_hard_smp_processor_id(cpu) != thread)
 				continue;
 
-			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+			if (!cpu_online(cpu))
 				break;
 
-			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
-				set_preferred_offline_state(cpu,
-							    CPU_STATE_OFFLINE);
-				cpu_maps_update_done();
-				timed_topology_update(1);
-				rc = device_offline(get_cpu_device(cpu));
-				if (rc)
-					goto out;
-				cpu_maps_update_begin();
-				break;
-			}
-
-			/*
-			 * The cpu is in CPU_STATE_INACTIVE.
-			 * Upgrade it's state to CPU_STATE_OFFLINE.
-			 */
-			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
-			WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS);
-			__cpu_die(cpu);
+			cpu_maps_update_done();
+			timed_topology_update(1);
+			rc = device_offline(get_cpu_device(cpu));
+			if (rc)
+				goto out;
+			cpu_maps_update_begin();
 			break;
 		}
 		if (cpu == num_possible_cpus()) {
@@ -414,8 +301,6 @@ static int dlpar_online_cpu(struct device_node *dn)
 		for_each_present_cpu(cpu) {
 			if (get_hard_smp_processor_id(cpu) != thread)
 				continue;
-			BUG_ON(get_cpu_current_state(cpu)
-					!= CPU_STATE_OFFLINE);
 			cpu_maps_update_done();
 			timed_topology_update(1);
 			find_and_online_cpu_nid(cpu);
@@ -1013,27 +898,8 @@ static struct notifier_block pseries_smp_nb = {
 	.notifier_call = pseries_smp_notifier,
 };
 
-#define MAX_CEDE_LATENCY_LEVELS		4
-#define	CEDE_LATENCY_PARAM_LENGTH	10
-#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
-	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
-#define CEDE_LATENCY_TOKEN		45
-
-static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];
-
-static int parse_cede_parameters(void)
-{
-	memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
-	return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
-			 NULL,
-			 CEDE_LATENCY_TOKEN,
-			 __pa(cede_parameters),
-			 CEDE_LATENCY_PARAM_MAX_LENGTH);
-}
-
 static int __init pseries_cpu_hotplug_init(void)
 {
-	int cpu;
 	int qcss_tok;
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
@@ -1056,16 +922,8 @@ static int __init pseries_cpu_hotplug_init(void)
 	smp_ops->cpu_die = pseries_cpu_die;
 
 	/* Processors can be added/removed only on LPAR */
-	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+	if (firmware_has_feature(FW_FEATURE_LPAR))
 		of_reconfig_notifier_register(&pseries_smp_nb);
-		cpu_maps_update_begin();
-		if (cede_offline_enabled && parse_cede_parameters() == 0) {
-			default_offline_state = CPU_STATE_INACTIVE;
-			for_each_online_cpu(cpu)
-				set_default_offline_state(cpu);
-		}
-		cpu_maps_update_done();
-	}
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h
deleted file mode 100644
index 51414aee2862..000000000000
--- a/arch/powerpc/platforms/pseries/offline_states.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _OFFLINE_STATES_H_
-#define _OFFLINE_STATES_H_
-
-/* Cpu offline states go here */
-enum cpu_state_vals {
-	CPU_STATE_OFFLINE,
-	CPU_STATE_INACTIVE,
-	CPU_STATE_ONLINE,
-	CPU_MAX_OFFLINE_STATES
-};
-
-#ifdef CONFIG_HOTPLUG_CPU
-extern enum cpu_state_vals get_cpu_current_state(int cpu);
-extern void set_cpu_current_state(int cpu, enum cpu_state_vals state);
-extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state);
-extern void set_default_offline_state(int cpu);
-#else
-static inline enum cpu_state_vals get_cpu_current_state(int cpu)
-{
-	return CPU_STATE_ONLINE;
-}
-
-static inline void set_cpu_current_state(int cpu, enum cpu_state_vals state)
-{
-}
-
-static inline void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
-{
-}
-
-static inline void set_default_offline_state(int cpu)
-{
-}
-#endif
-
-extern enum cpu_state_vals get_preferred_offline_state(int cpu);
-#endif
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index f860a897a9e0..f827de7087e9 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -24,7 +24,6 @@
 #include <asm/topology.h>
 
 #include "pseries.h"
-#include "offline_states.h"
 
 static struct device_node *pmem_node;
 
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index ad61e90032da..a8a070269151 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -44,8 +44,6 @@
 #include <asm/svm.h>
 
 #include "pseries.h"
-#include "offline_states.h"
-
 
 /*
  * The Primary thread of each non-boot processor was started from the OF client
@@ -108,10 +106,7 @@ static inline int smp_startup_cpu(unsigned int lcpu)
 
 	/* Fixup atomic count: it exited inside IRQ handler. */
 	task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count	= 0;
-#ifdef CONFIG_HOTPLUG_CPU
-	if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
-		goto out;
-#endif
+
 	/* 
 	 * If the RTAS start-cpu token does not exist then presume the
 	 * cpu is already spinning.
@@ -126,9 +121,6 @@ static inline int smp_startup_cpu(unsigned int lcpu)
 		return 0;
 	}
 
-#ifdef CONFIG_HOTPLUG_CPU
-out:
-#endif
 	return 1;
 }
 
@@ -143,10 +135,6 @@ static void smp_setup_cpu(int cpu)
 		vpa_init(cpu);
 
 	cpumask_clear_cpu(cpu, of_spin_mask);
-#ifdef CONFIG_HOTPLUG_CPU
-	set_cpu_current_state(cpu, CPU_STATE_ONLINE);
-	set_default_offline_state(cpu);
-#endif
 }
 
 static int smp_pSeries_kick_cpu(int nr)
@@ -163,20 +151,6 @@ static int smp_pSeries_kick_cpu(int nr)
 	 * the processor will continue on to secondary_start
 	 */
 	paca_ptrs[nr]->cpu_start = 1;
-#ifdef CONFIG_HOTPLUG_CPU
-	set_preferred_offline_state(nr, CPU_STATE_ONLINE);
-
-	if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
-		long rc;
-		unsigned long hcpuid;
-
-		hcpuid = get_hard_smp_processor_id(nr);
-		rc = plpar_hcall_norets(H_PROD, hcpuid);
-		if (rc != H_SUCCESS)
-			printk(KERN_ERR "Error: Prod to wake up processor %d "
-						"Ret= %ld\n", nr, rc);
-	}
-#endif
 
 	return 0;
 }
-- 
2.25.4


^ permalink raw reply related

* [PATCH] hw_breakpoint: Fix build warnings with clang
From: Ravi Bangoria @ 2020-06-02  4:12 UTC (permalink / raw)
  To: mpe
  Cc: christophe.leroy, ravi.bangoria, mikey, apopple, linux-kernel,
	npiggin, paulus, naveen.n.rao, linuxppc-dev

kbuild test robot reported few build warnings with hw_breakpoint code
when compiled with clang[1]. Fix those.

[1]: https://lore.kernel.org/linuxppc-dev/202005192233.oi9CjRtA%25lkp@intel.com/

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
Note: Prepared on top of powerpc/next.

 arch/powerpc/include/asm/hw_breakpoint.h | 3 ---
 include/linux/hw_breakpoint.h            | 4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index f42a55eb77d2..cb424799da0d 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -70,9 +70,6 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 						unsigned long val, void *data);
 int arch_install_hw_breakpoint(struct perf_event *bp);
 void arch_uninstall_hw_breakpoint(struct perf_event *bp);
-int arch_reserve_bp_slot(struct perf_event *bp);
-void arch_release_bp_slot(struct perf_event *bp);
-void arch_unregister_hw_breakpoint(struct perf_event *bp);
 void hw_breakpoint_pmu_read(struct perf_event *bp);
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
 
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 6058c3844a76..521481f0d320 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -80,6 +80,10 @@ extern int dbg_reserve_bp_slot(struct perf_event *bp);
 extern int dbg_release_bp_slot(struct perf_event *bp);
 extern int reserve_bp_slot(struct perf_event *bp);
 extern void release_bp_slot(struct perf_event *bp);
+extern int hw_breakpoint_weight(struct perf_event *bp);
+extern int arch_reserve_bp_slot(struct perf_event *bp);
+extern void arch_release_bp_slot(struct perf_event *bp);
+extern void arch_unregister_hw_breakpoint(struct perf_event *bp);
 
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH] cxl: Remove dead Kconfig options
From: Andrew Donnellan @ 2020-06-02  4:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: fbarrat

The CXL_AFU_DRIVER_OPS and CXL_LIB Kconfig options were added to coordinate
merging of new features. They no longer serve any purpose, so remove them.

Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
---
 drivers/misc/cxl/Kconfig | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index 39eec9031487..51aecafdcbdf 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -7,18 +7,10 @@ config CXL_BASE
 	bool
 	select PPC_COPRO_BASE
 
-config CXL_AFU_DRIVER_OPS
-	bool
-
-config CXL_LIB
-	bool
-
 config CXL
 	tristate "Support for IBM Coherent Accelerators (CXL)"
 	depends on PPC_POWERNV && PCI_MSI && EEH
 	select CXL_BASE
-	select CXL_AFU_DRIVER_OPS
-	select CXL_LIB
 	default m
 	help
 	  Select this option to enable driver support for IBM Coherent
-- 
2.20.1


^ permalink raw reply related

* [PATCH 7/7] powerpc/watchpoint: Remove 512 byte boundary
From: Ravi Bangoria @ 2020-06-02  4:01 UTC (permalink / raw)
  To: mpe, mikey
  Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec, oleg,
	npiggin, linux-kernel, paulus, jolsa, naveen.n.rao, linuxppc-dev,
	mingo
In-Reply-To: <20200602040106.127693-1-ravi.bangoria@linux.ibm.com>

Power10 has removed 512 bytes boundary from match criteria. i.e. The match
range can be 512 bytes unaligned as well.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 arch/powerpc/kernel/hw_breakpoint.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 0000daf0e1da..270e655108f4 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -418,8 +418,9 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
 
 	if (dawr_enabled()) {
 		max_len = DAWR_MAX_LEN;
-		/* DAWR region can't cross 512 bytes boundary */
-		if (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M))
+		/* DAWR region can't cross 512 bytes boundary on p8 and p9 */
+		if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
+		    (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M)))
 			return -EINVAL;
 	} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
 		/* 8xx can setup a range without limitation */
-- 
2.26.2


^ permalink raw reply related

* [PATCH 6/7] powerpc/watchpoint: Return available watchpoints dynamically
From: Ravi Bangoria @ 2020-06-02  4:01 UTC (permalink / raw)
  To: mpe, mikey
  Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec, oleg,
	npiggin, linux-kernel, paulus, jolsa, naveen.n.rao, linuxppc-dev,
	mingo
In-Reply-To: <20200602040106.127693-1-ravi.bangoria@linux.ibm.com>

So far Book3S Powerpc supported only one watchpoint. But Power10 is
introducing 2nd DAWR. Enable 2nd DAWR support for Power10. Availability
of 2nd DAWR will depend on CPU_FTR_DAWR1.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 arch/powerpc/include/asm/cputable.h      | 4 +++-
 arch/powerpc/include/asm/hw_breakpoint.h | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 3445c86e1f6f..f110f70a11bf 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -633,7 +633,9 @@ enum {
  * Maximum number of hw breakpoint supported on powerpc. Number of
  * breakpoints supported by actual hw might be less than this.
  */
-#define HBP_NUM_MAX	1
+#define HBP_NUM_MAX		2
+#define HBP_NUM_P7_TO_P9	1
+#define HBP_NUM_P10		2
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index f42a55eb77d2..0365a137d6e8 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -5,10 +5,11 @@
  * Copyright 2010, IBM Corporation.
  * Author: K.Prasad <prasad@linux.vnet.ibm.com>
  */
-
 #ifndef _PPC_BOOK3S_64_HW_BREAKPOINT_H
 #define _PPC_BOOK3S_64_HW_BREAKPOINT_H
 
+#include <asm/cpu_has_feature.h>
+
 #ifdef	__KERNEL__
 struct arch_hw_breakpoint {
 	unsigned long	address;
@@ -46,7 +47,7 @@ struct arch_hw_breakpoint {
 
 static inline int nr_wp_slots(void)
 {
-	return HBP_NUM_MAX;
+	return cpu_has_feature(CPU_FTR_DAWR1) ? HBP_NUM_P10 : HBP_NUM_P7_TO_P9;
 }
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-- 
2.26.2


^ permalink raw reply related

* [PATCH 5/7] powerpc/watchpoint: Guest support for 2nd DAWR hcall
From: Ravi Bangoria @ 2020-06-02  4:01 UTC (permalink / raw)
  To: mpe, mikey
  Cc: christophe.leroy, ravi.bangoria, apopple, peterz, fweisbec, oleg,
	npiggin, linux-kernel, paulus, jolsa, naveen.n.rao, linuxppc-dev,
	mingo
In-Reply-To: <20200602040106.127693-1-ravi.bangoria@linux.ibm.com>

2nd DAWR can be set/unset using H_SET_MODE hcall with resource value 5.
Enable powervm guest support with that. This has no effect on kvm guest
because kvm will return error if guest does hcall with resource value 5.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 arch/powerpc/include/asm/hvcall.h         | 1 +
 arch/powerpc/include/asm/machdep.h        | 2 +-
 arch/powerpc/include/asm/plpar_wrappers.h | 5 +++++
 arch/powerpc/kernel/dawr.c                | 2 +-
 arch/powerpc/platforms/pseries/setup.c    | 7 +++++--
 5 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a7f6f1aeda6b..3f170b9496a1 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -357,6 +357,7 @@
 #define H_SET_MODE_RESOURCE_SET_DAWR0		2
 #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE	3
 #define H_SET_MODE_RESOURCE_LE			4
+#define H_SET_MODE_RESOURCE_SET_DAWR1		5
 
 /* Values for argument to H_SIGNAL_SYS_RESET */
 #define H_SIGNAL_SYS_RESET_ALL			-1
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 7bcb64444a39..a90b892f0bfe 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -131,7 +131,7 @@ struct machdep_calls {
 				    unsigned long dabrx);
 
 	/* Set DAWR for this platform, leave empty for default implementation */
-	int		(*set_dawr)(unsigned long dawr,
+	int		(*set_dawr)(int nr, unsigned long dawr,
 				    unsigned long dawrx);
 
 #ifdef CONFIG_PPC32	/* XXX for now */
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 93eb133d572c..d7a1acc83593 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -315,6 +315,11 @@ static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawr
 	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR0, dawr0, dawrx0);
 }
 
+static inline long plpar_set_watchpoint1(unsigned long dawr1, unsigned long dawrx1)
+{
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR1, dawr1, dawrx1);
+}
+
 static inline long plpar_signal_sys_reset(long cpu)
 {
 	return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 500f52fa4711..cdc2dccb987d 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -37,7 +37,7 @@ int set_dawr(int nr, struct arch_hw_breakpoint *brk)
 	dawrx |= (mrd & 0x3f) << (63 - 53);
 
 	if (ppc_md.set_dawr)
-		return ppc_md.set_dawr(dawr, dawrx);
+		return ppc_md.set_dawr(nr, dawr, dawrx);
 
 	if (nr == 0) {
 		mtspr(SPRN_DAWR0, dawr);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 64d18f4bf093..b001cde1a2d7 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -832,12 +832,15 @@ static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
 	return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
 }
 
-static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
+static int pseries_set_dawr(int nr, unsigned long dawr, unsigned long dawrx)
 {
 	/* PAPR says we can't set HYP */
 	dawrx &= ~DAWRX_HYP;
 
-	return  plpar_set_watchpoint0(dawr, dawrx);
+	if (nr == 0)
+		return plpar_set_watchpoint0(dawr, dawrx);
+	else
+		return plpar_set_watchpoint1(dawr, dawrx);
 }
 
 #define CMO_CHARACTERISTICS_TOKEN 44
-- 
2.26.2


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox