LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH kernel v4 2/8] genirq/irqdomain: Clean legacy IRQ allocation
From: Alexey Kardashevskiy @ 2020-11-24  6:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexey Kardashevskiy, Marc Zyngier, x86, linux-gpio,
	Oliver O'Halloran, Cédric Le Goater, Frederic Barrat,
	Thomas Gleixner, Michal Suchánek, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20201124061720.86766-1-aik@ozlabs.ru>

There are 10 users of __irq_domain_alloc_irqs() and only one - IOAPIC -
passes realloc==true. There is no obvious reason for handling this
specific case in the generic code.

This splits out __irq_domain_alloc_irqs_data() to make it clear what
IOAPIC does and makes __irq_domain_alloc_irqs() cleaner.

This should cause no behavioral change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 include/linux/irqdomain.h      |  3 ++
 arch/x86/kernel/apic/io_apic.c | 13 +++--
 kernel/irq/irqdomain.c         | 89 ++++++++++++++++++++--------------
 3 files changed, 65 insertions(+), 40 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 71535e87109f..6cc37bba9951 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -470,6 +470,9 @@ static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *par
 					   ops, host_data);
 }
 
+extern int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq,
+					unsigned int nr_irqs, int node, void *arg,
+					const struct irq_affinity_desc *affinity);
 extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
 				   unsigned int nr_irqs, int node, void *arg,
 				   bool realloc,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7b3c7e0d4a09..df9c0ab3a119 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -970,9 +970,14 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,
 		return -1;
 	}
 
-	return __irq_domain_alloc_irqs(domain, irq, 1,
-				       ioapic_alloc_attr_node(info),
-				       info, legacy, NULL);
+	if (irq == -1 || !legacy)
+		return __irq_domain_alloc_irqs(domain, irq, 1,
+					       ioapic_alloc_attr_node(info),
+					       info, false, NULL);
+
+	return __irq_domain_alloc_irqs_data(domain, irq, 1,
+					    ioapic_alloc_attr_node(info),
+					    info, NULL);
 }
 
 /*
@@ -1006,7 +1011,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain,
 			return -ENOMEM;
 	} else {
 		info->flags |= X86_IRQ_ALLOC_LEGACY;
-		irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true,
+		irq = __irq_domain_alloc_irqs_data(domain, irq, 1, node, info,
 					      NULL);
 		if (irq >= 0) {
 			irq_data = irq_domain_get_irq_data(domain, irq);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index cf8b374b892d..ca5c78366c85 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1386,6 +1386,51 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
 	return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
 }
 
+int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq,
+				 unsigned int nr_irqs, int node, void *arg,
+				 const struct irq_affinity_desc *affinity)
+{
+	int i, ret;
+
+	if (domain == NULL) {
+		domain = irq_default_domain;
+		if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+			return -EINVAL;
+	}
+
+	if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) {
+		pr_debug("cannot allocate memory for IRQ%d\n", virq);
+		ret = -ENOMEM;
+		goto out_free_irq_data;
+	}
+
+	mutex_lock(&irq_domain_mutex);
+	ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
+	if (ret < 0) {
+		mutex_unlock(&irq_domain_mutex);
+		goto out_free_irq_data;
+	}
+
+	for (i = 0; i < nr_irqs; i++) {
+		ret = irq_domain_trim_hierarchy(virq + i);
+		if (ret) {
+			mutex_unlock(&irq_domain_mutex);
+			goto out_free_irq_data;
+		}
+	}
+
+	for (i = 0; i < nr_irqs; i++) {
+		irq_domain_insert_irq(virq + i);
+	}
+	mutex_unlock(&irq_domain_mutex);
+
+	return virq;
+
+out_free_irq_data:
+	irq_domain_free_irq_data(virq, nr_irqs);
+	return ret;
+}
+
 /**
  * __irq_domain_alloc_irqs - Allocate IRQs from domain
  * @domain:	domain to allocate from
@@ -1412,7 +1457,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
 			    unsigned int nr_irqs, int node, void *arg,
 			    bool realloc, const struct irq_affinity_desc *affinity)
 {
-	int i, ret, virq;
+	int ret, virq;
 
 	if (domain == NULL) {
 		domain = irq_default_domain;
@@ -1420,47 +1465,19 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
 			return -EINVAL;
 	}
 
-	if (realloc && irq_base >= 0) {
-		virq = irq_base;
-	} else {
-		virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node,
-					      affinity);
-		if (virq < 0) {
-			pr_debug("cannot allocate IRQ(base %d, count %d)\n",
-				 irq_base, nr_irqs);
-			return virq;
-		}
+	virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node, affinity);
+	if (virq < 0) {
+		pr_debug("cannot allocate IRQ(base %d, count %d)\n",
+			 irq_base, nr_irqs);
+		return virq;
 	}
 
-	if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) {
-		pr_debug("cannot allocate memory for IRQ%d\n", virq);
-		ret = -ENOMEM;
+	ret = __irq_domain_alloc_irqs_data(domain, virq, nr_irqs, node, arg, affinity);
+	if (ret <= 0)
 		goto out_free_desc;
-	}
-
-	mutex_lock(&irq_domain_mutex);
-	ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
-	if (ret < 0) {
-		mutex_unlock(&irq_domain_mutex);
-		goto out_free_irq_data;
-	}
-
-	for (i = 0; i < nr_irqs; i++) {
-		ret = irq_domain_trim_hierarchy(virq + i);
-		if (ret) {
-			mutex_unlock(&irq_domain_mutex);
-			goto out_free_irq_data;
-		}
-	}
-	
-	for (i = 0; i < nr_irqs; i++)
-		irq_domain_insert_irq(virq + i);
-	mutex_unlock(&irq_domain_mutex);
 
 	return virq;
 
-out_free_irq_data:
-	irq_domain_free_irq_data(virq, nr_irqs);
 out_free_desc:
 	irq_free_descs(virq, nr_irqs);
 	return ret;
-- 
2.17.1


^ permalink raw reply related

* [PATCH kernel v4 1/8] genirq/ipi:  Simplify irq_reserve_ipi
From: Alexey Kardashevskiy @ 2020-11-24  6:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexey Kardashevskiy, Marc Zyngier, x86, linux-gpio,
	Oliver O'Halloran, Cédric Le Goater, Frederic Barrat,
	Thomas Gleixner, Michal Suchánek, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20201124061720.86766-1-aik@ozlabs.ru>

__irq_domain_alloc_irqs() can already handle virq==-1 and free
descriptors if it failed allocating hardware interrupts so let's skip
this extra step.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 kernel/irq/ipi.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c
index 43e3d1be622c..1b2807318ea9 100644
--- a/kernel/irq/ipi.c
+++ b/kernel/irq/ipi.c
@@ -75,18 +75,12 @@ int irq_reserve_ipi(struct irq_domain *domain,
 		}
 	}
 
-	virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE, NULL);
-	if (virq <= 0) {
-		pr_warn("Can't reserve IPI, failed to alloc descs\n");
-		return -ENOMEM;
-	}
-
-	virq = __irq_domain_alloc_irqs(domain, virq, nr_irqs, NUMA_NO_NODE,
-				       (void *) dest, true, NULL);
+	virq = __irq_domain_alloc_irqs(domain, -1, nr_irqs, NUMA_NO_NODE,
+				       (void *) dest, false, NULL);
 
 	if (virq <= 0) {
 		pr_warn("Can't reserve IPI, failed to alloc hw irqs\n");
-		goto free_descs;
+		return -EBUSY;
 	}
 
 	for (i = 0; i < nr_irqs; i++) {
@@ -96,10 +90,6 @@ int irq_reserve_ipi(struct irq_domain *domain,
 		irq_set_status_flags(virq + i, IRQ_NO_BALANCING);
 	}
 	return virq;
-
-free_descs:
-	irq_free_descs(virq, nr_irqs);
-	return -EBUSY;
 }
 
 /**
-- 
2.17.1


^ permalink raw reply related

* [PATCH kernel v4 4/8] genirq: Free IRQ descriptor via embedded kobject
From: Alexey Kardashevskiy @ 2020-11-24  6:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexey Kardashevskiy, Marc Zyngier, x86, linux-gpio,
	Oliver O'Halloran, Cédric Le Goater, Frederic Barrat,
	Thomas Gleixner, Michal Suchánek, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20201124061720.86766-1-aik@ozlabs.ru>

At the moment the IRQ descriptor is freed via the free_desc() helper.
We want to add reference counting to IRQ descriptors and there is already
kobj embedded into irq_desc which we want to reuse.

This shuffles free_desc()/etc to make it simply call kobject_put() and
moves all the cleanup into the kobject_release hook.

As a bonus, we do not need irq_sysfs_del() as kobj removes itself from
sysfs if it knows that it was added.

This should cause no behavioral change.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 kernel/irq/irqdesc.c | 42 ++++++++++++------------------------------
 1 file changed, 12 insertions(+), 30 deletions(-)

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 1a7723604399..75374b7944b5 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -295,18 +295,6 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc)
 	}
 }
 
-static void irq_sysfs_del(struct irq_desc *desc)
-{
-	/*
-	 * If irq_sysfs_init() has not yet been invoked (early boot), then
-	 * irq_kobj_base is NULL and the descriptor was never added.
-	 * kobject_del() complains about a object with no parent, so make
-	 * it conditional.
-	 */
-	if (irq_kobj_base)
-		kobject_del(&desc->kobj);
-}
-
 static int __init irq_sysfs_init(void)
 {
 	struct irq_desc *desc;
@@ -337,7 +325,6 @@ static struct kobj_type irq_kobj_type = {
 };
 
 static void irq_sysfs_add(int irq, struct irq_desc *desc) {}
-static void irq_sysfs_del(struct irq_desc *desc) {}
 
 #endif /* CONFIG_SYSFS */
 
@@ -419,39 +406,34 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
 	return NULL;
 }
 
-static void irq_kobj_release(struct kobject *kobj)
-{
-	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
-
-	free_masks(desc);
-	free_percpu(desc->kstat_irqs);
-	kfree(desc);
-}
-
 static void delayed_free_desc(struct rcu_head *rhp)
 {
 	struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
 
+	free_masks(desc);
+	free_percpu(desc->kstat_irqs);
+	kfree(desc);
+}
+
+static void free_desc(unsigned int irq)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
 	kobject_put(&desc->kobj);
 }
 
-static void free_desc(unsigned int irq)
+static void irq_kobj_release(struct kobject *kobj)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	unsigned int irq = desc->irq_data.irq;
 
 	irq_remove_debugfs_entry(desc);
 	unregister_irq_proc(irq, desc);
 
 	/*
-	 * sparse_irq_lock protects also show_interrupts() and
-	 * kstat_irq_usr(). Once we deleted the descriptor from the
-	 * sparse tree we can free it. Access in proc will fail to
-	 * lookup the descriptor.
-	 *
 	 * The sysfs entry must be serialized against a concurrent
 	 * irq_sysfs_init() as well.
 	 */
-	irq_sysfs_del(desc);
 	delete_irq_desc(irq);
 
 	/*
-- 
2.17.1


^ permalink raw reply related

* [PATCH kernel v4 0/8] genirq/irqdomain: Add reference counting to IRQs
From: Alexey Kardashevskiy @ 2020-11-24  6:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexey Kardashevskiy, Marc Zyngier, x86, linux-gpio,
	Oliver O'Halloran, Cédric Le Goater, Frederic Barrat,
	Thomas Gleixner, Michal Suchánek, linuxppc-dev,
	linux-arm-kernel

This is another attempt to add reference counting to IRQ
descriptors; or - more to the point - reuse already existing
kobj from irq_desc. This allows the same IRQ to be used several
times (such as legacy PCI INTx) and when disposing those, only
the last reference drop clears the hardware mappings.
Domains do not add references to irq_desc as the whole point of
this exercise is to move actual cleanup in hardware to
the last reference drop. This only changes sparse interrupts
(no idea about the other case yet).

No changelog as it is all completely rewritten. I am still running
tests but I hope this demonstrates the idea.

Some context from Cedric:
The background context for such a need is that the POWER9 and POWER10
processors have a new XIVE interrupt controller which uses MMIO pages
for interrupt management. Each interrupt has a pair of pages which are
required to be unmapped in some environment, like PHB removal. And so,
all interrupts need to be unmmaped.

1/8 .. 3/8 are removing confusing "realloc" which not strictly required
but I was touching this anyway and legacy interrupts should probably use
the new counting anyway;

4/8 .. 6/8 is reordering irq_desc disposal;

7/8 adds extra references (probably missed other places);

8/8 is the fix for the original XIVE bug; it is here for demonstration.

I am cc'ing ppc list so people can pull the patches from that patchworks.

This is based on sha1
418baf2c28f3 Linus Torvalds "Linux 5.10-rc5".

and pushed out to
https://github.com/aik/linux/commits/irqs
sha1 3955f97c448242f6a

Please comment. Thanks.

Alexey Kardashevskiy (7):
  genirq/ipi:  Simplify irq_reserve_ipi
  genirq/irqdomain: Clean legacy IRQ allocation
  genirq/irqdomain: Drop unused realloc parameter from
    __irq_domain_alloc_irqs
  genirq: Free IRQ descriptor via embedded kobject
  genirq: Add free_irq hook for IRQ descriptor and use for mapping
    disposal
  genirq/irqdomain: Move hierarchical IRQ cleanup to kobject_release
  genirq/irqdomain: Reference irq_desc for already mapped irqs

Oliver O'Halloran (1):
  powerpc/pci: Remove LSI mappings on device teardown

 include/linux/irqdesc.h             |   1 +
 include/linux/irqdomain.h           |   9 +-
 include/linux/irqhandler.h          |   1 +
 arch/powerpc/kernel/pci-common.c    |  21 ++++
 arch/x86/kernel/apic/io_apic.c      |  13 ++-
 drivers/gpio/gpiolib.c              |   1 -
 drivers/irqchip/irq-armada-370-xp.c |   2 +-
 drivers/irqchip/irq-bcm2836.c       |   3 +-
 drivers/irqchip/irq-gic-v3.c        |   3 +-
 drivers/irqchip/irq-gic-v4.c        |   6 +-
 drivers/irqchip/irq-gic.c           |   3 +-
 drivers/irqchip/irq-ixp4xx.c        |   1 -
 kernel/irq/ipi.c                    |  16 +--
 kernel/irq/irqdesc.c                |  45 +++-----
 kernel/irq/irqdomain.c              | 160 +++++++++++++++++-----------
 kernel/irq/msi.c                    |   2 +-
 16 files changed, 158 insertions(+), 129 deletions(-)

-- 
2.17.1

^ permalink raw reply

* [PATCH kernel v4 7/8] genirq/irqdomain: Reference irq_desc for already mapped irqs
From: Alexey Kardashevskiy @ 2020-11-24  6:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexey Kardashevskiy, Marc Zyngier, x86, linux-gpio,
	Oliver O'Halloran, Cédric Le Goater, Frederic Barrat,
	Thomas Gleixner, Michal Suchánek, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20201124061720.86766-1-aik@ozlabs.ru>

This references an irq_desc if already mapped interrupt requested to map
again. This happends for PCI legacy interrupts where 4 interrupts are
shared among all devices on the same PCI host bus adapter.

From now on, the users shall call irq_dispose_mapping() for every
irq_create_fwspec_mapping(). Most (all?) users do not bother with
disposing though so it is not very likely to break many things.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 kernel/irq/irqdomain.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index a0a81cc6c524..07f4bde87de5 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -663,7 +663,9 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
 	/* Check if mapping already exists */
 	virq = irq_find_mapping(domain, hwirq);
 	if (virq) {
+		desc = irq_to_desc(virq);
 		pr_debug("-> existing mapping on virq %d\n", virq);
+		kobject_get(&desc->kobj);
 		return virq;
 	}
 
@@ -762,6 +764,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 	irq_hw_number_t hwirq;
 	unsigned int type = IRQ_TYPE_NONE;
 	int virq;
+	struct irq_desc *desc;
 
 	if (fwspec->fwnode) {
 		domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED);
@@ -798,8 +801,11 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 		 * current trigger type then we are done so return the
 		 * interrupt number.
 		 */
-		if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
+		if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) {
+			desc = irq_to_desc(virq);
+			kobject_get(&desc->kobj);
 			return virq;
+		}
 
 		/*
 		 * If the trigger type has not been set yet, then set
@@ -811,6 +817,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 				return 0;
 
 			irqd_set_trigger_type(irq_data, type);
+			desc = irq_to_desc(virq);
+			kobject_get(&desc->kobj);
 			return virq;
 		}
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH kernel v4 3/8] genirq/irqdomain: Drop unused realloc parameter from __irq_domain_alloc_irqs
From: Alexey Kardashevskiy @ 2020-11-24  6:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexey Kardashevskiy, Marc Zyngier, x86, linux-gpio,
	Oliver O'Halloran, Cédric Le Goater, Frederic Barrat,
	Thomas Gleixner, Michal Suchánek, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20201124061720.86766-1-aik@ozlabs.ru>

The two previous patches made @realloc obsolete. This finishes removing it.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 include/linux/irqdomain.h           | 4 +---
 arch/x86/kernel/apic/io_apic.c      | 2 +-
 drivers/gpio/gpiolib.c              | 1 -
 drivers/irqchip/irq-armada-370-xp.c | 2 +-
 drivers/irqchip/irq-bcm2836.c       | 3 +--
 drivers/irqchip/irq-gic-v3.c        | 3 +--
 drivers/irqchip/irq-gic-v4.c        | 6 ++----
 drivers/irqchip/irq-gic.c           | 3 +--
 drivers/irqchip/irq-ixp4xx.c        | 1 -
 kernel/irq/ipi.c                    | 2 +-
 kernel/irq/irqdomain.c              | 4 +---
 kernel/irq/msi.c                    | 2 +-
 12 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 6cc37bba9951..a353b93ddf9e 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -475,7 +475,6 @@ extern int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq,
 					const struct irq_affinity_desc *affinity);
 extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
 				   unsigned int nr_irqs, int node, void *arg,
-				   bool realloc,
 				   const struct irq_affinity_desc *affinity);
 extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs);
 extern int irq_domain_activate_irq(struct irq_data *irq_data, bool early);
@@ -484,8 +483,7 @@ extern void irq_domain_deactivate_irq(struct irq_data *irq_data);
 static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
 			unsigned int nr_irqs, int node, void *arg)
 {
-	return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false,
-				       NULL);
+	return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, NULL);
 }
 
 extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index df9c0ab3a119..5b45f0874571 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -973,7 +973,7 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,
 	if (irq == -1 || !legacy)
 		return __irq_domain_alloc_irqs(domain, irq, 1,
 					       ioapic_alloc_attr_node(info),
-					       info, false, NULL);
+					       info, NULL);
 
 	return __irq_domain_alloc_irqs_data(domain, irq, 1,
 					    ioapic_alloc_attr_node(info),
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 089ddcaa9bc6..b7cfecb5c701 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1059,7 +1059,6 @@ static void gpiochip_set_hierarchical_irqchip(struct gpio_chip *gc,
 						      1,
 						      NUMA_NO_NODE,
 						      &fwspec,
-						      false,
 						      NULL);
 			if (ret < 0) {
 				chip_err(gc,
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index d7eb2e93db8f..bf17eb312669 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -431,7 +431,7 @@ static __init void armada_xp_ipi_init(struct device_node *node)
 
 	irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI);
 	base_ipi = __irq_domain_alloc_irqs(ipi_domain, -1, IPI_DOORBELL_END,
-					   NUMA_NO_NODE, NULL, false, NULL);
+					   NUMA_NO_NODE, NULL, NULL);
 	if (WARN_ON(!base_ipi))
 		return;
 
diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c
index cbc7c740e4dc..fe9ff90940d3 100644
--- a/drivers/irqchip/irq-bcm2836.c
+++ b/drivers/irqchip/irq-bcm2836.c
@@ -269,8 +269,7 @@ static void __init bcm2836_arm_irqchip_smp_init(void)
 	irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI);
 
 	base_ipi = __irq_domain_alloc_irqs(ipi_domain, -1, BITS_PER_MBOX,
-					   NUMA_NO_NODE, NULL,
-					   false, NULL);
+					   NUMA_NO_NODE, NULL, NULL);
 
 	if (WARN_ON(!base_ipi))
 		return;
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 16fecc0febe8..ff20fd54921f 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1163,8 +1163,7 @@ static void __init gic_smp_init(void)
 
 	/* Register all 8 non-secure SGIs */
 	base_sgi = __irq_domain_alloc_irqs(gic_data.domain, -1, 8,
-					   NUMA_NO_NODE, &sgi_fwspec,
-					   false, NULL);
+					   NUMA_NO_NODE, &sgi_fwspec, NULL);
 	if (WARN_ON(base_sgi <= 0))
 		return;
 
diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index 0c18714ae13e..dd64dc50610c 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -117,8 +117,7 @@ static int its_alloc_vcpu_sgis(struct its_vpe *vpe, int idx)
 		goto err;
 
 	sgi_base = __irq_domain_alloc_irqs(vpe->sgi_domain, -1, 16,
-					       NUMA_NO_NODE, vpe,
-					       false, NULL);
+					       NUMA_NO_NODE, vpe, NULL);
 	if (sgi_base <= 0)
 		goto err;
 
@@ -154,8 +153,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm)
 	}
 
 	vpe_base_irq = __irq_domain_alloc_irqs(vm->domain, -1, vm->nr_vpes,
-					       NUMA_NO_NODE, vm,
-					       false, NULL);
+					       NUMA_NO_NODE, vm, NULL);
 	if (vpe_base_irq <= 0)
 		goto err;
 
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 6053245a4754..28e5e5e4836a 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -846,8 +846,7 @@ static __init void gic_smp_init(void)
 				  gic_starting_cpu, NULL);
 
 	base_sgi = __irq_domain_alloc_irqs(gic_data[0].domain, -1, 8,
-					   NUMA_NO_NODE, &sgi_fwspec,
-					   false, NULL);
+					   NUMA_NO_NODE, &sgi_fwspec, NULL);
 	if (WARN_ON(base_sgi <= 0))
 		return;
 
diff --git a/drivers/irqchip/irq-ixp4xx.c b/drivers/irqchip/irq-ixp4xx.c
index 37e0749215c7..9dafcc22b592 100644
--- a/drivers/irqchip/irq-ixp4xx.c
+++ b/drivers/irqchip/irq-ixp4xx.c
@@ -353,7 +353,6 @@ void __init ixp4xx_irq_init(resource_size_t irqbase,
 					      chunk->nr_irqs,
 					      NUMA_NO_NODE,
 					      &fwspec,
-					      false,
 					      NULL);
 		if (ret < 0) {
 			pr_crit("IXP4XX: can not allocate irqs in hierarchy %d\n",
diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c
index 1b2807318ea9..fc20adf7ee0d 100644
--- a/kernel/irq/ipi.c
+++ b/kernel/irq/ipi.c
@@ -76,7 +76,7 @@ int irq_reserve_ipi(struct irq_domain *domain,
 	}
 
 	virq = __irq_domain_alloc_irqs(domain, -1, nr_irqs, NUMA_NO_NODE,
-				       (void *) dest, false, NULL);
+				       (void *) dest, NULL);
 
 	if (virq <= 0) {
 		pr_warn("Can't reserve IPI, failed to alloc hw irqs\n");
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index ca5c78366c85..805478f81d96 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1438,12 +1438,10 @@ int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq,
  * @nr_irqs:	number of IRQs to allocate
  * @node:	NUMA node id for memory allocation
  * @arg:	domain specific argument
- * @realloc:	IRQ descriptors have already been allocated if true
  * @affinity:	Optional irq affinity mask for multiqueue devices
  *
  * Allocate IRQ numbers and initialized all data structures to support
  * hierarchy IRQ domains.
- * Parameter @realloc is mainly to support legacy IRQs.
  * Returns error code or allocated IRQ number
  *
  * The whole process to setup an IRQ has been split into two steps.
@@ -1455,7 +1453,7 @@ int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq,
  */
 int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
 			    unsigned int nr_irqs, int node, void *arg,
-			    bool realloc, const struct irq_affinity_desc *affinity)
+			    const struct irq_affinity_desc *affinity)
 {
 	int ret, virq;
 
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 2c0c4d6d0f83..b1898514d9dc 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -414,7 +414,7 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 		ops->set_desc(&arg, desc);
 
 		virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
-					       dev_to_node(dev), &arg, false,
+					       dev_to_node(dev), &arg,
 					       desc->affinity);
 		if (virq < 0) {
 			ret = -ENOSPC;
-- 
2.17.1


^ permalink raw reply related

* [PATCH kernel v4 5/8] genirq: Add free_irq hook for IRQ descriptor and use for mapping disposal
From: Alexey Kardashevskiy @ 2020-11-24  6:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexey Kardashevskiy, Marc Zyngier, x86, linux-gpio,
	Oliver O'Halloran, Cédric Le Goater, Frederic Barrat,
	Thomas Gleixner, Michal Suchánek, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20201124061720.86766-1-aik@ozlabs.ru>

We want to make the irq_desc.kobj's release hook free associated resources
but we do not want to pollute the irqdesc code with domains.

This adds a free_irq hook which is called when the last reference to
the descriptor is dropped.

The first user is mapped irqs. This potentially can break the existing
users; however they seem to do the right thing and call dispose once
per mapping.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 include/linux/irqdesc.h    |  1 +
 include/linux/irqdomain.h  |  2 --
 include/linux/irqhandler.h |  1 +
 kernel/irq/irqdesc.c       |  3 +++
 kernel/irq/irqdomain.c     | 14 ++++++++++++--
 5 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 5745491303e0..6d44cb6a20ad 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -57,6 +57,7 @@ struct irq_desc {
 	struct irq_data		irq_data;
 	unsigned int __percpu	*kstat_irqs;
 	irq_flow_handler_t	handle_irq;
+	irq_free_handler_t	free_irq;
 	struct irqaction	*action;	/* IRQ action list */
 	unsigned int		status_use_accessors;
 	unsigned int		core_internal_state__do_not_mess_with_it;
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index a353b93ddf9e..ccca87cd3d15 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -381,8 +381,6 @@ extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq,
 extern void irq_domain_associate_many(struct irq_domain *domain,
 				      unsigned int irq_base,
 				      irq_hw_number_t hwirq_base, int count);
-extern void irq_domain_disassociate(struct irq_domain *domain,
-				    unsigned int irq);
 
 extern unsigned int irq_create_mapping(struct irq_domain *host,
 				       irq_hw_number_t hwirq);
diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h
index c30f454a9518..3dbc2bb764f3 100644
--- a/include/linux/irqhandler.h
+++ b/include/linux/irqhandler.h
@@ -10,5 +10,6 @@
 struct irq_desc;
 struct irq_data;
 typedef	void (*irq_flow_handler_t)(struct irq_desc *desc);
+typedef	void (*irq_free_handler_t)(struct irq_desc *desc);
 
 #endif
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 75374b7944b5..071363da8688 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -427,6 +427,9 @@ static void irq_kobj_release(struct kobject *kobj)
 	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
 	unsigned int irq = desc->irq_data.irq;
 
+	if (desc->free_irq)
+		desc->free_irq(desc);
+
 	irq_remove_debugfs_entry(desc);
 	unregister_irq_proc(irq, desc);
 
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 805478f81d96..4779d912bb86 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -485,7 +485,7 @@ static void irq_domain_set_mapping(struct irq_domain *domain,
 	}
 }
 
-void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
+static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
 {
 	struct irq_data *irq_data = irq_get_irq_data(irq);
 	irq_hw_number_t hwirq;
@@ -582,6 +582,13 @@ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
 }
 EXPORT_SYMBOL_GPL(irq_domain_associate_many);
 
+static void irq_mapped_free_desc(struct irq_desc *desc)
+{
+	unsigned int virq = desc->irq_data.irq;
+
+	irq_domain_disassociate(desc->irq_data.domain, virq);
+}
+
 /**
  * irq_create_direct_mapping() - Allocate an irq for direct mapping
  * @domain: domain to allocate the irq for or NULL for default domain
@@ -638,6 +645,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
 {
 	struct device_node *of_node;
 	int virq;
+	struct irq_desc *desc;
 
 	pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
 
@@ -674,6 +682,9 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
 	pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
 		hwirq, of_node_full_name(of_node), virq);
 
+	desc = irq_to_desc(virq);
+	desc->free_irq = irq_mapped_free_desc;
+
 	return virq;
 }
 EXPORT_SYMBOL_GPL(irq_create_mapping);
@@ -865,7 +876,6 @@ void irq_dispose_mapping(unsigned int virq)
 	if (irq_domain_is_hierarchy(domain)) {
 		irq_domain_free_irqs(virq, 1);
 	} else {
-		irq_domain_disassociate(domain, virq);
 		irq_free_desc(virq);
 	}
 }
-- 
2.17.1


^ permalink raw reply related

* [PATCH kernel v4 6/8] genirq/irqdomain: Move hierarchical IRQ cleanup to kobject_release
From: Alexey Kardashevskiy @ 2020-11-24  6:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexey Kardashevskiy, Marc Zyngier, x86, linux-gpio,
	Oliver O'Halloran, Cédric Le Goater, Frederic Barrat,
	Thomas Gleixner, Michal Suchánek, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20201124061720.86766-1-aik@ozlabs.ru>

This moves hierarchical domain's irqs cleanup into the kobject release
hook to make irq_domain_free_irqs() as simple as kobject_put.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 kernel/irq/irqdomain.c | 43 +++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 4779d912bb86..a0a81cc6c524 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -863,21 +863,9 @@ EXPORT_SYMBOL_GPL(irq_create_of_mapping);
  */
 void irq_dispose_mapping(unsigned int virq)
 {
-	struct irq_data *irq_data = irq_get_irq_data(virq);
-	struct irq_domain *domain;
+	struct irq_desc *desc = irq_to_desc(virq);
 
-	if (!virq || !irq_data)
-		return;
-
-	domain = irq_data->domain;
-	if (WARN_ON(domain == NULL))
-		return;
-
-	if (irq_domain_is_hierarchy(domain)) {
-		irq_domain_free_irqs(virq, 1);
-	} else {
-		irq_free_desc(virq);
-	}
+	kobject_put(&desc->kobj);
 }
 EXPORT_SYMBOL_GPL(irq_dispose_mapping);
 
@@ -1396,6 +1384,19 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
 	return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
 }
 
+static void irq_domain_hierarchy_free_desc(struct irq_desc *desc)
+{
+	unsigned int virq = desc->irq_data.irq;
+	struct irq_data *data = irq_get_irq_data(virq);
+
+	mutex_lock(&irq_domain_mutex);
+	irq_domain_remove_irq(virq);
+	irq_domain_free_irqs_hierarchy(data->domain, virq, 1);
+	mutex_unlock(&irq_domain_mutex);
+
+	irq_domain_free_irq_data(virq, 1);
+}
+
 int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq,
 				 unsigned int nr_irqs, int node, void *arg,
 				 const struct irq_affinity_desc *affinity)
@@ -1430,7 +1431,10 @@ int __irq_domain_alloc_irqs_data(struct irq_domain *domain, int virq,
 	}
 
 	for (i = 0; i < nr_irqs; i++) {
+		struct irq_desc *desc = irq_to_desc(virq + i);
+
 		irq_domain_insert_irq(virq + i);
+		desc->free_irq = irq_domain_hierarchy_free_desc;
 	}
 	mutex_unlock(&irq_domain_mutex);
 
@@ -1675,14 +1679,11 @@ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs)
 		 "NULL pointer, cannot free irq\n"))
 		return;
 
-	mutex_lock(&irq_domain_mutex);
-	for (i = 0; i < nr_irqs; i++)
-		irq_domain_remove_irq(virq + i);
-	irq_domain_free_irqs_hierarchy(data->domain, virq, nr_irqs);
-	mutex_unlock(&irq_domain_mutex);
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_desc *desc = irq_to_desc(virq + i);
 
-	irq_domain_free_irq_data(virq, nr_irqs);
-	irq_free_descs(virq, nr_irqs);
+		kobject_put(&desc->kobj);
+	}
 }
 
 /**
-- 
2.17.1


^ permalink raw reply related

* [PATCH kernel v4 8/8] powerpc/pci: Remove LSI mappings on device teardown
From: Alexey Kardashevskiy @ 2020-11-24  6:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexey Kardashevskiy, Marc Zyngier, x86, linux-gpio,
	Oliver O'Halloran, Cédric Le Goater, Frederic Barrat,
	Thomas Gleixner, Michal Suchánek, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20201124061720.86766-1-aik@ozlabs.ru>

From: Oliver O'Halloran <oohall@gmail.com>

When a passthrough IO adapter is removed from a pseries machine using hash
MMU and the XIVE interrupt mode, the POWER hypervisor expects the guest OS
to clear all page table entries related to the adapter. If some are still
present, the RTAS call which isolates the PCI slot returns error 9001
"valid outstanding translations" and the removal of the IO adapter fails.
This is because when the PHBs are scanned, Linux maps automatically the
INTx interrupts in the Linux interrupt number space but these are never
removed.

This problem can be fixed by adding the corresponding unmap operation when
the device is removed. There's no pcibios_* hook for the remove case, but
the same effect can be achieved using a bus notifier.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/kernel/pci-common.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index be108616a721..95f4e173368a 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -404,6 +404,27 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
 	return 0;
 }

+static int ppc_pci_unmap_irq_line(struct notifier_block *nb,
+			       unsigned long action, void *data)
+{
+	struct pci_dev *pdev = to_pci_dev(data);
+
+	if (action == BUS_NOTIFY_DEL_DEVICE)
+		irq_dispose_mapping(pdev->irq);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_pci_unmap_irq_notifier = {
+	.notifier_call = ppc_pci_unmap_irq_line,
+};
+
+static int ppc_pci_register_irq_notifier(void)
+{
+	return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier);
+}
+arch_initcall(ppc_pci_register_irq_notifier);
+
 /*
  * Platform support for /proc/bus/pci/X/Y mmap()s.
  *  -- paulus.
-- 
2.17.1

^ permalink raw reply related

* Re: [PATCH v2 00/19] Add generic vdso_base tracking
From: Christophe Leroy @ 2020-11-24  6:53 UTC (permalink / raw)
  To: Dmitry Safonov, linux-kernel
  Cc: Thomas Bogendoerfer, Arnd Bergmann, Catalin Marinas, x86,
	Dmitry Safonov, Oleg Nesterov, Russell King,
	linuxppc-dev@lists.ozlabs.org, Ingo Molnar, Borislav Petkov,
	Alexander Viro, Andy Lutomirski, H. Peter Anvin, Guo Ren,
	Andrew Morton, Vincenzo Frascino, Will Deacon, Thomas Gleixner
In-Reply-To: <20201124002932.1220517-1-dima@arista.com>



Le 24/11/2020 à 01:29, Dmitry Safonov a écrit :
> v2 Changes:
> - Rename user_landing to vdso_base as it tracks vDSO VMA start address,
>    rather than the explicit address to land (Andy)
> - Reword and don't use "new-execed" and "new-born" task (Andy)
> - Fix failures reported by build robot
> 
> Started from discussion [1], where was noted that currently a couple of
> architectures support mremap() for vdso/sigpage, but not munmap().
> If an application maps something on the ex-place of vdso/sigpage,
> later after processing signal it will land there (good luck!)
> 
> Patches set is based on linux-next (next-20201123) and it depends on
> changes in x86/cleanups (those reclaim TIF_IA32/TIF_X32) and also
> on my changes in akpm (fixing several mremap() issues).

I have a series that cleans up VDSO init on powerpc and migrates powerpc to 
_install_special_mapping() (patch 10 of the series).

https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=204396&state=%2A&archive=both

I'm wondering how we should coordinate with your series for merging.

I guess your series will also imply removal of arch_unmap() ? see 
https://elixir.bootlin.com/linux/v5.10-rc4/source/arch/powerpc/include/asm/mmu_context.h#L262

> 
> Logically, the patches set divides on:
> - patch       1: a cleanup for patches in x86/cleanups
> - patches  2-11: cleanups for arch_setup_additional_pages()
> - patches 12-13: x86 signal changes for unmapped vdso
> - patches 14-19: provide generic vdso_base in mm_struct
> 
> In the end, besides cleanups, it's now more predictable what happens for
> applications with unmapped vdso on architectures those support .mremap()
> for vdso/sigpage.
> 
> I'm aware of only one user that unmaps vdso - Valgrind [2].
> (there possibly are more, but this one is "special", it unmaps vdso, but
>   not vvar, which confuses CRIU [Checkpoint Restore In Userspace], that's
>   why I'm aware of it)
> 
> Patches as a .git branch:
> https://github.com/0x7f454c46/linux/tree/setup_additional_pages-v2
> 
> v1 Link:
> https://lore.kernel.org/lkml/20201108051730.2042693-1-dima@arista.com/
> 
> [1]: https://lore.kernel.org/linux-arch/CAJwJo6ZANqYkSHbQ+3b+Fi_VT80MtrzEV5yreQAWx-L8j8x2zA@mail.gmail.com/
> [2]: https://github.com/checkpoint-restore/criu/issues/488
> 

Christophe

^ permalink raw reply

* [PATCH] net: fs_enet: Fix incorrect IS_ERR_VALUE macro usages
From: Wei Li @ 2020-11-24  6:24 UTC (permalink / raw)
  To: Pantelis Antoniou, David S. Miller, Jakub Kicinski, Scott Wood,
	Jeff Garzik, Timur Tabi, Kumar Gala
  Cc: netdev, linuxppc-dev, linux-kernel, guohanjun

IS_ERR_VALUE macro should be used only with unsigned long type.
Especially it works incorrectly with unsigned shorter types on
64bit machines.

Fixes: 976de6a8c304 ("fs_enet: Be an of_platform device when CONFIG_PPC_CPM_NEW_BINDING is set.")
Fixes: 4c35630ccda5 ("[POWERPC] Change rheap functions to use ulongs instead of pointers")
Signed-off-by: Wei Li <liwei391@huawei.com>
---
 drivers/net/ethernet/freescale/fs_enet/mac-fcc.c | 2 +-
 drivers/net/ethernet/freescale/fs_enet/mac-scc.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
index b47490be872c..e2117ad46130 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
@@ -107,7 +107,7 @@ static int do_pd_setup(struct fs_enet_private *fep)
 
 	fep->fcc.mem = (void __iomem *)cpm2_immr;
 	fpi->dpram_offset = cpm_dpalloc(128, 32);
-	if (IS_ERR_VALUE(fpi->dpram_offset)) {
+	if (IS_ERR_VALUE((unsigned long)(int)fpi->dpram_offset)) {
 		ret = fpi->dpram_offset;
 		goto out_fcccp;
 	}
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
index 64300ac13e02..90f82df0b1bb 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
@@ -136,7 +136,7 @@ static int allocate_bd(struct net_device *dev)
 
 	fep->ring_mem_addr = cpm_dpalloc((fpi->tx_ring + fpi->rx_ring) *
 					 sizeof(cbd_t), 8);
-	if (IS_ERR_VALUE(fep->ring_mem_addr))
+	if (IS_ERR_VALUE((unsigned long)(int)fep->ring_mem_addr))
 		return -ENOMEM;
 
 	fep->ring_base = (void __iomem __force*)
-- 
2.17.1


^ permalink raw reply related

* [PATCH] net/ethernet/freescale: Fix incorrect IS_ERR_VALUE macro usages
From: Wei Li @ 2020-11-24  6:22 UTC (permalink / raw)
  To: Li Yang, David S. Miller, Jakub Kicinski, Paul Gortmaker,
	Kumar Gala, Timur Tabi
  Cc: netdev, linuxppc-dev, linux-kernel, guohanjun

IS_ERR_VALUE macro should be used only with unsigned long type.
Especially it works incorrectly with unsigned shorter types on
64bit machines.

Fixes: 4c35630ccda5 ("[POWERPC] Change rheap functions to use ulongs instead of pointers")
Signed-off-by: Wei Li <liwei391@huawei.com>
---
 drivers/net/ethernet/freescale/ucc_geth.c | 30 +++++++++++------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 714b501be7d0..8656d9be256a 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -286,7 +286,7 @@ static int fill_init_enet_entries(struct ucc_geth_private *ugeth,
 		else {
 			init_enet_offset =
 			    qe_muram_alloc(thread_size, thread_alignment);
-			if (IS_ERR_VALUE(init_enet_offset)) {
+			if (IS_ERR_VALUE((unsigned long)(int)init_enet_offset)) {
 				if (netif_msg_ifup(ugeth))
 					pr_err("Can not allocate DPRAM memory\n");
 				qe_put_snum((u8) snum);
@@ -2223,7 +2223,7 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth)
 			ugeth->tx_bd_ring_offset[j] =
 			    qe_muram_alloc(length,
 					   UCC_GETH_TX_BD_RING_ALIGNMENT);
-			if (!IS_ERR_VALUE(ugeth->tx_bd_ring_offset[j]))
+			if (!IS_ERR_VALUE((unsigned long)(int)ugeth->tx_bd_ring_offset[j]))
 				ugeth->p_tx_bd_ring[j] =
 				    (u8 __iomem *) qe_muram_addr(ugeth->
 							 tx_bd_ring_offset[j]);
@@ -2300,7 +2300,7 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth)
 			ugeth->rx_bd_ring_offset[j] =
 			    qe_muram_alloc(length,
 					   UCC_GETH_RX_BD_RING_ALIGNMENT);
-			if (!IS_ERR_VALUE(ugeth->rx_bd_ring_offset[j]))
+			if (!IS_ERR_VALUE((unsigned long)(int)ugeth->rx_bd_ring_offset[j]))
 				ugeth->p_rx_bd_ring[j] =
 				    (u8 __iomem *) qe_muram_addr(ugeth->
 							 rx_bd_ring_offset[j]);
@@ -2510,7 +2510,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 	ugeth->tx_glbl_pram_offset =
 	    qe_muram_alloc(sizeof(struct ucc_geth_tx_global_pram),
 			   UCC_GETH_TX_GLOBAL_PRAM_ALIGNMENT);
-	if (IS_ERR_VALUE(ugeth->tx_glbl_pram_offset)) {
+	if (IS_ERR_VALUE((unsigned long)(int)ugeth->tx_glbl_pram_offset)) {
 		if (netif_msg_ifup(ugeth))
 			pr_err("Can not allocate DPRAM memory for p_tx_glbl_pram\n");
 		return -ENOMEM;
@@ -2530,7 +2530,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 			   sizeof(struct ucc_geth_thread_data_tx) +
 			   32 * (numThreadsTxNumerical == 1),
 			   UCC_GETH_THREAD_DATA_ALIGNMENT);
-	if (IS_ERR_VALUE(ugeth->thread_dat_tx_offset)) {
+	if (IS_ERR_VALUE((unsigned long)(int)ugeth->thread_dat_tx_offset)) {
 		if (netif_msg_ifup(ugeth))
 			pr_err("Can not allocate DPRAM memory for p_thread_data_tx\n");
 		return -ENOMEM;
@@ -2557,7 +2557,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 	    qe_muram_alloc(ug_info->numQueuesTx *
 			   sizeof(struct ucc_geth_send_queue_qd),
 			   UCC_GETH_SEND_QUEUE_QUEUE_DESCRIPTOR_ALIGNMENT);
-	if (IS_ERR_VALUE(ugeth->send_q_mem_reg_offset)) {
+	if (IS_ERR_VALUE((unsigned long)(int)ugeth->send_q_mem_reg_offset)) {
 		if (netif_msg_ifup(ugeth))
 			pr_err("Can not allocate DPRAM memory for p_send_q_mem_reg\n");
 		return -ENOMEM;
@@ -2597,7 +2597,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 		ugeth->scheduler_offset =
 		    qe_muram_alloc(sizeof(struct ucc_geth_scheduler),
 				   UCC_GETH_SCHEDULER_ALIGNMENT);
-		if (IS_ERR_VALUE(ugeth->scheduler_offset)) {
+		if (IS_ERR_VALUE((unsigned long)(int)ugeth->scheduler_offset)) {
 			if (netif_msg_ifup(ugeth))
 				pr_err("Can not allocate DPRAM memory for p_scheduler\n");
 			return -ENOMEM;
@@ -2644,7 +2644,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 		    qe_muram_alloc(sizeof
 				   (struct ucc_geth_tx_firmware_statistics_pram),
 				   UCC_GETH_TX_STATISTICS_ALIGNMENT);
-		if (IS_ERR_VALUE(ugeth->tx_fw_statistics_pram_offset)) {
+		if (IS_ERR_VALUE((unsigned long)(int)ugeth->tx_fw_statistics_pram_offset)) {
 			if (netif_msg_ifup(ugeth))
 				pr_err("Can not allocate DPRAM memory for p_tx_fw_statistics_pram\n");
 			return -ENOMEM;
@@ -2681,7 +2681,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 	ugeth->rx_glbl_pram_offset =
 	    qe_muram_alloc(sizeof(struct ucc_geth_rx_global_pram),
 			   UCC_GETH_RX_GLOBAL_PRAM_ALIGNMENT);
-	if (IS_ERR_VALUE(ugeth->rx_glbl_pram_offset)) {
+	if (IS_ERR_VALUE((unsigned long)(int)ugeth->rx_glbl_pram_offset)) {
 		if (netif_msg_ifup(ugeth))
 			pr_err("Can not allocate DPRAM memory for p_rx_glbl_pram\n");
 		return -ENOMEM;
@@ -2700,7 +2700,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 	    qe_muram_alloc(numThreadsRxNumerical *
 			   sizeof(struct ucc_geth_thread_data_rx),
 			   UCC_GETH_THREAD_DATA_ALIGNMENT);
-	if (IS_ERR_VALUE(ugeth->thread_dat_rx_offset)) {
+	if (IS_ERR_VALUE((unsigned long)(int)ugeth->thread_dat_rx_offset)) {
 		if (netif_msg_ifup(ugeth))
 			pr_err("Can not allocate DPRAM memory for p_thread_data_rx\n");
 		return -ENOMEM;
@@ -2721,7 +2721,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 		    qe_muram_alloc(sizeof
 				   (struct ucc_geth_rx_firmware_statistics_pram),
 				   UCC_GETH_RX_STATISTICS_ALIGNMENT);
-		if (IS_ERR_VALUE(ugeth->rx_fw_statistics_pram_offset)) {
+		if (IS_ERR_VALUE((unsigned long)(int)ugeth->rx_fw_statistics_pram_offset)) {
 			if (netif_msg_ifup(ugeth))
 				pr_err("Can not allocate DPRAM memory for p_rx_fw_statistics_pram\n");
 			return -ENOMEM;
@@ -2741,7 +2741,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 	    qe_muram_alloc(ug_info->numQueuesRx *
 			   sizeof(struct ucc_geth_rx_interrupt_coalescing_entry)
 			   + 4, UCC_GETH_RX_INTERRUPT_COALESCING_ALIGNMENT);
-	if (IS_ERR_VALUE(ugeth->rx_irq_coalescing_tbl_offset)) {
+	if (IS_ERR_VALUE((unsigned long)(int)ugeth->rx_irq_coalescing_tbl_offset)) {
 		if (netif_msg_ifup(ugeth))
 			pr_err("Can not allocate DPRAM memory for p_rx_irq_coalescing_tbl\n");
 		return -ENOMEM;
@@ -2807,7 +2807,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 			   (sizeof(struct ucc_geth_rx_bd_queues_entry) +
 			    sizeof(struct ucc_geth_rx_prefetched_bds)),
 			   UCC_GETH_RX_BD_QUEUES_ALIGNMENT);
-	if (IS_ERR_VALUE(ugeth->rx_bd_qs_tbl_offset)) {
+	if (IS_ERR_VALUE((unsigned long)(int)ugeth->rx_bd_qs_tbl_offset)) {
 		if (netif_msg_ifup(ugeth))
 			pr_err("Can not allocate DPRAM memory for p_rx_bd_qs_tbl\n");
 		return -ENOMEM;
@@ -2892,7 +2892,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 		ugeth->exf_glbl_param_offset =
 		    qe_muram_alloc(sizeof(struct ucc_geth_exf_global_pram),
 		UCC_GETH_RX_EXTENDED_FILTERING_GLOBAL_PARAMETERS_ALIGNMENT);
-		if (IS_ERR_VALUE(ugeth->exf_glbl_param_offset)) {
+		if (IS_ERR_VALUE((unsigned long)(int)ugeth->exf_glbl_param_offset)) {
 			if (netif_msg_ifup(ugeth))
 				pr_err("Can not allocate DPRAM memory for p_exf_glbl_param\n");
 			return -ENOMEM;
@@ -3026,7 +3026,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
 
 	/* Allocate InitEnet command parameter structure */
 	init_enet_pram_offset = qe_muram_alloc(sizeof(struct ucc_geth_init_pram), 4);
-	if (IS_ERR_VALUE(init_enet_pram_offset)) {
+	if (IS_ERR_VALUE((unsigned long)(int)init_enet_pram_offset)) {
 		if (netif_msg_ifup(ugeth))
 			pr_err("Can not allocate DPRAM memory for p_init_enet_pram\n");
 		return -ENOMEM;
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH V2 4/5] ocxl: Add mmu notifier
From: Christoph Hellwig @ 2020-11-24  9:17 UTC (permalink / raw)
  To: Christophe Lombard; +Cc: linuxppc-dev, ajd, fbarrat, Jason Gunthorpe
In-Reply-To: <20201120173241.59229-5-clombard@linux.vnet.ibm.com>

You probably want to add Jason for an audit of new notifier uses.

On Fri, Nov 20, 2020 at 06:32:40PM +0100, Christophe Lombard wrote:
> Add invalidate_range mmu notifier, when required (ATSD access of MMIO
> registers is available), to initiate TLB invalidation commands.
> For the time being, the ATSD0 set of registers is used by default.
> 
> The pasid and bdf values have to be configured in the Process Element
> Entry.
> The PEE must be set up to match the BDF/PASID of the AFU.
> 
> Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
> ---
>  drivers/misc/ocxl/link.c | 58 +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 57 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
> index 20444db8a2bb..100bdfe9ec37 100644
> --- a/drivers/misc/ocxl/link.c
> +++ b/drivers/misc/ocxl/link.c
> @@ -2,8 +2,10 @@
>  // Copyright 2017 IBM Corp.
>  #include <linux/sched/mm.h>
>  #include <linux/mutex.h>
> +#include <linux/mm.h>
>  #include <linux/mm_types.h>
>  #include <linux/mmu_context.h>
> +#include <linux/mmu_notifier.h>
>  #include <asm/copro.h>
>  #include <asm/pnv-ocxl.h>
>  #include <asm/xive.h>
> @@ -33,6 +35,7 @@
>  
>  #define SPA_PE_VALID		0x80000000
>  
> +struct ocxl_link;
>  
>  struct pe_data {
>  	struct mm_struct *mm;
> @@ -41,6 +44,8 @@ struct pe_data {
>  	/* opaque pointer to be passed to the above callback */
>  	void *xsl_err_data;
>  	struct rcu_head rcu;
> +	struct ocxl_link *link;
> +	struct mmu_notifier mmu_notifier;
>  };
>  
>  struct spa {
> @@ -83,6 +88,8 @@ struct ocxl_link {
>  	int domain;
>  	int bus;
>  	int dev;
> +	void __iomem *arva;     /* ATSD register virtual address */
> +	spinlock_t atsd_lock;   /* to serialize shootdowns */
>  	atomic_t irq_available;
>  	struct spa *spa;
>  	void *platform_data;
> @@ -403,6 +410,11 @@ static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_l
>  	if (rc)
>  		goto err_xsl_irq;
>  
> +	rc = pnv_ocxl_map_lpar(dev, mfspr(SPRN_LPID), 0,
> +					  &link->arva);
> +	if (!rc)
> +		spin_lock_init(&link->atsd_lock);
> +
>  	*out_link = link;
>  	return 0;
>  
> @@ -454,6 +466,11 @@ static void release_xsl(struct kref *ref)
>  {
>  	struct ocxl_link *link = container_of(ref, struct ocxl_link, ref);
>  
> +	if (link->arva) {
> +		pnv_ocxl_unmap_lpar(&link->arva);
> +		link->arva = NULL;
> +	}
> +
>  	list_del(&link->list);
>  	/* call platform code before releasing data */
>  	pnv_ocxl_spa_release(link->platform_data);
> @@ -470,6 +487,26 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle)
>  }
>  EXPORT_SYMBOL_GPL(ocxl_link_release);
>  
> +static void invalidate_range(struct mmu_notifier *mn,
> +			     struct mm_struct *mm,
> +			     unsigned long start, unsigned long end)
> +{
> +	struct pe_data *pe_data = container_of(mn, struct pe_data, mmu_notifier);
> +	struct ocxl_link *link = pe_data->link;
> +	unsigned long addr, pid, page_size = PAGE_SIZE;
> +
> +	pid = mm->context.id;
> +
> +	spin_lock(&link->atsd_lock);
> +	for (addr = start; addr < end; addr += page_size)
> +		pnv_ocxl_tlb_invalidate(&link->arva, pid, addr);
> +	spin_unlock(&link->atsd_lock);
> +}
> +
> +static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = {
> +	.invalidate_range = invalidate_range,
> +};
> +
>  static u64 calculate_cfg_state(bool kernel)
>  {
>  	u64 state;
> @@ -526,6 +563,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
>  	pe_data->mm = mm;
>  	pe_data->xsl_err_cb = xsl_err_cb;
>  	pe_data->xsl_err_data = xsl_err_data;
> +	pe_data->link = link;
> +	pe_data->mmu_notifier.ops = &ocxl_mmu_notifier_ops;
>  
>  	memset(pe, 0, sizeof(struct ocxl_process_element));
>  	pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
> @@ -542,8 +581,16 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
>  	 * by the nest MMU. If we have a kernel context, TLBIs are
>  	 * already global.
>  	 */
> -	if (mm)
> +	if (mm) {
>  		mm_context_add_copro(mm);
> +		if (link->arva) {
> +			/* Use MMIO registers for the TLB Invalidate
> +			 * operations.
> +			 */
> +			mmu_notifier_register(&pe_data->mmu_notifier, mm);
> +		}
> +	}
> +
>  	/*
>  	 * Barrier is to make sure PE is visible in the SPA before it
>  	 * is used by the device. It also helps with the global TLBI
> @@ -674,6 +721,15 @@ int ocxl_link_remove_pe(void *link_handle, int pasid)
>  		WARN(1, "Couldn't find pe data when removing PE\n");
>  	} else {
>  		if (pe_data->mm) {
> +			if (link->arva) {
> +				mmu_notifier_unregister(&pe_data->mmu_notifier,
> +							pe_data->mm);
> +				spin_lock(&link->atsd_lock);
> +				pnv_ocxl_tlb_invalidate(&link->arva,
> +							pe_data->mm->context.id,
> +							0ull);
> +				spin_unlock(&link->atsd_lock);
> +			}
>  			mm_context_remove_copro(pe_data->mm);
>  			mmdrop(pe_data->mm);
>  		}
> -- 
> 2.28.0
> 
---end quoted text---

^ permalink raw reply

* Re: [PATCH kernel v4 2/8] genirq/irqdomain: Clean legacy IRQ allocation
From: Andy Shevchenko @ 2020-11-24  9:19 UTC (permalink / raw)
  To: Alexey Kardashevskiy
  Cc: Marc Zyngier, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Linux Kernel Mailing List, open list:GPIO SUBSYSTEM,
	Oliver O'Halloran, Cédric Le Goater, Frederic Barrat,
	Thomas Gleixner, Michal Suchánek,
	open list:LINUX FOR POWERPC PA SEMI PWRFICIENT,
	linux-arm Mailing List
In-Reply-To: <20201124061720.86766-3-aik@ozlabs.ru>

On Tue, Nov 24, 2020 at 8:20 AM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
> There are 10 users of __irq_domain_alloc_irqs() and only one - IOAPIC -
> passes realloc==true. There is no obvious reason for handling this
> specific case in the generic code.
>
> This splits out __irq_domain_alloc_irqs_data() to make it clear what
> IOAPIC does and makes __irq_domain_alloc_irqs() cleaner.
>
> This should cause no behavioral change.

> +       ret = __irq_domain_alloc_irqs_data(domain, virq, nr_irqs, node, arg, affinity);
> +       if (ret <= 0)
>                 goto out_free_desc;

Was or wasn't 0 considered as error code previously?

>         return virq;

>  out_free_desc:
>         irq_free_descs(virq, nr_irqs);
>         return ret;

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply

* Re: [PATCH v1 0/2] Use H_RPT_INVALIDATE for nested guest
From: Bharata B Rao @ 2020-11-24  9:44 UTC (permalink / raw)
  To: kvm-ppc, linuxppc-dev; +Cc: aneesh.kumar, npiggin
In-Reply-To: <20201019112642.53016-1-bharata@linux.ibm.com>

Hi,

Any comments on this patchset? Anything specific to be addressed
before it could be considered for inclusion?

Regards,
Bharata.

On Mon, Oct 19, 2020 at 04:56:40PM +0530, Bharata B Rao wrote:
> This patchset adds support for the new hcall H_RPT_INVALIDATE
> (currently handles nested case only) and replaces the nested tlb flush
> calls with this new hcall if the support for the same exists.
> 
> Changes in v1:
> -------------
> - Removed the bits that added the FW_FEATURE_RPT_INVALIDATE feature
>   as they are already upstream.
> 
> v0: https://lore.kernel.org/linuxppc-dev/20200703104420.21349-1-bharata@linux.ibm.com/T/#m1800c5f5b3d4f6a154ae58fc1c617c06f286358f
> 
> H_RPT_INVALIDATE
> ================
> Syntax:
> int64   /* H_Success: Return code on successful completion */
>         /* H_Busy - repeat the call with the same */
>         /* H_Parameter, H_P2, H_P3, H_P4, H_P5 : Invalid parameters */
>         hcall(const uint64 H_RPT_INVALIDATE, /* Invalidate RPT translation lookaside information */
>               uint64 pid,       /* PID/LPID to invalidate */
>               uint64 target,    /* Invalidation target */
>               uint64 type,      /* Type of lookaside information */
>               uint64 pageSizes,     /* Page sizes */
>               uint64 start,     /* Start of Effective Address (EA) range (inclusive) */
>               uint64 end)       /* End of EA range (exclusive) */
> 
> Invalidation targets (target)
> -----------------------------
> Core MMU        0x01 /* All virtual processors in the partition */
> Core local MMU  0x02 /* Current virtual processor */
> Nest MMU        0x04 /* All nest/accelerator agents in use by the partition */
> 
> A combination of the above can be specified, except core and core local.
> 
> Type of translation to invalidate (type)
> ---------------------------------------
> NESTED       0x0001  /* Invalidate nested guest partition-scope */
> TLB          0x0002  /* Invalidate TLB */
> PWC          0x0004  /* Invalidate Page Walk Cache */
> PRT          0x0008  /* Invalidate Process Table Entries if NESTED is clear */
> PAT          0x0008  /* Invalidate Partition Table Entries if NESTED is set */
> 
> A combination of the above can be specified.
> 
> Page size mask (pageSizes)
> --------------------------
> 4K              0x01
> 64K             0x02
> 2M              0x04
> 1G              0x08
> All sizes       (-1UL)
> 
> A combination of the above can be specified.
> All page sizes can be selected with -1.
> 
> Semantics: Invalidate radix tree lookaside information
>            matching the parameters given.
> * Return H_P2, H_P3 or H_P4 if target, type, or pageSizes parameters are
>   different from the defined values.
> * Return H_PARAMETER if NESTED is set and pid is not a valid nested
>   LPID allocated to this partition
> * Return H_P5 if (start, end) doesn't form a valid range. Start and end
>   should be a valid Quadrant address and  end > start.
> * Return H_NotSupported if the partition is not in running in radix
>   translation mode.
> * May invalidate more translation information than requested.
> * If start = 0 and end = -1, set the range to cover all valid addresses.
>   Else start and end should be aligned to 4kB (lower 11 bits clear).
> * If NESTED is clear, then invalidate process scoped lookaside information.
>   Else pid specifies a nested LPID, and the invalidation is performed
>   on nested guest partition table and nested guest partition scope real
>   addresses.
> * If pid = 0 and NESTED is clear, then valid addresses are quadrant 3 and
>   quadrant 0 spaces, Else valid addresses are quadrant 0.
> * Pages which are fully covered by the range are to be invalidated.
>   Those which are partially covered are considered outside invalidation
>   range, which allows a caller to optimally invalidate ranges that may
>   contain mixed page sizes.
> * Return H_SUCCESS on success.
> 
> Bharata B Rao (2):
>   KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE (nested case
>     only)
>   KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested KVM
> 
>  Documentation/virt/kvm/api.rst                |  17 +++
>  .../include/asm/book3s/64/tlbflush-radix.h    |  18 +++
>  arch/powerpc/include/asm/kvm_book3s.h         |   3 +
>  arch/powerpc/kvm/book3s_64_mmu_radix.c        |  26 ++++-
>  arch/powerpc/kvm/book3s_hv.c                  |  32 ++++++
>  arch/powerpc/kvm/book3s_hv_nested.c           | 107 +++++++++++++++++-
>  arch/powerpc/kvm/powerpc.c                    |   3 +
>  arch/powerpc/mm/book3s64/radix_tlb.c          |   4 -
>  include/uapi/linux/kvm.h                      |   1 +
>  9 files changed, 200 insertions(+), 11 deletions(-)
> 
> -- 
> 2.26.2

^ permalink raw reply

* [PATCH V3 3/5] ocxl: Update the Process Element Entry
From: Christophe Lombard @ 2020-11-24  9:58 UTC (permalink / raw)
  To: linuxppc-dev, fbarrat, ajd
In-Reply-To: <20201124095838.18665-1-clombard@linux.vnet.ibm.com>

To complete the MMIO based mechanism, the fields: PASID, bus, device and
function of the Process Element Entry have to be filled. (See
OpenCAPI Power Platform Architecture document)

                   Hypervisor Process Element Entry
Word
    0 1 .... 7  8  ...... 12  13 ..15  16.... 19  20 ........... 31
0                  OSL Configuration State (0:31)
1                  OSL Configuration State (32:63)
2               PASID                      |    Reserved
3       Bus   |   Device    |Function |        Reserved
4                             Reserved
5                             Reserved
6                               ....

Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
---
 drivers/misc/ocxl/context.c       | 4 +++-
 drivers/misc/ocxl/link.c          | 4 +++-
 drivers/misc/ocxl/ocxl_internal.h | 9 ++++++---
 drivers/scsi/cxlflash/ocxl_hw.c   | 6 ++++--
 include/misc/ocxl.h               | 2 +-
 5 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index c21f65a5c762..9eb0d93b01c6 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -70,6 +70,7 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm)
 {
 	int rc;
 	unsigned long pidr = 0;
+	struct pci_dev *dev;
 
 	// Locks both status & tidr
 	mutex_lock(&ctx->status_mutex);
@@ -81,8 +82,9 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm)
 	if (mm)
 		pidr = mm->context.id;
 
+	dev = to_pci_dev(ctx->afu->fn->dev.parent);
 	rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, pidr, ctx->tidr,
-			      amr, mm, xsl_fault_error, ctx);
+			      amr, pci_dev_id(dev), mm, xsl_fault_error, ctx);
 	if (rc)
 		goto out;
 
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index fd73d3bc0eb6..77381dda2c45 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -494,7 +494,7 @@ static u64 calculate_cfg_state(bool kernel)
 }
 
 int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
-		u64 amr, struct mm_struct *mm,
+		u64 amr, u16 bdf, struct mm_struct *mm,
 		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
 		void *xsl_err_data)
 {
@@ -529,6 +529,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
 
 	memset(pe, 0, sizeof(struct ocxl_process_element));
 	pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
+	pe->pasid = cpu_to_be32(pasid << (31 - 19));
+	pe->bdf = cpu_to_be16(bdf);
 	pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
 	pe->pid = cpu_to_be32(pidr);
 	pe->tid = cpu_to_be32(tidr);
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
index 0bad0a123af6..10125a22d5a5 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -84,13 +84,16 @@ struct ocxl_context {
 
 struct ocxl_process_element {
 	__be64 config_state;
-	__be32 reserved1[11];
+	__be32 pasid;
+	__be16 bdf;
+	__be16 reserved1;
+	__be32 reserved2[9];
 	__be32 lpid;
 	__be32 tid;
 	__be32 pid;
-	__be32 reserved2[10];
+	__be32 reserved3[10];
 	__be64 amr;
-	__be32 reserved3[3];
+	__be32 reserved4[3];
 	__be32 software_state;
 };
 
diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c
index e4e0d767b98e..244fc27215dc 100644
--- a/drivers/scsi/cxlflash/ocxl_hw.c
+++ b/drivers/scsi/cxlflash/ocxl_hw.c
@@ -329,6 +329,7 @@ static int start_context(struct ocxlflash_context *ctx)
 	struct ocxl_hw_afu *afu = ctx->hw_afu;
 	struct ocxl_afu_config *acfg = &afu->acfg;
 	void *link_token = afu->link_token;
+	struct pci_dev *pdev = afu->pdev;
 	struct device *dev = afu->dev;
 	bool master = ctx->master;
 	struct mm_struct *mm;
@@ -360,8 +361,9 @@ static int start_context(struct ocxlflash_context *ctx)
 		mm = current->mm;
 	}
 
-	rc = ocxl_link_add_pe(link_token, ctx->pe, pid, 0, 0, mm,
-			      ocxlflash_xsl_fault, ctx);
+	rc = ocxl_link_add_pe(link_token, ctx->pe, pid, 0, 0,
+			      pci_dev_id(pdev), mm, ocxlflash_xsl_fault,
+			      ctx);
 	if (unlikely(rc)) {
 		dev_err(dev, "%s: ocxl_link_add_pe failed rc=%d\n",
 			__func__, rc);
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index e013736e275d..3ed736da02c8 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -447,7 +447,7 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle);
  * defined
  */
 int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
-		u64 amr, struct mm_struct *mm,
+		u64 amr, u16 bdf, struct mm_struct *mm,
 		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
 		void *xsl_err_data);
 
-- 
2.28.0


^ permalink raw reply related

* [PATCH V3 4/5] ocxl: Add mmu notifier
From: Christophe Lombard @ 2020-11-24  9:58 UTC (permalink / raw)
  To: linuxppc-dev, fbarrat, ajd
In-Reply-To: <20201124095838.18665-1-clombard@linux.vnet.ibm.com>

Add invalidate_range mmu notifier, when required (ATSD access of MMIO
registers is available), to initiate TLB invalidation commands.
For the time being, the ATSD0 set of registers is used by default.

The pasid and bdf values have to be configured in the Process Element
Entry.
The PEE must be set up to match the BDF/PASID of the AFU.

Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
---
 drivers/misc/ocxl/link.c | 62 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 61 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index 77381dda2c45..129d4eddc4d2 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -2,8 +2,10 @@
 // Copyright 2017 IBM Corp.
 #include <linux/sched/mm.h>
 #include <linux/mutex.h>
+#include <linux/mm.h>
 #include <linux/mm_types.h>
 #include <linux/mmu_context.h>
+#include <linux/mmu_notifier.h>
 #include <asm/copro.h>
 #include <asm/pnv-ocxl.h>
 #include <asm/xive.h>
@@ -33,6 +35,7 @@
 
 #define SPA_PE_VALID		0x80000000
 
+struct ocxl_link;
 
 struct pe_data {
 	struct mm_struct *mm;
@@ -41,6 +44,8 @@ struct pe_data {
 	/* opaque pointer to be passed to the above callback */
 	void *xsl_err_data;
 	struct rcu_head rcu;
+	struct ocxl_link *link;
+	struct mmu_notifier mmu_notifier;
 };
 
 struct spa {
@@ -83,6 +88,8 @@ struct ocxl_link {
 	int domain;
 	int bus;
 	int dev;
+	void __iomem *arva;     /* ATSD register virtual address */
+	spinlock_t atsd_lock;   /* to serialize shootdowns */
 	atomic_t irq_available;
 	struct spa *spa;
 	void *platform_data;
@@ -388,6 +395,7 @@ static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_l
 	link->bus = dev->bus->number;
 	link->dev = PCI_SLOT(dev->devfn);
 	atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);
+	spin_lock_init(&link->atsd_lock);
 
 	rc = alloc_spa(dev, link);
 	if (rc)
@@ -403,6 +411,13 @@ static int alloc_link(struct pci_dev *dev, int PE_mask, struct ocxl_link **out_l
 	if (rc)
 		goto err_xsl_irq;
 
+	/* if link->arva is not defeined, MMIO registers are not used to
+	 * generate TLB invalidate. PowerBus snooping is enabled.
+	 * Otherwise, PowerBus snooping is disabled. TLB Invalidates are
+	 * initiated using MMIO registers.
+	 */
+	pnv_ocxl_map_lpar(dev, mfspr(SPRN_LPID), 0, &link->arva);
+
 	*out_link = link;
 	return 0;
 
@@ -454,6 +469,11 @@ static void release_xsl(struct kref *ref)
 {
 	struct ocxl_link *link = container_of(ref, struct ocxl_link, ref);
 
+	if (link->arva) {
+		pnv_ocxl_unmap_lpar(link->arva);
+		link->arva = NULL;
+	}
+
 	list_del(&link->list);
 	/* call platform code before releasing data */
 	pnv_ocxl_spa_release(link->platform_data);
@@ -470,6 +490,26 @@ void ocxl_link_release(struct pci_dev *dev, void *link_handle)
 }
 EXPORT_SYMBOL_GPL(ocxl_link_release);
 
+static void invalidate_range(struct mmu_notifier *mn,
+			     struct mm_struct *mm,
+			     unsigned long start, unsigned long end)
+{
+	struct pe_data *pe_data = container_of(mn, struct pe_data, mmu_notifier);
+	struct ocxl_link *link = pe_data->link;
+	unsigned long addr, pid, page_size = PAGE_SIZE;
+
+	pid = mm->context.id;
+
+	spin_lock(&link->atsd_lock);
+	for (addr = start; addr < end; addr += page_size)
+		pnv_ocxl_tlb_invalidate(link->arva, pid, addr, page_size);
+	spin_unlock(&link->atsd_lock);
+}
+
+static const struct mmu_notifier_ops ocxl_mmu_notifier_ops = {
+	.invalidate_range = invalidate_range,
+};
+
 static u64 calculate_cfg_state(bool kernel)
 {
 	u64 state;
@@ -526,6 +566,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
 	pe_data->mm = mm;
 	pe_data->xsl_err_cb = xsl_err_cb;
 	pe_data->xsl_err_data = xsl_err_data;
+	pe_data->link = link;
+	pe_data->mmu_notifier.ops = &ocxl_mmu_notifier_ops;
 
 	memset(pe, 0, sizeof(struct ocxl_process_element));
 	pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
@@ -542,8 +584,16 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
 	 * by the nest MMU. If we have a kernel context, TLBIs are
 	 * already global.
 	 */
-	if (mm)
+	if (mm) {
 		mm_context_add_copro(mm);
+		if (link->arva) {
+			/* Use MMIO registers for the TLB Invalidate
+			 * operations.
+			 */
+			mmu_notifier_register(&pe_data->mmu_notifier, mm);
+		}
+	}
+
 	/*
 	 * Barrier is to make sure PE is visible in the SPA before it
 	 * is used by the device. It also helps with the global TLBI
@@ -674,6 +724,16 @@ int ocxl_link_remove_pe(void *link_handle, int pasid)
 		WARN(1, "Couldn't find pe data when removing PE\n");
 	} else {
 		if (pe_data->mm) {
+			if (link->arva) {
+				mmu_notifier_unregister(&pe_data->mmu_notifier,
+							pe_data->mm);
+				spin_lock(&link->atsd_lock);
+				pnv_ocxl_tlb_invalidate(link->arva,
+							pe_data->mm->context.id,
+							0ull,
+							PAGE_SIZE);
+				spin_unlock(&link->atsd_lock);
+			}
 			mm_context_remove_copro(pe_data->mm);
 			mmdrop(pe_data->mm);
 		}
-- 
2.28.0


^ permalink raw reply related

* [PATCH V3 2/5] ocxl: Initiate a TLB invalidate command
From: Christophe Lombard @ 2020-11-24  9:58 UTC (permalink / raw)
  To: linuxppc-dev, fbarrat, ajd
In-Reply-To: <20201124095838.18665-1-clombard@linux.vnet.ibm.com>

When a TLB Invalidate is required for the Logical Partition, the following
sequence has to be performed:

1. Load MMIO ATSD AVA register with the necessary value, if required.
2. Write the MMIO ATSD launch register to initiate the TLB Invalidate
command.
3. Poll the MMIO ATSD status register to determine when the TLB Invalidate
   has been completed.

Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/pnv-ocxl.h   | 51 +++++++++++++++++++
 arch/powerpc/platforms/powernv/ocxl.c | 70 +++++++++++++++++++++++++++
 2 files changed, 121 insertions(+)

diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
index 60c3c74427d9..9acd1fbf1197 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -3,12 +3,59 @@
 #ifndef _ASM_PNV_OCXL_H
 #define _ASM_PNV_OCXL_H
 
+#include <linux/bitfield.h>
 #include <linux/pci.h>
 
 #define PNV_OCXL_TL_MAX_TEMPLATE        63
 #define PNV_OCXL_TL_BITS_PER_RATE       4
 #define PNV_OCXL_TL_RATE_BUF_SIZE       ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8)
 
+#define PNV_OCXL_ATSD_TIMEOUT		1
+
+/* TLB Management Instructions */
+#define PNV_OCXL_ATSD_LNCH		0x00
+/* Radix Invalidate */
+#define   PNV_OCXL_ATSD_LNCH_R		PPC_BIT(0)
+/* Radix Invalidation Control
+ * 0b00 Just invalidate TLB.
+ * 0b01 Invalidate just Page Walk Cache.
+ * 0b10 Invalidate TLB, Page Walk Cache, and any
+ * caching of Partition and Process Table Entries.
+ */
+#define   PNV_OCXL_ATSD_LNCH_RIC	PPC_BITMASK(1, 2)
+/* Number and Page Size of translations to be invalidated */
+#define   PNV_OCXL_ATSD_LNCH_LP		PPC_BITMASK(3, 10)
+/* Invalidation Criteria
+ * 0b00 Invalidate just the target VA.
+ * 0b01 Invalidate matching PID.
+ */
+#define   PNV_OCXL_ATSD_LNCH_IS		PPC_BITMASK(11, 12)
+/* 0b1: Process Scope, 0b0: Partition Scope */
+#define   PNV_OCXL_ATSD_LNCH_PRS	PPC_BIT(13)
+/* Invalidation Flag */
+#define   PNV_OCXL_ATSD_LNCH_B		PPC_BIT(14)
+/* Actual Page Size to be invalidated
+ * 000 4KB
+ * 101 64KB
+ * 001 2MB
+ * 010 1GB
+ */
+#define   PNV_OCXL_ATSD_LNCH_AP		PPC_BITMASK(15, 17)
+/* Defines the large page select
+ * L=0b0 for 4KB pages
+ * L=0b1 for large pages)
+ */
+#define   PNV_OCXL_ATSD_LNCH_L		PPC_BIT(18)
+/* Process ID */
+#define   PNV_OCXL_ATSD_LNCH_PID	PPC_BITMASK(19, 38)
+/* NoFlush – Assumed to be 0b0 */
+#define   PNV_OCXL_ATSD_LNCH_F		PPC_BIT(39)
+#define   PNV_OCXL_ATSD_LNCH_OCAPI_SLBI	PPC_BIT(40)
+#define   PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON	PPC_BIT(41)
+#define PNV_OCXL_ATSD_AVA		0x08
+#define   PNV_OCXL_ATSD_AVA_AVA		PPC_BITMASK(0, 51)
+#define PNV_OCXL_ATSD_STAT		0x10
+
 int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported);
 int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
 
@@ -31,4 +78,8 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle);
 int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
 		      uint64_t lpcr, void __iomem **arva);
 void pnv_ocxl_unmap_lpar(void __iomem *arva);
+void pnv_ocxl_tlb_invalidate(void __iomem *arva,
+			     unsigned long pid,
+			     unsigned long addr,
+			     unsigned long page_size);
 #endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
index 57fc1062677b..f665846d2b28 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -528,3 +528,73 @@ void pnv_ocxl_unmap_lpar(void __iomem *arva)
 	iounmap(arva);
 }
 EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
+
+void pnv_ocxl_tlb_invalidate(void __iomem *arva,
+			     unsigned long pid,
+			     unsigned long addr,
+			     unsigned long page_size)
+{
+	unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);
+	u64 val = 0ull;
+	int pend;
+	u8 size;
+
+	if (!(arva))
+		return;
+
+	if (addr) {
+		/* load Abbreviated Virtual Address register with
+		 * the necessary value
+		 */
+		val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
+		out_be64(arva + PNV_OCXL_ATSD_AVA, val);
+	}
+
+	/* Write access initiates a shoot down to initiate the
+	 * TLB Invalidate command
+	 */
+	val = PNV_OCXL_ATSD_LNCH_R;
+	if (addr) {
+		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b00);
+		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
+	} else {
+		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
+		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
+		val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
+	}
+	val |= PNV_OCXL_ATSD_LNCH_PRS;
+	/* Actual Page Size to be invalidated
+	 * 000 4KB
+	 * 101 64KB
+	 * 001 2MB
+	 * 010 1GB
+	 */
+	size = 0b101;
+	if (page_size == 0x10000)
+		size = 0b000;
+	if (page_size == 0x200000)
+		size = 0b001;
+	if (page_size == 0x40000000)
+		size = 0b010;
+	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);
+	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);
+	out_be64(arva + PNV_OCXL_ATSD_LNCH, val);
+
+	/* Poll the ATSD status register to determine when the
+	 * TLB Invalidate has been completed.
+	 */
+	val = in_be64(arva + PNV_OCXL_ATSD_STAT);
+	pend = val >> 63;
+
+	while (pend) {
+		if (time_after_eq(jiffies, timeout)) {
+			pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
+			       __func__, val, pid);
+			return;
+		}
+		cpu_relax();
+		val = in_be64(arva + PNV_OCXL_ATSD_STAT);
+		pend = val >> 63;
+	}
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);
-- 
2.28.0


^ permalink raw reply related

* [PATCH V3 0/5] ocxl: Mmio invalidation support
From: Christophe Lombard @ 2020-11-24  9:58 UTC (permalink / raw)
  To: linuxppc-dev, fbarrat, ajd

OpenCAPI 4.0/5.0 with TLBI/SLBI Snooping, is not used due to performance
problems caused by the PAU having to process all incoming TLBI/SLBI
commands which will cause them to back up on the PowerBus.

When the Address Translation Mode requires TLB operations to be initiated
using MMIO registers, a set of registers like the following is used:
• XTS MMIO ATSD0 LPARID register
• XTS MMIO ATSD0 AVA register
• XTS MMIO ATSD0 launch register, write access initiates a shoot down
• XTS MMIO ATSD0 status register

The MMIO based mechanism also blocks the NPU/PAU from snooping TLBIE
commands from the PowerBus.

The Shootdown commands (ATSD) will be generated using MMIO registers
in the NPU/PAU and sent to the device.

Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>

---
Changelog[v3]
 - Rebase to latest upstream.
 - Add page_size argument in pnv_ocxl_tlb_invalidate()
 - Remove double pointer
 
Changelog[v2]
 - Rebase to latest upstream.
 - Create a set of smaller patches
 - Move the device tree parsing and ioremap() for the shootdown page in a
   platform-specific file (powernv)
 - Release the shootdown page in release_xsl()
 - Initialize atsd_lock
 - Move the code to initiate the TLB Invalidate command in a
   platform-specific file (powernv)
 - Use the notifier invalidate_range
---
Christophe Lombard (5):
  ocxl: Assign a register set to a Logical Partition
  ocxl: Initiate a TLB invalidate command
  ocxl: Update the Process Element Entry
  ocxl: Add mmu notifier
  ocxl: Add new kernel traces

 arch/powerpc/include/asm/pnv-ocxl.h   |  54 ++++++++++++
 arch/powerpc/platforms/powernv/ocxl.c | 115 ++++++++++++++++++++++++++
 drivers/misc/ocxl/context.c           |   4 +-
 drivers/misc/ocxl/link.c              |  70 +++++++++++++++-
 drivers/misc/ocxl/ocxl_internal.h     |   9 +-
 drivers/misc/ocxl/trace.h             |  64 ++++++++++++++
 drivers/scsi/cxlflash/ocxl_hw.c       |   6 +-
 include/misc/ocxl.h                   |   2 +-
 8 files changed, 315 insertions(+), 9 deletions(-)

-- 
2.28.0


^ permalink raw reply

* [PATCH V3 1/5] ocxl: Assign a register set to a Logical Partition
From: Christophe Lombard @ 2020-11-24  9:58 UTC (permalink / raw)
  To: linuxppc-dev, fbarrat, ajd
In-Reply-To: <20201124095838.18665-1-clombard@linux.vnet.ibm.com>

Platform specific function to assign a register set to a Logical Partition.
The "ibm,mmio-atsd" property, provided by the firmware, contains the 16
base ATSD physical addresses (ATSD0 through ATSD15) of the set of MMIO
registers (XTS MMIO ATSDx LPARID/AVA/launch/status register).

For the time being, the ATSD0 set of registers is used by default.

Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/pnv-ocxl.h   |  3 ++
 arch/powerpc/platforms/powernv/ocxl.c | 45 +++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
index d37ededca3ee..60c3c74427d9 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -28,4 +28,7 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **p
 void pnv_ocxl_spa_release(void *platform_data);
 int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle);
 
+int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
+		      uint64_t lpcr, void __iomem **arva);
+void pnv_ocxl_unmap_lpar(void __iomem *arva);
 #endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
index ecdad219d704..57fc1062677b 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -483,3 +483,48 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
 	return rc;
 }
 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
+
+int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
+		      uint64_t lpcr, void __iomem **arva)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	u64 mmio_atsd;
+	int rc;
+
+	/* ATSD physical address.
+	 * ATSD LAUNCH register: write access initiates a shoot down to
+	 * initiate the TLB Invalidate command.
+	 */
+	rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
+					0, &mmio_atsd);
+	if (rc) {
+		dev_info(&dev->dev, "No available ATSD found\n");
+		return rc;
+	}
+
+	/* Assign a register set to a Logical Partition and MMIO ATSD
+	 * LPARID register to the required value.
+	 */
+	rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
+			       lparid, lpcr);
+	if (rc) {
+		dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);
+		return rc;
+	}
+
+	*arva = ioremap(mmio_atsd, 24);
+	if (!(*arva)) {
+		dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd);
+		rc = -ENOMEM;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
+
+void pnv_ocxl_unmap_lpar(void __iomem *arva)
+{
+	iounmap(arva);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
-- 
2.28.0


^ permalink raw reply related

* [PATCH V3 5/5] ocxl: Add new kernel traces
From: Christophe Lombard @ 2020-11-24  9:58 UTC (permalink / raw)
  To: linuxppc-dev, fbarrat, ajd
In-Reply-To: <20201124095838.18665-1-clombard@linux.vnet.ibm.com>

Add specific kernel traces which provide information on mmu notifier and on
pages range.

Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
---
 drivers/misc/ocxl/link.c  |  4 +++
 drivers/misc/ocxl/trace.h | 64 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)

diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index 129d4eddc4d2..ab039c115381 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -499,6 +499,7 @@ static void invalidate_range(struct mmu_notifier *mn,
 	unsigned long addr, pid, page_size = PAGE_SIZE;
 
 	pid = mm->context.id;
+	trace_ocxl_mmu_notifier_range(start, end, pid);
 
 	spin_lock(&link->atsd_lock);
 	for (addr = start; addr < end; addr += page_size)
@@ -590,6 +591,7 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
 			/* Use MMIO registers for the TLB Invalidate
 			 * operations.
 			 */
+			trace_ocxl_init_mmu_notifier(pasid, mm->context.id);
 			mmu_notifier_register(&pe_data->mmu_notifier, mm);
 		}
 	}
@@ -725,6 +727,8 @@ int ocxl_link_remove_pe(void *link_handle, int pasid)
 	} else {
 		if (pe_data->mm) {
 			if (link->arva) {
+				trace_ocxl_release_mmu_notifier(pasid,
+								pe_data->mm->context.id);
 				mmu_notifier_unregister(&pe_data->mmu_notifier,
 							pe_data->mm);
 				spin_lock(&link->atsd_lock);
diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h
index 17e21cb2addd..a33a5094ff6c 100644
--- a/drivers/misc/ocxl/trace.h
+++ b/drivers/misc/ocxl/trace.h
@@ -8,6 +8,70 @@
 
 #include <linux/tracepoint.h>
 
+
+TRACE_EVENT(ocxl_mmu_notifier_range,
+	TP_PROTO(unsigned long start, unsigned long end, unsigned long pidr),
+	TP_ARGS(start, end, pidr),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, start)
+		__field(unsigned long, end)
+		__field(unsigned long, pidr)
+	),
+
+	TP_fast_assign(
+		__entry->start = start;
+		__entry->end = end;
+		__entry->pidr = pidr;
+	),
+
+	TP_printk("start=0x%lx end=0x%lx pidr=0x%lx",
+		__entry->start,
+		__entry->end,
+		__entry->pidr
+	)
+);
+
+TRACE_EVENT(ocxl_init_mmu_notifier,
+	TP_PROTO(int pasid, unsigned long pidr),
+	TP_ARGS(pasid, pidr),
+
+	TP_STRUCT__entry(
+		__field(int, pasid)
+		__field(unsigned long, pidr)
+	),
+
+	TP_fast_assign(
+		__entry->pasid = pasid;
+		__entry->pidr = pidr;
+	),
+
+	TP_printk("pasid=%d, pidr=0x%lx",
+		__entry->pasid,
+		__entry->pidr
+	)
+);
+
+TRACE_EVENT(ocxl_release_mmu_notifier,
+	TP_PROTO(int pasid, unsigned long pidr),
+	TP_ARGS(pasid, pidr),
+
+	TP_STRUCT__entry(
+		__field(int, pasid)
+		__field(unsigned long, pidr)
+	),
+
+	TP_fast_assign(
+		__entry->pasid = pasid;
+		__entry->pidr = pidr;
+	),
+
+	TP_printk("pasid=%d, pidr=0x%lx",
+		__entry->pasid,
+		__entry->pidr
+	)
+);
+
 DECLARE_EVENT_CLASS(ocxl_context,
 	TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr),
 	TP_ARGS(pid, spa, pasid, pidr, tidr),
-- 
2.28.0


^ permalink raw reply related

* Re: C vdso
From: Christophe Leroy @ 2020-11-24 10:11 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev@ozlabs.org
In-Reply-To: <50214d90-be25-f673-494c-840fdfb96206@csgroup.eu>

Hi Michael,

Le 03/11/2020 à 19:13, Christophe Leroy a écrit :
> 
> 
> Le 23/10/2020 à 15:24, Michael Ellerman a écrit :
>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>> Le 24/09/2020 à 15:17, Christophe Leroy a écrit :
>>>> Le 17/09/2020 à 14:33, Michael Ellerman a écrit :
>>>>> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>>>>>
>>>>>> What is the status with the generic C vdso merge ?
>>>>>> In some mail, you mentionned having difficulties getting it working on
>>>>>> ppc64, any progress ? What's the problem ? Can I help ?
>>>>>
>>>>> Yeah sorry I was hoping to get time to work on it but haven't been able
>>>>> to.
>>>>>
>>>>> It's causing crashes on ppc64 ie. big endian.
>> ...
>>>>
>>>> Can you tell what defconfig you are using ? I have been able to setup a full glibc PPC64 cross
>>>> compilation chain and been able to test it under QEMU with success, using Nathan's vdsotest tool.
>>>
>>> What config are you using ?
>>
>> ppc64_defconfig + guest.config
>>
>> Or pseries_defconfig.
>>
>> I'm using Ubuntu GCC 9.3.0 mostly, but it happens with other toolchains too.
>>
>> At a minimum we're seeing relocations in the output, which is a problem:
>>
>>    $ readelf -r build\~/arch/powerpc/kernel/vdso64/vdso64.so
>>    Relocation section '.rela.dyn' at offset 0x12a8 contains 8 entries:
>>      Offset          Info           Type           Sym. Value    Sym. Name + Addend
>>    000000001368  000000000016 R_PPC64_RELATIVE                     7c0
>>    000000001370  000000000016 R_PPC64_RELATIVE                     9300
>>    000000001380  000000000016 R_PPC64_RELATIVE                     970
>>    000000001388  000000000016 R_PPC64_RELATIVE                     9300
>>    000000001398  000000000016 R_PPC64_RELATIVE                     a90
>>    0000000013a0  000000000016 R_PPC64_RELATIVE                     9300
>>    0000000013b0  000000000016 R_PPC64_RELATIVE                     b20
>>    0000000013b8  000000000016 R_PPC64_RELATIVE                     9300
> 
> Looks like it's due to the OPD and relation between the function() and .function()
> 
> By using DOTSYM() in the 'bl' call, that's directly the dot function which is called and the OPD is 
> not used anymore, it can get dropped.
> 
> Now I get .rela.dyn full of 0, don't know if we should drop it explicitely.
> 

What is the status now with latest version of CVDSO ? I saw you had it in next-test for some time, 
it is not there anymore today.

Thanks,
Christophe

^ permalink raw reply

* Re: [PATCH 3/3] selftests/powerpc: Add VF recovery tests
From: Frederic Barrat @ 2020-11-24 10:14 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev
In-Reply-To: <20201103044503.917128-3-oohall@gmail.com>



On 03/11/2020 05:45, Oliver O'Halloran wrote:

> --- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
> +++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
> @@ -135,3 +135,111 @@ eeh_one_dev() {
>   	return 0;
>   }
>   
> +eeh_has_driver() {
> +	test -e /sys/bus/pci/devices/$1/driver;
> +	return $?
> +}
> +
> +eeh_can_recover() {
> +	# we'll get an IO error if the device's current driver doesn't support
> +	# error recovery
> +	echo $1 > '/sys/kernel/debug/powerpc/eeh_dev_can_recover' 2>/dev/null
> +
> +	return $?
> +}
> +
> +eeh_find_all_pfs() {
> +	devices=""
> +
> +	# SR-IOV on pseries requires hypervisor support, so check for that
> +	is_pseries=""
> +	if grep -q pSeries /proc/cpuinfo ; then
> +		if [ ! -f /proc/device-tree/rtas/ibm,open-sriov-allow-unfreeze ] ||
> +		   [ ! -f /proc/device-tree/rtas/ibm,open-sriov-map-pe-number ] ; then
> +			return 1;
> +		fi


Is it possible to run those tests on pseries? I haven't managed to set 
up a LPAR with a physical function which would let me enable a virtual 
function. All I could do is assign a virtual function to a LPAR. When 
assigning a physical function to the LPAR, enabling a virtual function 
fails because of missing properties in the device tree, so it looks like 
the hypervisor doesn't support it (?).

Same story on qemu.

   Fred



^ permalink raw reply

* Re: [PATCH kernel v4 2/8] genirq/irqdomain: Clean legacy IRQ allocation
From: Alexey Kardashevskiy @ 2020-11-24 10:56 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Marc Zyngier, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Linux Kernel Mailing List, open list:GPIO SUBSYSTEM,
	Oliver O'Halloran, Cédric Le Goater, Frederic Barrat,
	Thomas Gleixner, Michal Suchánek,
	open list:LINUX FOR POWERPC PA SEMI PWRFICIENT,
	linux-arm Mailing List
In-Reply-To: <CAHp75VfV4mG23C9Ep1vNLk2oBjB=LTQGyU=fhWPhw4PX-Ci-7A@mail.gmail.com>



On 11/24/20 8:19 PM, Andy Shevchenko wrote:
> On Tue, Nov 24, 2020 at 8:20 AM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>> There are 10 users of __irq_domain_alloc_irqs() and only one - IOAPIC -
>> passes realloc==true. There is no obvious reason for handling this
>> specific case in the generic code.
>>
>> This splits out __irq_domain_alloc_irqs_data() to make it clear what
>> IOAPIC does and makes __irq_domain_alloc_irqs() cleaner.
>>
>> This should cause no behavioral change.
> 
>> +       ret = __irq_domain_alloc_irqs_data(domain, virq, nr_irqs, node, arg, affinity);
>> +       if (ret <= 0)
>>                  goto out_free_desc;
> 
> Was or wasn't 0 considered as error code previously?

Oh. I need to clean this up, the idea is since this does not allocate 
IRQs, this should return error code and not an irq, I'll make this explicit.

> 
>>          return virq;
> 
>>   out_free_desc:
>>          irq_free_descs(virq, nr_irqs);
>>          return ret;
> 

-- 
Alexey

^ permalink raw reply

* [PATCH v2 2/4] KVM: PPC: Rename current DAWR macros and variables
From: Ravi Bangoria @ 2020-11-24 10:59 UTC (permalink / raw)
  To: mpe, paulus
  Cc: christophe.leroy, ravi.bangoria, mikey, kvm, leobras.c, jniethe5,
	linux-kernel, npiggin, kvm-ppc, pbonzini, linuxppc-dev
In-Reply-To: <20201124105953.39325-1-ravi.bangoria@linux.ibm.com>

Power10 is introducing second DAWR. Use real register names (with
suffix 0) from ISA for current macros and variables used by kvm.
One exception is KVM_REG_PPC_DAWR. Keep it as it is because it's
uapi so changing it will break userspace.

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 arch/powerpc/include/asm/kvm_host.h     |  4 ++--
 arch/powerpc/kernel/asm-offsets.c       |  4 ++--
 arch/powerpc/kvm/book3s_hv.c            | 24 ++++++++++++------------
 arch/powerpc/kvm/book3s_hv_nested.c     |  8 ++++----
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 20 ++++++++++----------
 5 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d67a470e95a3..62cadf1a596e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -584,8 +584,8 @@ struct kvm_vcpu_arch {
 	u32 ctrl;
 	u32 dabrx;
 	ulong dabr;
-	ulong dawr;
-	ulong dawrx;
+	ulong dawr0;
+	ulong dawrx0;
 	ulong ciabr;
 	ulong cfar;
 	ulong ppr;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8711c2164b45..e4256f5b4602 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -547,8 +547,8 @@ int main(void)
 	OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl);
 	OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr);
 	OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
-	OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr);
-	OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx);
+	OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
+	OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
 	OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
 	OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
 	OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 490a0f6a7285..d5c6efc8a76e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -782,8 +782,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
 			return H_UNSUPPORTED_FLAG_START;
 		if (value2 & DABRX_HYP)
 			return H_P4;
-		vcpu->arch.dawr  = value1;
-		vcpu->arch.dawrx = value2;
+		vcpu->arch.dawr0  = value1;
+		vcpu->arch.dawrx0 = value2;
 		return H_SUCCESS;
 	case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
 		/* KVM does not support mflags=2 (AIL=2) */
@@ -1747,10 +1747,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		*val = get_reg_val(id, vcpu->arch.vcore->vtb);
 		break;
 	case KVM_REG_PPC_DAWR:
-		*val = get_reg_val(id, vcpu->arch.dawr);
+		*val = get_reg_val(id, vcpu->arch.dawr0);
 		break;
 	case KVM_REG_PPC_DAWRX:
-		*val = get_reg_val(id, vcpu->arch.dawrx);
+		*val = get_reg_val(id, vcpu->arch.dawrx0);
 		break;
 	case KVM_REG_PPC_CIABR:
 		*val = get_reg_val(id, vcpu->arch.ciabr);
@@ -1979,10 +1979,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		vcpu->arch.vcore->vtb = set_reg_val(id, *val);
 		break;
 	case KVM_REG_PPC_DAWR:
-		vcpu->arch.dawr = set_reg_val(id, *val);
+		vcpu->arch.dawr0 = set_reg_val(id, *val);
 		break;
 	case KVM_REG_PPC_DAWRX:
-		vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
+		vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP;
 		break;
 	case KVM_REG_PPC_CIABR:
 		vcpu->arch.ciabr = set_reg_val(id, *val);
@@ -3437,8 +3437,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	int trap;
 	unsigned long host_hfscr = mfspr(SPRN_HFSCR);
 	unsigned long host_ciabr = mfspr(SPRN_CIABR);
-	unsigned long host_dawr = mfspr(SPRN_DAWR0);
-	unsigned long host_dawrx = mfspr(SPRN_DAWRX0);
+	unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
+	unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
 	unsigned long host_psscr = mfspr(SPRN_PSSCR);
 	unsigned long host_pidr = mfspr(SPRN_PID);
 
@@ -3477,8 +3477,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	mtspr(SPRN_SPURR, vcpu->arch.spurr);
 
 	if (dawr_enabled()) {
-		mtspr(SPRN_DAWR0, vcpu->arch.dawr);
-		mtspr(SPRN_DAWRX0, vcpu->arch.dawrx);
+		mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+		mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
 	}
 	mtspr(SPRN_CIABR, vcpu->arch.ciabr);
 	mtspr(SPRN_IC, vcpu->arch.ic);
@@ -3530,8 +3530,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
 	mtspr(SPRN_HFSCR, host_hfscr);
 	mtspr(SPRN_CIABR, host_ciabr);
-	mtspr(SPRN_DAWR0, host_dawr);
-	mtspr(SPRN_DAWRX0, host_dawrx);
+	mtspr(SPRN_DAWR0, host_dawr0);
+	mtspr(SPRN_DAWRX0, host_dawrx0);
 	mtspr(SPRN_PID, host_pidr);
 
 	/*
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 2b433c3bacea..1d4b335485d0 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -33,8 +33,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 	hr->dpdes = vc->dpdes;
 	hr->hfscr = vcpu->arch.hfscr;
 	hr->tb_offset = vc->tb_offset;
-	hr->dawr0 = vcpu->arch.dawr;
-	hr->dawrx0 = vcpu->arch.dawrx;
+	hr->dawr0 = vcpu->arch.dawr0;
+	hr->dawrx0 = vcpu->arch.dawrx0;
 	hr->ciabr = vcpu->arch.ciabr;
 	hr->purr = vcpu->arch.purr;
 	hr->spurr = vcpu->arch.spurr;
@@ -151,8 +151,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 	vc->pcr = hr->pcr | PCR_MASK;
 	vc->dpdes = hr->dpdes;
 	vcpu->arch.hfscr = hr->hfscr;
-	vcpu->arch.dawr = hr->dawr0;
-	vcpu->arch.dawrx = hr->dawrx0;
+	vcpu->arch.dawr0 = hr->dawr0;
+	vcpu->arch.dawrx0 = hr->dawrx0;
 	vcpu->arch.ciabr = hr->ciabr;
 	vcpu->arch.purr = hr->purr;
 	vcpu->arch.spurr = hr->spurr;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 799d6d0f4ead..8e3fb393a980 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -52,8 +52,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #define STACK_SLOT_PID		(SFS-32)
 #define STACK_SLOT_IAMR		(SFS-40)
 #define STACK_SLOT_CIABR	(SFS-48)
-#define STACK_SLOT_DAWR		(SFS-56)
-#define STACK_SLOT_DAWRX	(SFS-64)
+#define STACK_SLOT_DAWR0	(SFS-56)
+#define STACK_SLOT_DAWRX0	(SFS-64)
 #define STACK_SLOT_HFSCR	(SFS-72)
 #define STACK_SLOT_AMR		(SFS-80)
 #define STACK_SLOT_UAMOR	(SFS-88)
@@ -711,8 +711,8 @@ BEGIN_FTR_SECTION
 	mfspr	r7, SPRN_DAWRX0
 	mfspr	r8, SPRN_IAMR
 	std	r5, STACK_SLOT_CIABR(r1)
-	std	r6, STACK_SLOT_DAWR(r1)
-	std	r7, STACK_SLOT_DAWRX(r1)
+	std	r6, STACK_SLOT_DAWR0(r1)
+	std	r7, STACK_SLOT_DAWRX0(r1)
 	std	r8, STACK_SLOT_IAMR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
@@ -801,8 +801,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	lbz	r5, 0(r5)
 	cmpdi	r5, 0
 	beq	1f
-	ld	r5, VCPU_DAWR(r4)
-	ld	r6, VCPU_DAWRX(r4)
+	ld	r5, VCPU_DAWR0(r4)
+	ld	r6, VCPU_DAWRX0(r4)
 	mtspr	SPRN_DAWR0, r5
 	mtspr	SPRN_DAWRX0, r6
 1:
@@ -1759,8 +1759,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/* Restore host values of some registers */
 BEGIN_FTR_SECTION
 	ld	r5, STACK_SLOT_CIABR(r1)
-	ld	r6, STACK_SLOT_DAWR(r1)
-	ld	r7, STACK_SLOT_DAWRX(r1)
+	ld	r6, STACK_SLOT_DAWR0(r1)
+	ld	r7, STACK_SLOT_DAWRX0(r1)
 	mtspr	SPRN_CIABR, r5
 	/*
 	 * If the DAWR doesn't work, it's ok to write these here as
@@ -2566,8 +2566,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
 	rlwimi	r5, r4, 2, DAWRX_WT
 	clrrdi	r4, r4, 3
-	std	r4, VCPU_DAWR(r3)
-	std	r5, VCPU_DAWRX(r3)
+	std	r4, VCPU_DAWR0(r3)
+	std	r5, VCPU_DAWRX0(r3)
 	/*
 	 * If came in through the real mode hcall handler then it is necessary
 	 * to write the registers since the return path won't. Otherwise it is
-- 
2.26.2


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox