LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* Re: [PATCH v2 02/11] perf core: export swevent hrtimer helpers
From: Michael Ellerman @ 2014-02-25  3:33 UTC (permalink / raw)
  To: Cody P Schafer, Linux PPC, Arnaldo Carvalho de Melo, Ingo Molnar,
	Paul Mackerras, Peter Zijlstra
  Cc: LKML
In-Reply-To: <1392415338-16288-3-git-send-email-cody@linux.vnet.ibm.com>

On Fri, 2014-02-14 at 22:02:06 UTC, Cody P Schafer wrote:
> Export the swevent hrtimer helpers currently only used in events/core.c
> to allow the addition of architecture specific sw-like pmus.

Peter, Ingo, can we get your ACK on this please?

cheers


> Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
> ---
>  include/linux/perf_event.h | 5 ++++-
>  kernel/events/core.c       | 8 ++++----
>  2 files changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 2702e91..24378a9 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -559,7 +559,10 @@ extern void perf_pmu_migrate_context(struct pmu *pmu,
>  				int src_cpu, int dst_cpu);
>  extern u64 perf_event_read_value(struct perf_event *event,
>  				 u64 *enabled, u64 *running);
> -
> +extern void perf_swevent_init_hrtimer(struct perf_event *event);
> +extern void perf_swevent_start_hrtimer(struct perf_event *event);
> +extern void perf_swevent_cancel_hrtimer(struct perf_event *event);
> +extern int perf_swevent_event_idx(struct perf_event *event);
>  
>  struct perf_sample_data {
>  	u64				type;
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 56003c6..feb0347 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -5816,7 +5816,7 @@ static int perf_swevent_init(struct perf_event *event)
>  	return 0;
>  }
>  
> -static int perf_swevent_event_idx(struct perf_event *event)
> +int perf_swevent_event_idx(struct perf_event *event)
>  {
>  	return 0;
>  }
> @@ -6045,7 +6045,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
>  	return ret;
>  }
>  
> -static void perf_swevent_start_hrtimer(struct perf_event *event)
> +void perf_swevent_start_hrtimer(struct perf_event *event)
>  {
>  	struct hw_perf_event *hwc = &event->hw;
>  	s64 period;
> @@ -6067,7 +6067,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
>  				HRTIMER_MODE_REL_PINNED, 0);
>  }
>  
> -static void perf_swevent_cancel_hrtimer(struct perf_event *event)
> +void perf_swevent_cancel_hrtimer(struct perf_event *event)
>  {
>  	struct hw_perf_event *hwc = &event->hw;
>  
> @@ -6079,7 +6079,7 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event)
>  	}
>  }
>  
> -static void perf_swevent_init_hrtimer(struct perf_event *event)
> +void perf_swevent_init_hrtimer(struct perf_event *event)
>  {
>  	struct hw_perf_event *hwc = &event->hw;
>  
> -- 
> 1.8.5.4
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 
> 

^ permalink raw reply

* Re: [PATCH v2 01/11] perf: add PMU_RANGE_ATTR() helper for use by sw-like pmus
From: Michael Ellerman @ 2014-02-25  3:33 UTC (permalink / raw)
  To: Cody P Schafer, Linux PPC, Arnaldo Carvalho de Melo, Ingo Molnar,
	Paul Mackerras, Peter Zijlstra
  Cc: LKML
In-Reply-To: <1392415338-16288-2-git-send-email-cody@linux.vnet.ibm.com>

On Fri, 2014-02-14 at 22:02:05 UTC, Cody P Schafer wrote:
> Add PMU_RANGE_ATTR() and PMU_RANGE_RESV() (for reserved areas) which
> generate functions to extract the relevant bits from
> event->attr.config{,1,2} for use by sw-like pmus where the
> 'config{,1,2}' values don't map directly to hardware registers.
> 
> Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
> ---
>  include/linux/perf_event.h | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
> 
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index e56b07f..2702e91 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -871,4 +871,21 @@ _name##_show(struct device *dev,					\
>  									\
>  static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
>  
> +#define PMU_RANGE_ATTR(name, attr_var, bit_start, bit_end)		\
> +PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end);		\
> +PMU_RANGE_RESV(name, attr_var, bit_start, bit_end)
> +
> +#define PMU_RANGE_RESV(name, attr_var, bit_start, bit_end)		\
> +static u64 event_get_##name##_max(void)					\
> +{									\
> +	int bits = (bit_end) - (bit_start) + 1;				\
> +	return ((0x1ULL << (bits - 1ULL)) - 1ULL) |			\
> +		(0xFULL << (bits - 4ULL));				\
> +}									\
> +static u64 event_get_##name(struct perf_event *event)			\
> +{									\
> +	return (event->attr.attr_var >> (bit_start)) &			\
> +		event_get_##name##_max();				\
> +}

I still don't like the names.

EVENT_GETTER_AND_FORMAT()
EVENT_RESERVED()

?

It's not clear to me the max routine is useful in general. Can't we just do:

> +#define EVENT_RESERVED(name, attr_var, bit_start, bit_end)		\
> +static u64 event_get_##name(struct perf_event *event)		\
> +{									\
> +	return (event->attr.attr_var >> (bit_start)) &			\
> +		((0x1ULL << ((bit_end) - (bit_start) + 1)) - 1ULL);	\
> +}


cheers

^ permalink raw reply

* Re: [PATCH 1/3] mm: return NUMA_NO_NODE in local_memory_node if zonelists are not setup
From: Nishanth Aravamudan @ 2014-02-25  2:34 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Michal Hocko, linux-mm, Mel Gorman, David Rientjes, Andrew Morton,
	linuxppc-dev, Joonsoo Kim, Anton Blanchard
In-Reply-To: <alpine.DEB.2.10.1402241342480.20839@nuc>

On 24.02.2014 [13:43:31 -0600], Christoph Lameter wrote:
> On Fri, 21 Feb 2014, Nishanth Aravamudan wrote:
> 
> > I added two calls to local_memory_node(), I *think* both are necessary,
> > but am willing to be corrected.
> >
> > One is in map_cpu_to_node() and one is in start_secondary(). The
> > start_secondary() path is fine, AFAICT, as we are up & running at that
> > point. But in [the renamed function] update_numa_cpu_node() which is
> > used by hotplug, we get called from do_init_bootmem(), which is before
> > the zonelists are setup.
> >
> > I think both calls are necessary because I believe the
> > arch_update_cpu_topology() is used for supporting firmware-driven
> > home-noding, which does not invoke start_secondary() again (the
> > processor is already running, we're just updating the topology in that
> > situation).
> >
> > Then again, I could special-case the do_init_bootmem callpath, which is
> > only called at kernel init time?
> 
> Well that looks to be simpler.

Ok, I'll work on this.

> > > I do agree that calling local_memory_node() too early then trying to
> > > fudge around the consequences seems rather wrong.
> >
> > If the answer is to simply not call local_memory_node() early, I'll
> > submit a patch to at least add a comment, as there's nothing in the code
> > itself to prevent this from happening and is guaranteed to oops.
> 
> Ok.

Thanks!
-Nish

^ permalink raw reply

* [PATCH] powerpc/powernv Platform dump interface
From: Stewart Smith @ 2014-02-25  1:58 UTC (permalink / raw)
  To: Vasant Hegde, benh, linuxppc-dev; +Cc: Stewart Smith

This enables support for userspace to fetch and initiate FSP and
Platform dumps from the service processor (via firmware) through sysfs.

Based on original patch from Vasant Hegde <hegdevasant@linux.vnet.ibm.com>

Flow:
  - We register for OPAL notification events.
  - OPAL sends new dump available notification.
  - We make information on dump available via sysfs
  - Userspace requests dump contents
  - We retrieve the dump via OPAL interface
  - User copies the dump data
  - userspace sends ack for dump
  - We send ACK to OPAL.

sysfs files:
  - We add the /sys/firmware/opal/dump directory
  - echoing 1 (well, anything, but in future we may support
    different dump types) to /sys/firmware/opal/dump/initiate_dump
    will initiate a dump.
  - Each dump that we've been notified of gets a directory
    in /sys/firmware/opal/dump/ with a name of the dump ID (in hex,
    as this is what's used elsewhere to identify the dump).
  - Each dump has files: id, type, dump and acknowledge
    dump is binary and is the dump itself.
    echoing 'ack' to acknowledge (currently any string will do) will
    acknowledge the dump and it will soon after disappear from sysfs.

OPAL APIs:
  - opal_dump_init()
  - opal_dump_info()
  - opal_dump_read()
  - opal_dump_ack()
  - opal_dump_resend_notification()

Currently we are only ever notified for one dump at a time (until
the user explicitly acks the current dump, then we get a notification
of the next dump), but this kernel code should "just work" when OPAL
starts notifying us of all the dumps present.

Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
---
 Documentation/ABI/stable/sysfs-firmware-opal-dump |   29 ++
 arch/powerpc/include/asm/opal.h                   |   12 +
 arch/powerpc/platforms/powernv/Makefile           |    2 +-
 arch/powerpc/platforms/powernv/opal-dump.c        |  511 +++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal-wrappers.S    |    5 +
 arch/powerpc/platforms/powernv/opal.c             |    2 +
 6 files changed, 560 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/ABI/stable/sysfs-firmware-opal-dump
 create mode 100644 arch/powerpc/platforms/powernv/opal-dump.c

diff --git a/Documentation/ABI/stable/sysfs-firmware-opal-dump b/Documentation/ABI/stable/sysfs-firmware-opal-dump
new file mode 100644
index 0000000..3c2d252
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-firmware-opal-dump
@@ -0,0 +1,29 @@
+What:		/sys/firmware/opal/dump
+Date:		Feb 2014
+Contact:	Stewart Smith <stewart@linux.vnet.ibm.com>
+Description:
+		This directory exposes interfaces for interacting with
+		the FSP and platform dumps through OPAL firmware interface.
+
+		This is only for the powerpc/powernv platform.
+
+		initiate_dump:	When '1' is written to it,
+				we will initiate a dump.
+				Read this file for supported commands.
+
+		0xXXXX:		A directory for dump 0xXXXX (in hex).
+
+		Each dump has the following files:
+		id:		An ASCII representation of the dump ID
+				in hex.
+		type:		An ASCII representation of the type of
+				dump (or 'unknown').
+		dump:		A binary file containing the dump.
+				The size of the dump is the size of this file.
+		acknowledge:	When 'ack' is written to this, we will
+				acknowledge that we've retrieved the
+				dump to the service processor. It will
+				then remove it, making the dump
+				inaccessible.
+				Reading this file will get a list of
+				supported actions.
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 40157e2..3194870 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -154,8 +154,13 @@ extern int opal_enter_rtas(struct rtas_args *args,
 #define OPAL_FLASH_VALIDATE			76
 #define OPAL_FLASH_MANAGE			77
 #define OPAL_FLASH_UPDATE			78
+#define OPAL_DUMP_INIT				81
+#define OPAL_DUMP_INFO				82
+#define OPAL_DUMP_READ				83
+#define OPAL_DUMP_ACK				84
 #define OPAL_GET_MSG				85
 #define OPAL_CHECK_ASYNC_COMPLETION		86
+#define OPAL_DUMP_RESEND			91
 #define OPAL_SYNC_HOST_REBOOT			87
 
 #ifndef __ASSEMBLY__
@@ -237,6 +242,7 @@ enum OpalPendingState {
 	OPAL_EVENT_EPOW			= 0x80,
 	OPAL_EVENT_LED_STATUS		= 0x100,
 	OPAL_EVENT_PCI_ERROR		= 0x200,
+	OPAL_EVENT_DUMP_AVAIL		= 0x400,
 	OPAL_EVENT_MSG_PENDING		= 0x800,
 };
 
@@ -826,6 +832,11 @@ int64_t opal_lpc_read(uint32_t chip_id, enum OpalLPCAddressType addr_type,
 int64_t opal_validate_flash(uint64_t buffer, uint32_t *size, uint32_t *result);
 int64_t opal_manage_flash(uint8_t op);
 int64_t opal_update_flash(uint64_t blk_list);
+int64_t opal_dump_init(uint8_t dump_type);
+int64_t opal_dump_info(uint32_t *dump_id, uint32_t *dump_size);
+int64_t opal_dump_read(uint32_t dump_id, uint64_t buffer);
+int64_t opal_dump_ack(uint32_t dump_id);
+int64_t opal_dump_resend_notification(void);
 
 int64_t opal_get_msg(uint64_t buffer, size_t size);
 int64_t opal_check_completion(uint64_t buffer, size_t size, uint64_t token);
@@ -861,6 +872,7 @@ extern void opal_get_rtc_time(struct rtc_time *tm);
 extern unsigned long opal_get_boot_time(void);
 extern void opal_nvram_init(void);
 extern void opal_flash_init(void);
+extern void opal_platform_dump_init(void);
 
 extern int opal_machine_check(struct pt_regs *regs);
 
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 8d767fd..3528c11 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -1,6 +1,6 @@
 obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o
 obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
-obj-y			+= rng.o
+obj-y			+= rng.o opal-dump.o
 
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
new file mode 100644
index 0000000..b29fe04
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -0,0 +1,511 @@
+/*
+ * PowerNV OPAL Dump Interface
+ *
+ * Copyright 2013,2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kobject.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/delay.h>
+
+#include <asm/opal.h>
+
+#define DUMP_TYPE_FSP	0x01
+
+struct dump_obj {
+	struct kobject  kobj;
+	struct bin_attribute dump_attr;
+	uint32_t	id;  /* becomes object name */
+	uint32_t	size;
+	char		*buffer;
+};
+#define to_dump_obj(x) container_of(x, struct dump_obj, kobj)
+
+struct dump_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct dump_obj *dump, struct dump_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct dump_obj *dump, struct dump_attribute *attr,
+			 const char *buf, size_t count);
+};
+#define to_dump_attr(x) container_of(x, struct dump_attribute, attr)
+
+static ssize_t dump_id_show(struct dump_obj *dump_obj,
+			    struct dump_attribute *attr,
+			    char *buf)
+{
+	return sprintf(buf, "0x%x\n", dump_obj->id);
+}
+
+static ssize_t dump_type_show(struct dump_obj *dump_obj,
+			      struct dump_attribute *attr,
+			      char *buf)
+{
+	/* FIXME: Add OPAL support for getting dump type */
+	return sprintf(buf, "unknown\n");
+}
+
+static ssize_t dump_ack_show(struct dump_obj *dump_obj,
+			     struct dump_attribute *attr,
+			     char *buf)
+{
+	return sprintf(buf, "ack - acknowledge dump\n");
+}
+
+/*
+ * Send acknowledgement to OPAL
+ */
+static int64_t dump_send_ack(uint32_t dump_id)
+{
+	int rc;
+
+	rc = opal_dump_ack(dump_id);
+	if (rc)
+		pr_warn("%s: Failed to send ack to Dump ID 0x%x (%d)\n",
+			__func__, dump_id, rc);
+	return rc;
+}
+
+static void delay_release_kobj(void *kobj)
+{
+	kobject_put((struct kobject *)kobj);
+}
+
+static ssize_t dump_ack_store(struct dump_obj *dump_obj,
+			      struct dump_attribute *attr,
+			      const char *buf,
+			      size_t count)
+{
+	dump_send_ack(dump_obj->id);
+	sysfs_schedule_callback(&dump_obj->kobj, delay_release_kobj,
+				&dump_obj->kobj, THIS_MODULE);
+	return count;
+}
+
+/* Attributes of a dump
+ * The binary attribute of the dump itself is dynamic
+ * due to the dynamic size of the dump
+ */
+static struct dump_attribute id_attribute =
+	__ATTR(id, 0666, dump_id_show, NULL);
+static struct dump_attribute type_attribute =
+	__ATTR(type, 0666, dump_type_show, NULL);
+static struct dump_attribute ack_attribute =
+	__ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store);
+
+static ssize_t init_dump_show(struct dump_obj *dump_obj,
+			      struct dump_attribute *attr,
+			      char *buf)
+{
+	return sprintf(buf, "1 - initiate dump\n");
+}
+
+static int64_t dump_fips_init(uint8_t type)
+{
+	int rc;
+
+	rc = opal_dump_init(type);
+	if (rc)
+		pr_warn("%s: Failed to initiate FipS dump (%d)\n",
+			__func__, rc);
+	return rc;
+}
+
+static ssize_t init_dump_store(struct dump_obj *dump_obj,
+			       struct dump_attribute *attr,
+			       const char *buf,
+			       size_t count)
+{
+	dump_fips_init(DUMP_TYPE_FSP);
+	pr_info("%s: Initiated FSP dump\n", __func__);
+	return count;
+}
+
+static struct dump_attribute initiate_attribute =
+	__ATTR(initiate_dump, 0600, init_dump_show, init_dump_store);
+
+static struct attribute *initiate_attrs[] = {
+	&initiate_attribute.attr,
+	NULL,
+};
+
+static struct attribute_group initiate_attr_group = {
+	.attrs = initiate_attrs,
+};
+
+static struct kset *dump_kset;
+
+static ssize_t dump_attr_show(struct kobject *kobj,
+			      struct attribute *attr,
+			      char *buf)
+{
+	struct dump_attribute *attribute;
+	struct dump_obj *dump;
+
+	attribute = to_dump_attr(attr);
+	dump = to_dump_obj(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(dump, attribute, buf);
+}
+
+static ssize_t dump_attr_store(struct kobject *kobj,
+			       struct attribute *attr,
+			       const char *buf, size_t len)
+{
+	struct dump_attribute *attribute;
+	struct dump_obj *dump;
+
+	attribute = to_dump_attr(attr);
+	dump = to_dump_obj(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(dump, attribute, buf, len);
+}
+
+static const struct sysfs_ops dump_sysfs_ops = {
+	.show = dump_attr_show,
+	.store = dump_attr_store,
+};
+
+static void dump_release(struct kobject *kobj)
+{
+	struct dump_obj *dump;
+
+	dump = to_dump_obj(kobj);
+	vfree(dump->buffer);
+	kfree(dump);
+}
+
+static struct attribute *dump_default_attrs[] = {
+	&id_attribute.attr,
+	&type_attribute.attr,
+	&ack_attribute.attr,
+	NULL,
+};
+
+static struct kobj_type dump_ktype = {
+	.sysfs_ops = &dump_sysfs_ops,
+	.release = &dump_release,
+	.default_attrs = dump_default_attrs,
+};
+
+static void free_dump_sg_list(struct opal_sg_list *list)
+{
+	struct opal_sg_list *sg1;
+	while (list) {
+		sg1 = list->next;
+		kfree(list);
+		list = sg1;
+	}
+	list = NULL;
+}
+
+static struct opal_sg_list *dump_data_to_sglist(struct dump_obj *dump)
+{
+	struct opal_sg_list *sg1, *list = NULL;
+	void *addr;
+	int64_t size;
+
+	addr = dump->buffer;
+	size = dump->size;
+
+	sg1 = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!sg1)
+		goto nomem;
+
+	list = sg1;
+	sg1->num_entries = 0;
+	while (size > 0) {
+		/* Translate virtual address to physical address */
+		sg1->entry[sg1->num_entries].data =
+			(void *)(vmalloc_to_pfn(addr) << PAGE_SHIFT);
+
+		if (size > PAGE_SIZE)
+			sg1->entry[sg1->num_entries].length = PAGE_SIZE;
+		else
+			sg1->entry[sg1->num_entries].length = size;
+
+		sg1->num_entries++;
+		if (sg1->num_entries >= SG_ENTRIES_PER_NODE) {
+			sg1->next = kzalloc(PAGE_SIZE, GFP_KERNEL);
+			if (!sg1->next)
+				goto nomem;
+
+			sg1 = sg1->next;
+			sg1->num_entries = 0;
+		}
+		addr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+	return list;
+
+nomem:
+	pr_err("%s : Failed to allocate memory\n", __func__);
+	free_dump_sg_list(list);
+	return NULL;
+}
+
+static void sglist_to_phy_addr(struct opal_sg_list *list)
+{
+	struct opal_sg_list *sg, *next;
+
+	for (sg = list; sg; sg = next) {
+		next = sg->next;
+		/* Don't translate NULL pointer for last entry */
+		if (sg->next)
+			sg->next = (struct opal_sg_list *)__pa(sg->next);
+		else
+			sg->next = NULL;
+
+		/* Convert num_entries to length */
+		sg->num_entries =
+			sg->num_entries * sizeof(struct opal_sg_entry) + 16;
+	}
+}
+
+static int64_t dump_read_info(uint32_t *id, uint32_t *size)
+{
+	int rc;
+
+	rc = opal_dump_info(id, size);
+	if (rc)
+		pr_warn("%s: Failed to get dump info (%d)\n",
+			__func__, rc);
+	return rc;
+}
+
+static int64_t dump_read_data(struct dump_obj *dump)
+{
+	struct opal_sg_list *list;
+	uint64_t addr;
+	int64_t rc;
+
+	/* Allocate memory */
+	dump->buffer = vzalloc(PAGE_ALIGN(dump->size));
+	if (!dump->buffer) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Generate SG list */
+	list = dump_data_to_sglist(dump);
+	if (!list) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Translate sg list addr to real address */
+	sglist_to_phy_addr(list);
+
+	/* First entry address */
+	addr = __pa(list);
+
+	/* Fetch data */
+	rc = OPAL_BUSY_EVENT;
+	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+		rc = opal_dump_read(dump->id, addr);
+		if (rc == OPAL_BUSY_EVENT) {
+			opal_poll_events(NULL);
+			msleep(20);
+		}
+	}
+
+	if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL)
+		pr_warn("%s: Extract dump failed for ID 0x%x\n",
+			__func__, dump->id);
+
+	/* Free SG list */
+	free_dump_sg_list(list);
+
+out:
+	return rc;
+}
+
+static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj,
+			      struct bin_attribute *bin_attr,
+			      char *buffer, loff_t pos, size_t count)
+{
+	ssize_t rc;
+
+	struct dump_obj *dump = to_dump_obj(kobj);
+
+	if (!dump->buffer) {
+		rc = dump_read_data(dump);
+
+		if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) {
+			vfree(dump->buffer);
+			dump->buffer = NULL;
+
+			return -EIO;
+		}
+		if (rc == OPAL_PARTIAL) {
+			/* On a partial read, we just return EIO
+			 * and rely on userspace to ask us to try
+			 * again.
+			 */
+			pr_info("%s: Platform dump partially read.ID = 0x%x\n",
+				__func__, dump->id);
+			return -EIO;
+		}
+	}
+
+	memcpy(buffer, dump->buffer + pos, count);
+
+	/* If we're done reading, let's free the buffer as we
+	 * probably won't need it around anymore (and can always
+	 * re-fetch it from firmware.
+	*/
+	if ((pos+count) >= dump->size) {
+		pr_info("%s: Freeing Platform dump Id = 0x%x\n",
+			__func__, dump->id);
+		vfree(dump->buffer);
+		dump->buffer = NULL;
+	}
+
+	return count;
+}
+
+static struct dump_obj *create_dump_obj(uint32_t id, size_t size)
+{
+	struct dump_obj *dump;
+	int rc;
+
+	dump = kzalloc(sizeof(*dump), GFP_KERNEL);
+	if (!dump)
+		return NULL;
+
+	dump->kobj.kset = dump_kset;
+
+	kobject_init(&dump->kobj, &dump_ktype);
+
+	sysfs_bin_attr_init(&dump->dump_attr);
+
+	dump->dump_attr.attr.name = "dump";
+	dump->dump_attr.attr.mode = 0400;
+	dump->dump_attr.size = size;
+	dump->dump_attr.read = dump_attr_read;
+
+	dump->id = id;
+	dump->size = size;
+
+	rc = kobject_add(&dump->kobj, NULL, "0x%x", id);
+	if (rc) {
+		kobject_put(&dump->kobj);
+		return NULL;
+	}
+
+	rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr);
+	if (rc) {
+		kobject_put(&dump->kobj);
+		return NULL;
+	}
+
+	pr_info("%s: New platform dump. ID = 0x%x Size %u\n",
+		__func__, dump->id, dump->size);
+
+	kobject_uevent(&dump->kobj, KOBJ_ADD);
+
+	return dump;
+}
+
+static int process_dump(void)
+{
+	int rc;
+	uint32_t dump_id, dump_size;
+	struct dump_obj *dump;
+	char name[11];
+
+	rc = dump_read_info(&dump_id, &dump_size);
+	if (rc != OPAL_SUCCESS)
+		return rc;
+
+	sprintf(name, "0x%x", dump_id);
+
+	/* we may get notified twice, let's handle
+	 * that gracefully and not create two conflicting
+	 * entries.
+	 */
+	if (kset_find_obj(dump_kset, name))
+		return 0;
+
+	dump = create_dump_obj(dump_id, dump_size);
+	if (!dump)
+		return -1;
+
+	return 0;
+}
+
+static void dump_work_fn(struct work_struct *work)
+{
+	process_dump();
+}
+
+static DECLARE_WORK(dump_work, dump_work_fn);
+
+static void schedule_process_dump(void)
+{
+	schedule_work(&dump_work);
+}
+
+/*
+ * New dump available notification
+ *
+ * Once we get notification, we add sysfs entries for it.
+ * We only fetch the dump on demand, and create sysfs asynchronously.
+ */
+static int dump_event(struct notifier_block *nb,
+		      unsigned long events, void *change)
+{
+	if (events & OPAL_EVENT_DUMP_AVAIL)
+		schedule_process_dump();
+
+	return 0;
+}
+
+static struct notifier_block dump_nb = {
+	.notifier_call  = dump_event,
+	.next           = NULL,
+	.priority       = 0
+};
+
+void __init opal_platform_dump_init(void)
+{
+	int rc;
+
+	dump_kset = kset_create_and_add("dump", NULL, opal_kobj);
+	if (!dump_kset) {
+		pr_warn("%s: Failed to create dump kset\n", __func__);
+		return;
+	}
+
+	rc = sysfs_create_group(&dump_kset->kobj, &initiate_attr_group);
+	if (rc) {
+		pr_warn("%s: Failed to create initiate dump attr group\n",
+			__func__);
+		kobject_put(&dump_kset->kobj);
+		return;
+	}
+
+	rc = opal_notifier_register(&dump_nb);
+	if (rc) {
+		pr_warn("%s: Can't register OPAL event notifier (%d)\n",
+			__func__, rc);
+		return;
+	}
+
+	opal_dump_resend_notification();
+}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3e8829c..eb403da 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -126,6 +126,11 @@ OPAL_CALL(opal_return_cpu,			OPAL_RETURN_CPU);
 OPAL_CALL(opal_validate_flash,			OPAL_FLASH_VALIDATE);
 OPAL_CALL(opal_manage_flash,			OPAL_FLASH_MANAGE);
 OPAL_CALL(opal_update_flash,			OPAL_FLASH_UPDATE);
+OPAL_CALL(opal_dump_init,			OPAL_DUMP_INIT);
+OPAL_CALL(opal_dump_info,			OPAL_DUMP_INFO);
+OPAL_CALL(opal_dump_read,			OPAL_DUMP_READ);
+OPAL_CALL(opal_dump_ack,			OPAL_DUMP_ACK);
 OPAL_CALL(opal_get_msg,				OPAL_GET_MSG);
 OPAL_CALL(opal_check_completion,		OPAL_CHECK_ASYNC_COMPLETION);
+OPAL_CALL(opal_dump_resend_notification,	OPAL_DUMP_RESEND);
 OPAL_CALL(opal_sync_host_reboot,		OPAL_SYNC_HOST_REBOOT);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 65499ad..262cd1a 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -474,6 +474,8 @@ static int __init opal_init(void)
 	if (rc == 0) {
 		/* Setup code update interface */
 		opal_flash_init();
+		/* Setup platform dump extract interface */
+		opal_platform_dump_init();
 	}
 
 	return 0;
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH] powerpc/crashdump : fix page frame number check in copy_oldmem_page
From: Michael Ellerman @ 2014-02-25  1:47 UTC (permalink / raw)
  To: Laurent Dufour; +Cc: Paul Mackerras, linuxppc-dev
In-Reply-To: <20140224163055.7263.86979.stgit@nimbus>

On Mon, 2014-02-24 at 17:30 +0100, Laurent Dufour wrote:
> In copy_oldmem_page, the current check using max_pfn and min_low_pfn to
> decide if the page is backed or not, is not valid when the memory layout is
> not continuous.
> 
> This happens when running as a QEMU/KVM guest, where RTAS is mapped higher
> in the memory. In that case max_pfn points to the end of RTAS, and a hole
> between the end of the kdump kernel and RTAS is not backed by PTEs. As a
> consequence, the kdump kernel is crashing in copy_oldmem_page when accessing
> in a direct way the pages in that hole.
> 
> This fix relies on the memblock's service memblock_is_region_memory to
> check if the read page is part or not of the directly accessible memory.

Hi Laurent,

This looks good to me, assuming you've tested it on a PowerVM system as well as
under KVM.

cheers

^ permalink raw reply

* Re: [PATCH] ppc476: Enable a linker work around for IBM errata #46
From: Alistair Popple @ 2014-02-24 23:52 UTC (permalink / raw)
  To: Josh Boyer; +Cc: linuxppc-dev
In-Reply-To: <CA+5PVA4Hx4iKkL+vLJjOTwMaKOL-dJPJpSHLjR+nU6MTTGvFcA@mail.gmail.com>

On Mon, 24 Feb 2014 08:35:06 Josh Boyer wrote:
> On Mon, Feb 24, 2014 at 2:00 AM, Alistair Popple <alistair@popple.id.au> 
wrote:
> > This patch adds an option to enable a work around for an icache bug on
> > 476 that can cause execution of stale instructions when falling
> > through pages (IBM errata #46). It requires a recent version of
> > binutils which supports the --ppc476-workaround option.
> > 
> > The work around enables the appropriate linker options and ensures
> > that all module output sections are aligned to 4K page boundaries. The
> > work around is only required when building modules.
> 
> What happens if you're using 64K pages?  Is the alignment 4K always,
> or does it need to be aligned to PAGE_SIZE?

The work around inserts an extra instruction on 4K page boundaries. As a 64K 
(or a 16K) page boundary is also a 4K page boundary the work around should 
cover those page sizes as well.

- Alistair

> josh

^ permalink raw reply

* Re: [PATCH] watchdog: mpc8xxx_wdt: MPC8xx is HW enabled
From: Wim Van Sebroeck @ 2014-02-24 19:51 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: scottwood, linuxppc-dev, linux-kernel, linux-watchdog
In-Reply-To: <20131130154540.E83D743E15@localhost.localdomain>

Hi Christophe,

> MPC8xx watchdog is enabled at startup by HW.
> If the bootloader disables it, it cannot be reenabled.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> 
> diff -ur a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c
> --- a/drivers/watchdog/mpc8xxx_wdt.c	2013-05-11 22:57:46.000000000 +0200
> +++ b/drivers/watchdog/mpc8xxx_wdt.c	2013-08-08 02:12:15.000000000 +0200
> @@ -273,6 +310,7 @@
>  		.compatible = "fsl,mpc823-wdt",
>  		.data = &(struct mpc8xxx_wdt_type) {
>  			.prescaler = 0x800,
> +			.hw_enabled = true,
>  		},
>  	},
>  	{},
> 
> ---
> Ce courrier électronique ne contient aucun virus ou logiciel malveillant parce que la protection avast! Antivirus est active.
> http://www.avast.com
> 

This patch has been added to linux-watchdog-next.

Kind regards,
Wim.

^ permalink raw reply

* Re: [RFC PATCH 2/3] topology: support node_numa_mem() for determining the fallback node
From: Christoph Lameter @ 2014-02-24 19:54 UTC (permalink / raw)
  To: Joonsoo Kim
  Cc: Han Pingtian, Nishanth Aravamudan, Matt Mackall, Pekka Enberg,
	Linux Memory Management List, Paul Mackerras, Anton Blanchard,
	David Rientjes, linuxppc-dev, Wanpeng Li
In-Reply-To: <20140224050851.GB14814@lge.com>

On Mon, 24 Feb 2014, Joonsoo Kim wrote:

> > It will not commonly get there because of the tracking. Instead a per cpu
> > object will be used.
> > > get_partial_node() always fails even if there are some partial slab on
> > > memoryless node's nearest node.
> >
> > Correct and that leads to a page allocator action whereupon the node will
> > be marked as empty.
>
> Why do we need to request to a page allocator if there is partial slab?
> Checking whether node is memoryless or not is really easy, so we don't need
> to skip this. To skip this is a suboptimal solution.

The page allocator action is also used to determine to which other node we
should fall back if the node is empty. So we need to call the page
allocator when the per cpu slab is exhausted with the node of the
memoryless node to get memory from the proper fallback node.

^ permalink raw reply

* Re: [PATCH 1/3] mm: return NUMA_NO_NODE in local_memory_node if zonelists are not setup
From: Christoph Lameter @ 2014-02-24 19:43 UTC (permalink / raw)
  To: Nishanth Aravamudan
  Cc: Michal Hocko, linux-mm, Mel Gorman, David Rientjes, Andrew Morton,
	linuxppc-dev, Joonsoo Kim, Anton Blanchard
In-Reply-To: <20140221235616.GA25399@linux.vnet.ibm.com>

On Fri, 21 Feb 2014, Nishanth Aravamudan wrote:

> I added two calls to local_memory_node(), I *think* both are necessary,
> but am willing to be corrected.
>
> One is in map_cpu_to_node() and one is in start_secondary(). The
> start_secondary() path is fine, AFAICT, as we are up & running at that
> point. But in [the renamed function] update_numa_cpu_node() which is
> used by hotplug, we get called from do_init_bootmem(), which is before
> the zonelists are setup.
>
> I think both calls are necessary because I believe the
> arch_update_cpu_topology() is used for supporting firmware-driven
> home-noding, which does not invoke start_secondary() again (the
> processor is already running, we're just updating the topology in that
> situation).
>
> Then again, I could special-case the do_init_bootmem callpath, which is
> only called at kernel init time?

Well that looks to be simpler.

> > I do agree that calling local_memory_node() too early then trying to
> > fudge around the consequences seems rather wrong.
>
> If the answer is to simply not call local_memory_node() early, I'll
> submit a patch to at least add a comment, as there's nothing in the code
> itself to prevent this from happening and is guaranteed to oops.

Ok.

^ permalink raw reply

* [PATCH] powerpc/crashdump : fix page frame number check in copy_oldmem_page
From: Laurent Dufour @ 2014-02-24 16:30 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, linuxppc-dev

In copy_oldmem_page, the current check using max_pfn and min_low_pfn to
decide if the page is backed or not, is not valid when the memory layout is
not continuous.

This happens when running as a QEMU/KVM guest, where RTAS is mapped higher
in the memory. In that case max_pfn points to the end of RTAS, and a hole
between the end of the kdump kernel and RTAS is not backed by PTEs. As a
consequence, the kdump kernel is crashing in copy_oldmem_page when accessing
in a direct way the pages in that hole.

This fix relies on the memblock's service memblock_is_region_memory to
check if the read page is part or not of the directly accessible memory.

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/crash_dump.c |    8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 11c1d06..7a13f37 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -98,17 +98,19 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 			size_t csize, unsigned long offset, int userbuf)
 {
 	void  *vaddr;
+	phys_addr_t paddr;

 	if (!csize)
 		return 0;

 	csize = min_t(size_t, csize, PAGE_SIZE);
+	paddr = pfn << PAGE_SHIFT;

-	if ((min_low_pfn < pfn) && (pfn < max_pfn)) {
-		vaddr = __va(pfn << PAGE_SHIFT);
+	if (memblock_is_region_memory(paddr, csize)) {
+		vaddr = __va(paddr);
 		csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
 	} else {
-		vaddr = __ioremap(pfn << PAGE_SHIFT, PAGE_SIZE, 0);
+		vaddr = __ioremap(paddr, PAGE_SIZE, 0);
 		csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
 		iounmap(vaddr);
 	}

^ permalink raw reply related

* Re: [PATCH] ppc476: Enable a linker work around for IBM errata #46
From: Josh Boyer @ 2014-02-24 13:35 UTC (permalink / raw)
  To: Alistair Popple; +Cc: linuxppc-dev
In-Reply-To: <1393225256-21122-1-git-send-email-alistair@popple.id.au>

On Mon, Feb 24, 2014 at 2:00 AM, Alistair Popple <alistair@popple.id.au> wrote:
> This patch adds an option to enable a work around for an icache bug on
> 476 that can cause execution of stale instructions when falling
> through pages (IBM errata #46). It requires a recent version of
> binutils which supports the --ppc476-workaround option.
>
> The work around enables the appropriate linker options and ensures
> that all module output sections are aligned to 4K page boundaries. The
> work around is only required when building modules.

What happens if you're using 64K pages?  Is the alignment 4K always,
or does it need to be aligned to PAGE_SIZE?

josh

^ permalink raw reply

* Re: [PATCH RFC v8 2/5] dma: mpc512x: add support for peripheral transfers
From: Andy Shevchenko @ 2014-02-24 13:03 UTC (permalink / raw)
  To: Alexander Popov
  Cc: Lars-Peter Clausen, Arnd Bergmann, Vinod Koul, Gerhard Sittig,
	dmaengine, Dan Williams, Anatolij Gustschin, linuxppc-dev
In-Reply-To: <1393240172-18769-3-git-send-email-a13xp0p0v88@gmail.com>

On Mon, 2014-02-24 at 15:09 +0400, Alexander Popov wrote:
> Introduce support for slave s/g transfer preparation and the associated
> device control callback in the MPC512x DMA controller driver, which adds
> support for data transfers between memory and peripheral I/O to the
> previously supported mem-to-mem transfers.
> 

Few comments below.

> Signed-off-by: Alexander Popov <a13xp0p0v88@gmail.com>
> ---
>  drivers/dma/mpc512x_dma.c | 235 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 230 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
> index 2ce248b..8f504cb 100644
> --- a/drivers/dma/mpc512x_dma.c
> +++ b/drivers/dma/mpc512x_dma.c
> @@ -2,6 +2,7 @@
>   * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008.
>   * Copyright (C) Semihalf 2009
>   * Copyright (C) Ilya Yanok, Emcraft Systems 2010
> + * Copyright (C) Alexander Popov, Promcontroller 2013
>   *
>   * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
>   * (defines, structures and comments) was taken from MPC5121 DMA driver
> @@ -29,8 +30,17 @@
>   */
>  
>  /*
> - * This is initial version of MPC5121 DMA driver. Only memory to memory
> - * transfers are supported (tested using dmatest module).
> + * MPC512x and MPC8308 DMA driver. It supports
> + * memory to memory data transfers (tested using dmatest module) and
> + * data transfers between memory and peripheral I/O memory
> + * by means of slave s/g with these limitations:
> + *  - chunked transfers (transfers with more than one part) are refused
> + *     as long as proper support for scatter/gather is missing;
> + *  - transfers on MPC8308 always start from software as this SoC appears
> + *     not to have external request lines for peripheral flow control;
> + *  - minimal memory <-> I/O memory transfer chunk is 4 bytes and consequently
> + *     source and destination addresses must be 4-byte aligned
> + *     and transfer size must be aligned on (4 * maxburst) boundary;
>   */
>  
>  #include <linux/module.h>
> @@ -189,6 +199,7 @@ struct mpc_dma_desc {
>  	dma_addr_t			tcd_paddr;
>  	int				error;
>  	struct list_head		node;
> +	int				will_access_peripheral;
>  };
>  
>  struct mpc_dma_chan {
> @@ -201,6 +212,10 @@ struct mpc_dma_chan {
>  	struct mpc_dma_tcd		*tcd;
>  	dma_addr_t			tcd_paddr;
>  
> +	/* Settings for access to peripheral FIFO */
> +	dma_addr_t			per_paddr;	/* FIFO address */
> +	u32				tcd_nunits;
> +
>  	/* Lock for this structure */
>  	spinlock_t			lock;
>  };
> @@ -251,8 +266,23 @@ static void mpc_dma_execute(struct mpc_dma_chan *mchan)
>  	struct mpc_dma_desc *mdesc;
>  	int cid = mchan->chan.chan_id;
>  
> -	/* Move all queued descriptors to active list */
> -	list_splice_tail_init(&mchan->queued, &mchan->active);
> +	while (!list_empty(&mchan->queued)) {
> +		mdesc = list_first_entry(&mchan->queued,
> +						struct mpc_dma_desc, node);
> +		/*
> +		 * Grab either several mem-to-mem transfer descriptors
> +		 * or one peripheral transfer descriptor,
> +		 * don't mix mem-to-mem and peripheral transfer descriptors
> +		 * within the same 'active' list.
> +		 */
> +		if (mdesc->will_access_peripheral) {
> +			if (list_empty(&mchan->active))
> +				list_move_tail(&mdesc->node, &mchan->active);
> +			break;
> +		} else {
> +			list_move_tail(&mdesc->node, &mchan->active);
> +		}
> +	}
>  
>  	/* Chain descriptors into one transaction */
>  	list_for_each_entry(mdesc, &mchan->active, node) {
> @@ -278,7 +308,17 @@ static void mpc_dma_execute(struct mpc_dma_chan *mchan)
>  
>  	if (first != prev)
>  		mdma->tcd[cid].e_sg = 1;
> -	out_8(&mdma->regs->dmassrt, cid);
> +
> +	if (mdma->is_mpc8308) {
> +		/* MPC8308, no request lines, software initiated start */
> +		out_8(&mdma->regs->dmassrt, cid);
> +	} else if (first->will_access_peripheral) {
> +		/* peripherals involved, start by external request signal */

You should probably keep the style of all comments in the code consistent.
For example, start sentences with a capital letter.

> +		out_8(&mdma->regs->dmaserq, cid);
> +	} else {
> +		/* memory to memory transfer, software initiated start */
> +		out_8(&mdma->regs->dmassrt, cid);
> +	}
>  }
>  
>  /* Handle interrupt on one half of DMA controller (32 channels) */
> @@ -596,6 +636,7 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
>  	}
>  
>  	mdesc->error = 0;
> +	mdesc->will_access_peripheral = 0;
>  	tcd = mdesc->tcd;
>  
>  	/* Prepare Transfer Control Descriptor for this transaction */
> @@ -643,6 +684,187 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
>  	return &mdesc->desc;
>  }
>  
> +static struct dma_async_tx_descriptor *
> +mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
> +		unsigned int sg_len, enum dma_transfer_direction direction,
> +		unsigned long flags, void *context)
> +{
> +	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
> +	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
> +	struct mpc_dma_desc *mdesc = NULL;
> +	dma_addr_t per_paddr;
> +	u32 tcd_nunits;
> +	struct mpc_dma_tcd *tcd;
> +	unsigned long iflags;
> +	struct scatterlist *sg;
> +	size_t len;
> +	int iter, i;
> +
> +	/* currently there is no proper support for scatter/gather */
> +	if (sg_len != 1)
> +		return NULL;

You may check direction right there using is_slave_direction().

> +
> +	for_each_sg(sgl, sg, sg_len, i) {
> +		spin_lock_irqsave(&mchan->lock, iflags);
> +
> +		mdesc = list_first_entry(&mchan->free,
> +						struct mpc_dma_desc, node);
> +		if (!mdesc) {
> +			spin_unlock_irqrestore(&mchan->lock, iflags);
> +			/* try to free completed descriptors */
> +			mpc_dma_process_completed(mdma);
> +			return NULL;
> +		}
> +
> +		list_del(&mdesc->node);
> +
> +		per_paddr = mchan->per_paddr;
> +		tcd_nunits = mchan->tcd_nunits;
> +
> +		spin_unlock_irqrestore(&mchan->lock, iflags);
> +
> +		if (per_paddr == 0 || tcd_nunits == 0)
> +			goto err_prep;
> +
> +		mdesc->error = 0;
> +		mdesc->will_access_peripheral = 1;
> +		tcd = mdesc->tcd;
> +
> +		/* Prepare Transfer Control Descriptor for this transaction */
> +

Maybe instead of empty line you can move tcd assignment here.

> +		memset(tcd, 0, sizeof(struct mpc_dma_tcd));
> +
> +		if (!IS_ALIGNED(sg_dma_address(sg), 4))
> +			goto err_prep;
> +
> +		if (direction == DMA_DEV_TO_MEM) {
> +			tcd->saddr = per_paddr;
> +			tcd->daddr = sg_dma_address(sg);
> +			tcd->soff = 0;
> +			tcd->doff = 4;
> +		} else if (direction == DMA_MEM_TO_DEV) {
> +			tcd->saddr = sg_dma_address(sg);
> +			tcd->daddr = per_paddr;
> +			tcd->soff = 4;
> +			tcd->doff = 0;
> +		} else
> +			goto err_prep;

First, keep the style of the conditionals consistent; second, you may remove
this branch and the previous if when the direction is checked outside of the
loop.

> +
> +		tcd->ssize = MPC_DMA_TSIZE_4;
> +		tcd->dsize = MPC_DMA_TSIZE_4;
> +
> +		len = sg_dma_len(sg);
> +		tcd->nbytes = tcd_nunits * 4;
> +		if (!IS_ALIGNED(len, tcd->nbytes))
> +			goto err_prep;
> +
> +		iter = len / tcd->nbytes;
> +		if (iter >= 1 << 15) {
> +			/* len is too big */
> +			goto err_prep;
> +		} else {

Redundant else branch.

> +			/* citer_linkch contains the high bits of iter */
> +			tcd->biter = iter & 0x1ff;
> +			tcd->biter_linkch = iter >> 9;
> +			tcd->citer = tcd->biter;
> +			tcd->citer_linkch = tcd->biter_linkch;
> +		}
> +
> +		tcd->e_sg = 0;
> +		tcd->d_req = 1;
> +
> +		/* Place descriptor in prepared list */
> +		spin_lock_irqsave(&mchan->lock, iflags);
> +		list_add_tail(&mdesc->node, &mchan->prepared);
> +		spin_unlock_irqrestore(&mchan->lock, iflags);
> +	}
> +
> +	return &mdesc->desc;
> +
> +err_prep:
> +	/* Put the descriptor back */
> +	spin_lock_irqsave(&mchan->lock, iflags);
> +	list_add_tail(&mdesc->node, &mchan->free);
> +	spin_unlock_irqrestore(&mchan->lock, iflags);
> +
> +	return NULL;
> +}
> +
> +static int mpc_dma_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
> +							unsigned long arg)
> +{
> +	struct mpc_dma_chan *mchan;
> +	struct mpc_dma *mdma;
> +	struct dma_slave_config *cfg;
> +	unsigned long flags;
> +
> +	mchan = dma_chan_to_mpc_dma_chan(chan);
> +	switch (cmd) {
> +	case DMA_TERMINATE_ALL:
> +		/* disable channel requests */
> +		mdma = dma_chan_to_mpc_dma(chan);
> +
> +		spin_lock_irqsave(&mchan->lock, flags);
> +
> +		out_8(&mdma->regs->dmacerq, chan->chan_id);
> +		list_splice_tail_init(&mchan->prepared, &mchan->free);
> +		list_splice_tail_init(&mchan->queued, &mchan->free);
> +		list_splice_tail_init(&mchan->active, &mchan->free);
> +
> +		spin_unlock_irqrestore(&mchan->lock, flags);
> +
> +		return 0;
> +	case DMA_SLAVE_CONFIG:
> +		/* Constraints:
> +		 *  - only transfers between a peripheral device and
> +		 *     memory are supported;
> +		 *  - minimal transfer chunk is 4 bytes and consequently
> +		 *     source and destination addresses must be 4-byte aligned
> +		 *     and transfer size must be aligned on (4 * maxburst)
> +		 *     boundary;
> +		 *  - during the transfer RAM address is being incremented by
> +		 *     the size of minimal transfer chunk;
> +		 *  - peripheral port's address is constant during the transfer.
> +		 */
> +
> +		cfg = (void *)arg;
> +
> +		if (cfg->direction != DMA_DEV_TO_MEM &&
> +			cfg->direction != DMA_MEM_TO_DEV)
> +			return -EINVAL;

is_slave_direction()

> +
> +		if (cfg->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES &&
> +			cfg->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
> +			return -EINVAL;
> +
> +		spin_lock_irqsave(&mchan->lock, flags);
> +
> +		if (cfg->direction == DMA_DEV_TO_MEM) {
> +			mchan->per_paddr = cfg->src_addr;
> +			mchan->tcd_nunits = cfg->src_maxburst;
> +		} else {
> +			mchan->per_paddr = cfg->dst_addr;
> +			mchan->tcd_nunits = cfg->dst_maxburst;
> +		}
> +
> +		if (!IS_ALIGNED(mchan->per_paddr, 4)) {
> +			spin_unlock_irqrestore(&mchan->lock, flags);
> +			return -EINVAL;
> +		}
> +
> +		if (mchan->tcd_nunits == 0)
> +			mchan->tcd_nunits = 1;	/* apply default */
> +
> +		spin_unlock_irqrestore(&mchan->lock, flags);
> +
> +		return 0;
> +	default:
> +		return -ENOSYS;

Use break here.

> +	}
> +
> +	return -EINVAL;
> +}
> +
>  static int mpc_dma_probe(struct platform_device *op)
>  {
>  	struct device_node *dn = op->dev.of_node;
> @@ -727,9 +949,12 @@ static int mpc_dma_probe(struct platform_device *op)
>  	dma->device_issue_pending = mpc_dma_issue_pending;
>  	dma->device_tx_status = mpc_dma_tx_status;
>  	dma->device_prep_dma_memcpy = mpc_dma_prep_memcpy;
> +	dma->device_prep_slave_sg = mpc_dma_prep_slave_sg;
> +	dma->device_control = mpc_dma_device_control;
>  
>  	INIT_LIST_HEAD(&dma->channels);
>  	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
> +	dma_cap_set(DMA_SLAVE, dma->cap_mask);
>  
>  	for (i = 0; i < dma->chancnt; i++) {
>  		mchan = &mdma->channels[i];


-- 
Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Intel Finland Oy

^ permalink raw reply

* Re: [PATCH RFC v8 5/5] dma: mpc512x: register for device tree channel lookup
From: Andy Shevchenko @ 2014-02-24 13:08 UTC (permalink / raw)
  To: Alexander Popov
  Cc: devicetree, Lars-Peter Clausen, Arnd Bergmann, Vinod Koul,
	Gerhard Sittig, dmaengine, Dan Williams, Anatolij Gustschin,
	linuxppc-dev
In-Reply-To: <1393240172-18769-6-git-send-email-a13xp0p0v88@gmail.com>

On Mon, 2014-02-24 at 15:09 +0400, Alexander Popov wrote:
> From: Gerhard Sittig <gsi@denx.de>
> 
> register the controller for device tree based lookup of DMA channels
> (non-fatal for backwards compatibility with older device trees) and
> provide the '#dma-cells' property in the shared mpc5121.dtsi file
> 
> Signed-off-by: Gerhard Sittig <gsi@denx.de>
> [ a13xp0p0v88@gmail.com: resolve little patch conflict and put
>   MPC512x DMA controller bindings document to a separate patch ]
> ---
>  arch/powerpc/boot/dts/mpc5121.dtsi |  1 +
>  drivers/dma/mpc512x_dma.c          | 21 ++++++++++++++++++---
>  2 files changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi
> index 2c0e155..7f9d14f 100644
> --- a/arch/powerpc/boot/dts/mpc5121.dtsi
> +++ b/arch/powerpc/boot/dts/mpc5121.dtsi
> @@ -498,6 +498,7 @@
>  			compatible = "fsl,mpc5121-dma";
>  			reg = <0x14000 0x1800>;
>  			interrupts = <65 0x8>;
> +			#dma-cells = <1>;
>  		};
>  	};
>  
> diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
> index 8f504cb..d9f8740 100644
> --- a/drivers/dma/mpc512x_dma.c
> +++ b/drivers/dma/mpc512x_dma.c
> @@ -52,6 +52,7 @@
>  #include <linux/of_address.h>
>  #include <linux/of_device.h>
>  #include <linux/of_irq.h>
> +#include <linux/of_dma.h>
>  #include <linux/of_platform.h>
>  
>  #include <linux/random.h>
> @@ -1018,11 +1019,23 @@ static int mpc_dma_probe(struct platform_device *op)
>  	/* Register DMA engine */
>  	dev_set_drvdata(dev, mdma);
>  	retval = dma_async_device_register(dma);
> -	if (retval) {
> -		devm_free_irq(dev, mdma->irq, mdma);
> -		irq_dispose_mapping(mdma->irq);
> +	if (retval)
> +		goto out_irq;
> +
> +	/* register with OF helpers for DMA lookups (nonfatal) */
> +	if (dev->of_node) {
> +		retval = of_dma_controller_register(dev->of_node,
> +						    of_dma_xlate_by_chan_id,
> +						    mdma);
> +		if (retval)
> +			dev_warn(dev, "could not register for OF lookup\n");
>  	}
>  
> +	return 0;
> +
> +out_irq:
> +	devm_free_irq(dev, mdma->irq, mdma);

Either something is wrong with devm_request_irq(), or you don't need to call
devm_free_irq() explicitly. We already tried to discuss this earlier on
this mailing list with Lars-Peter(?), though no solution was found for
keeping devm_*_irq usable.

> +	irq_dispose_mapping(mdma->irq);
>  	return retval;
>  }
>  
> @@ -1031,6 +1044,8 @@ static int mpc_dma_remove(struct platform_device *op)
>  	struct device *dev = &op->dev;
>  	struct mpc_dma *mdma = dev_get_drvdata(dev);
>  
> +	if (dev->of_node)
> +		of_dma_controller_free(dev->of_node);
>  	dma_async_device_unregister(&mdma->dma);
>  	devm_free_irq(dev, mdma->irq, mdma);
>  	irq_dispose_mapping(mdma->irq);


-- 
Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Intel Finland Oy

^ permalink raw reply

* Re: [patch 03/26] powerpc: eeh: Kill another abuse of irq_desc
From: Thomas Gleixner @ 2014-02-24 11:32 UTC (permalink / raw)
  To: Gavin Shan; +Cc: Peter Zijlstra, Ingo Molnar, ppc, LKML
In-Reply-To: <20140224075607.GA20727@shangw.(null)>

On Mon, 24 Feb 2014, Gavin Shan wrote:
> On Sun, Feb 23, 2014 at 09:40:09PM -0000, Thomas Gleixner wrote:
> >commit 91150af3a (powerpc/eeh: Fix unbalanced enable for IRQ) is
> >another brilliant example of trainwreck engineering.
> >
> >The patch "fixes" the issue of an unbalanced call to irq_enable()
> >which causes a prominent warning by checking the disabled state of the
> >interrupt line and call conditionally into the core code.
> >
> >This is wrong in two aspects:
> >
> >1) The warning is there to tell users that they need to fix their
> >   asymmetric enable/disable patterns by finding the root cause and
> >   solving it there.
> >
> >   It's definitely not meant to work around it by conditionally
> >   calling into the core code depending on the random state of the irq
> >   line.
> >
> >   Asymmetric irq_disable/enable calls are a clear sign of wrong usage
> >   of the interfaces which have to be cured at the root and not by
> >   somehow hacking around it.
> >
> >2) The abuse of core internal data structure instead of using the
> >   proper interfaces for retrieving the information for the 'hack
> >   around'
> >
> >   irq_desc is core internal and it's clear enough stated.
> >
> >Replace at least the irq_desc abuse with the proper functions and add
> >a big fat comment why this is absurd and completely wrong.
> >
> 
> Thanks for pointing it out. I think we might have this patch for now
> and I'll look into individual drivers to fix the unbalanced function
> calls later one by one.

Fine with me. You won't escape my scan scripts :)

Thanks,

	tglx

^ permalink raw reply

* [PATCH RFC v8 5/5] dma: mpc512x: register for device tree channel lookup
From: Alexander Popov @ 2014-02-24 11:09 UTC (permalink / raw)
  To: Gerhard Sittig, Dan Williams, Vinod Koul, Lars-Peter Clausen,
	Arnd Bergmann, Anatolij Gustschin, Alexander Popov, linuxppc-dev,
	dmaengine
  Cc: devicetree
In-Reply-To: <1393240172-18769-1-git-send-email-a13xp0p0v88@gmail.com>

From: Gerhard Sittig <gsi@denx.de>

register the controller for device tree based lookup of DMA channels
(non-fatal for backwards compatibility with older device trees) and
provide the '#dma-cells' property in the shared mpc5121.dtsi file

Signed-off-by: Gerhard Sittig <gsi@denx.de>
[ a13xp0p0v88@gmail.com: resolve little patch conflict and put
  MPC512x DMA controller bindings document to a separate patch ]
---
 arch/powerpc/boot/dts/mpc5121.dtsi |  1 +
 drivers/dma/mpc512x_dma.c          | 21 ++++++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi
index 2c0e155..7f9d14f 100644
--- a/arch/powerpc/boot/dts/mpc5121.dtsi
+++ b/arch/powerpc/boot/dts/mpc5121.dtsi
@@ -498,6 +498,7 @@
 			compatible = "fsl,mpc5121-dma";
 			reg = <0x14000 0x1800>;
 			interrupts = <65 0x8>;
+			#dma-cells = <1>;
 		};
 	};
 
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 8f504cb..d9f8740 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -52,6 +52,7 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
+#include <linux/of_dma.h>
 #include <linux/of_platform.h>
 
 #include <linux/random.h>
@@ -1018,11 +1019,23 @@ static int mpc_dma_probe(struct platform_device *op)
 	/* Register DMA engine */
 	dev_set_drvdata(dev, mdma);
 	retval = dma_async_device_register(dma);
-	if (retval) {
-		devm_free_irq(dev, mdma->irq, mdma);
-		irq_dispose_mapping(mdma->irq);
+	if (retval)
+		goto out_irq;
+
+	/* register with OF helpers for DMA lookups (nonfatal) */
+	if (dev->of_node) {
+		retval = of_dma_controller_register(dev->of_node,
+						    of_dma_xlate_by_chan_id,
+						    mdma);
+		if (retval)
+			dev_warn(dev, "could not register for OF lookup\n");
 	}
 
+	return 0;
+
+out_irq:
+	devm_free_irq(dev, mdma->irq, mdma);
+	irq_dispose_mapping(mdma->irq);
 	return retval;
 }
 
@@ -1031,6 +1044,8 @@ static int mpc_dma_remove(struct platform_device *op)
 	struct device *dev = &op->dev;
 	struct mpc_dma *mdma = dev_get_drvdata(dev);
 
+	if (dev->of_node)
+		of_dma_controller_free(dev->of_node);
 	dma_async_device_unregister(&mdma->dma);
 	devm_free_irq(dev, mdma->irq, mdma);
 	irq_dispose_mapping(mdma->irq);
-- 
1.8.4.2

^ permalink raw reply related

* [PATCH RFC v8 4/5] dma: mpc512x: add device tree binding document
From: Alexander Popov @ 2014-02-24 11:09 UTC (permalink / raw)
  To: Gerhard Sittig, Dan Williams, Vinod Koul, Lars-Peter Clausen,
	Arnd Bergmann, Anatolij Gustschin, Alexander Popov, linuxppc-dev,
	dmaengine
  Cc: devicetree
In-Reply-To: <1393240172-18769-1-git-send-email-a13xp0p0v88@gmail.com>

From: Gerhard Sittig <gsi@denx.de>

introduce a device tree binding document for the MPC512x DMA controller

Signed-off-by: Gerhard Sittig <gsi@denx.de>
[ a13xp0p0v88@gmail.com: turn this into a separate patch ]
---
 .../devicetree/bindings/dma/mpc512x-dma.txt        | 55 ++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/dma/mpc512x-dma.txt

diff --git a/Documentation/devicetree/bindings/dma/mpc512x-dma.txt b/Documentation/devicetree/bindings/dma/mpc512x-dma.txt
new file mode 100644
index 0000000..a4867d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/mpc512x-dma.txt
@@ -0,0 +1,55 @@
+* Freescale MPC512x DMA Controller
+
+The DMA controller in the Freescale MPC512x SoC can move blocks of
+memory contents between memory and peripherals or memory to memory.
+
+Refer to the "Generic DMA Controller and DMA request bindings" description
+in the dma.txt file for a more detailed discussion of the binding.  The
+MPC512x DMA engine binding follows the common scheme, but doesn't provide
+support for the optional channels and requests counters (those values are
+derived from the detected hardware features) and has a fixed client
+specifier length of 1 integer cell (the value is the DMA channel, since
+the DMA controller uses a fixed assignment of request lines per channel).
+
+
+DMA controller node properties:
+
+Required properties:
+- compatible:		should be "fsl,mpc5121-dma"
+- reg:			address and size of the DMA controller's register set
+- interrupts:		interrupt spec for the DMA controller
+
+Optional properties:
+- #dma-cells:		must be <1>, describes the number of integer cells
+			needed to specify the 'dmas' property in client nodes,
+			strongly recommended since common client helper code
+			uses this property
+
+Example:
+
+	dma0: dma@14000 {
+		compatible = "fsl,mpc5121-dma";
+		reg = <0x14000 0x1800>;
+		interrupts = <65 0x8>;
+		#dma-cells = <1>;
+	};
+
+
+Client node properties:
+
+Required properties:
+- dmas:			list of DMA specifiers, consisting each of a handle
+			for the DMA controller and integer cells to specify
+			the channel used within the DMA controller
+- dma-names:		list of identifier strings for the DMA specifiers,
+			client device driver code uses these strings to
+			have DMA channels looked up at the controller
+
+Example:
+
+	sdhc@1500 {
+		compatible = "fsl,mpc5121-sdhc";
+		/* ... */
+		dmas = <&dma0 30>;
+		dma-names = "rx-tx";
+	};
-- 
1.8.4.2

^ permalink raw reply related

* [PATCH RFC v8 3/5] dma: of: Add common xlate function for matching by channel id
From: Alexander Popov @ 2014-02-24 11:09 UTC (permalink / raw)
  To: Gerhard Sittig, Dan Williams, Vinod Koul, Lars-Peter Clausen,
	Arnd Bergmann, Anatolij Gustschin, Alexander Popov, linuxppc-dev,
	dmaengine
  Cc: devicetree
In-Reply-To: <1393240172-18769-1-git-send-email-a13xp0p0v88@gmail.com>

This patch adds a new common OF dma xlate callback function which will match a
channel by its id. The binding expects one integer argument which it will use to
look up the channel by the id.

Unlike of_dma_simple_xlate, this function is able to handle a system with
multiple DMA controllers. When registering the OF dma provider with
of_dma_controller_register, a pointer to the dma_device struct which is
associated with the dt node needs to be passed as the data parameter.
The new function will use this pointer to match only channels which belong to
the specified DMA controller.

Signed-off-by: Alexander Popov <a13xp0p0v88@gmail.com>
---
 drivers/dma/of-dma.c   | 35 +++++++++++++++++++++++++++++++++++
 include/linux/of_dma.h |  4 ++++
 2 files changed, 39 insertions(+)

diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index e8fe9dc..d5fbeaa 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -218,3 +218,38 @@ struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 			&dma_spec->args[0]);
 }
 EXPORT_SYMBOL_GPL(of_dma_simple_xlate);
+
+/**
+ * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ * @of_dma:	pointer to DMA controller data
+ *
+ * This function can be used as the of xlate callback for DMA driver which wants
+ * to match the channel based on the channel id. When using this xlate function
+ * the #dma-cells property of the DMA controller dt node needs to be set to 1.
+ * The data parameter of of_dma_controller_register must be a pointer to the
+ * dma_device struct the function should match upon.
+ *
+ * Returns pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	struct dma_device *dev = ofdma->of_dma_data;
+	struct dma_chan *chan, *candidate = NULL;
+
+	if (!dev || dma_spec->args_count != 1)
+		return NULL;
+
+	list_for_each_entry(chan, &dev->channels, device_node)
+		if (chan->chan_id == dma_spec->args[0]) {
+			candidate = chan;
+			break;
+		}
+
+	if (!candidate)
+		return NULL;
+
+	return dma_get_slave_channel(candidate);
+}
+EXPORT_SYMBOL_GPL(of_dma_xlate_by_chan_id);
diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index ae36298..56bc026 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -41,6 +41,8 @@ extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     const char *name);
 extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 		struct of_dma *ofdma);
+extern struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
+		struct of_dma *ofdma);
 #else
 static inline int of_dma_controller_register(struct device_node *np,
 		struct dma_chan *(*of_dma_xlate)
@@ -66,6 +68,8 @@ static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_s
 	return NULL;
 }
 
+#define of_dma_xlate_by_chan_id NULL
+
 #endif
 
 #endif /* __LINUX_OF_DMA_H */
-- 
1.8.4.2

^ permalink raw reply related

* [PATCH RFC v8 2/5] dma: mpc512x: add support for peripheral transfers
From: Alexander Popov @ 2014-02-24 11:09 UTC (permalink / raw)
  To: Gerhard Sittig, Dan Williams, Vinod Koul, Lars-Peter Clausen,
	Arnd Bergmann, Anatolij Gustschin, Alexander Popov, linuxppc-dev,
	dmaengine
In-Reply-To: <1393240172-18769-1-git-send-email-a13xp0p0v88@gmail.com>

Introduce support for slave s/g transfer preparation and the associated
device control callback in the MPC512x DMA controller driver, which adds
support for data transfers between memory and peripheral I/O to the
previously supported mem-to-mem transfers.

Signed-off-by: Alexander Popov <a13xp0p0v88@gmail.com>
---
 drivers/dma/mpc512x_dma.c | 235 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 230 insertions(+), 5 deletions(-)

diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 2ce248b..8f504cb 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -2,6 +2,7 @@
  * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008.
  * Copyright (C) Semihalf 2009
  * Copyright (C) Ilya Yanok, Emcraft Systems 2010
+ * Copyright (C) Alexander Popov, Promcontroller 2013
  *
  * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
  * (defines, structures and comments) was taken from MPC5121 DMA driver
@@ -29,8 +30,17 @@
  */
 
 /*
- * This is initial version of MPC5121 DMA driver. Only memory to memory
- * transfers are supported (tested using dmatest module).
+ * MPC512x and MPC8308 DMA driver. It supports
+ * memory to memory data transfers (tested using dmatest module) and
+ * data transfers between memory and peripheral I/O memory
+ * by means of slave s/g with these limitations:
+ *  - chunked transfers (transfers with more than one part) are refused
+ *     as long as proper support for scatter/gather is missing;
+ *  - transfers on MPC8308 always start from software as this SoC appears
+ *     not to have external request lines for peripheral flow control;
+ *  - minimal memory <-> I/O memory transfer chunk is 4 bytes and consequently
+ *     source and destination addresses must be 4-byte aligned
+ *     and transfer size must be aligned on (4 * maxburst) boundary;
  */
 
 #include <linux/module.h>
@@ -189,6 +199,7 @@ struct mpc_dma_desc {
 	dma_addr_t			tcd_paddr;
 	int				error;
 	struct list_head		node;
+	int				will_access_peripheral;
 };
 
 struct mpc_dma_chan {
@@ -201,6 +212,10 @@ struct mpc_dma_chan {
 	struct mpc_dma_tcd		*tcd;
 	dma_addr_t			tcd_paddr;
 
+	/* Settings for access to peripheral FIFO */
+	dma_addr_t			per_paddr;	/* FIFO address */
+	u32				tcd_nunits;
+
 	/* Lock for this structure */
 	spinlock_t			lock;
 };
@@ -251,8 +266,23 @@ static void mpc_dma_execute(struct mpc_dma_chan *mchan)
 	struct mpc_dma_desc *mdesc;
 	int cid = mchan->chan.chan_id;
 
-	/* Move all queued descriptors to active list */
-	list_splice_tail_init(&mchan->queued, &mchan->active);
+	while (!list_empty(&mchan->queued)) {
+		mdesc = list_first_entry(&mchan->queued,
+						struct mpc_dma_desc, node);
+		/*
+		 * Grab either several mem-to-mem transfer descriptors
+		 * or one peripheral transfer descriptor,
+		 * don't mix mem-to-mem and peripheral transfer descriptors
+		 * within the same 'active' list.
+		 */
+		if (mdesc->will_access_peripheral) {
+			if (list_empty(&mchan->active))
+				list_move_tail(&mdesc->node, &mchan->active);
+			break;
+		} else {
+			list_move_tail(&mdesc->node, &mchan->active);
+		}
+	}
 
 	/* Chain descriptors into one transaction */
 	list_for_each_entry(mdesc, &mchan->active, node) {
@@ -278,7 +308,17 @@ static void mpc_dma_execute(struct mpc_dma_chan *mchan)
 
 	if (first != prev)
 		mdma->tcd[cid].e_sg = 1;
-	out_8(&mdma->regs->dmassrt, cid);
+
+	if (mdma->is_mpc8308) {
+		/* MPC8308, no request lines, software initiated start */
+		out_8(&mdma->regs->dmassrt, cid);
+	} else if (first->will_access_peripheral) {
+		/* peripherals involved, start by external request signal */
+		out_8(&mdma->regs->dmaserq, cid);
+	} else {
+		/* memory to memory transfer, software initiated start */
+		out_8(&mdma->regs->dmassrt, cid);
+	}
 }
 
 /* Handle interrupt on one half of DMA controller (32 channels) */
@@ -596,6 +636,7 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
 	}
 
 	mdesc->error = 0;
+	mdesc->will_access_peripheral = 0;
 	tcd = mdesc->tcd;
 
 	/* Prepare Transfer Control Descriptor for this transaction */
@@ -643,6 +684,187 @@ mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
 	return &mdesc->desc;
 }
 
+static struct dma_async_tx_descriptor *
+mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc = NULL;
+	dma_addr_t per_paddr;
+	u32 tcd_nunits;
+	struct mpc_dma_tcd *tcd;
+	unsigned long iflags;
+	struct scatterlist *sg;
+	size_t len;
+	int iter, i;
+
+	/* currently there is no proper support for scatter/gather */
+	if (sg_len != 1)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		spin_lock_irqsave(&mchan->lock, iflags);
+
+		mdesc = list_first_entry(&mchan->free,
+						struct mpc_dma_desc, node);
+		if (!mdesc) {
+			spin_unlock_irqrestore(&mchan->lock, iflags);
+			/* try to free completed descriptors */
+			mpc_dma_process_completed(mdma);
+			return NULL;
+		}
+
+		list_del(&mdesc->node);
+
+		per_paddr = mchan->per_paddr;
+		tcd_nunits = mchan->tcd_nunits;
+
+		spin_unlock_irqrestore(&mchan->lock, iflags);
+
+		if (per_paddr == 0 || tcd_nunits == 0)
+			goto err_prep;
+
+		mdesc->error = 0;
+		mdesc->will_access_peripheral = 1;
+		tcd = mdesc->tcd;
+
+		/* Prepare Transfer Control Descriptor for this transaction */
+
+		memset(tcd, 0, sizeof(struct mpc_dma_tcd));
+
+		if (!IS_ALIGNED(sg_dma_address(sg), 4))
+			goto err_prep;
+
+		if (direction == DMA_DEV_TO_MEM) {
+			tcd->saddr = per_paddr;
+			tcd->daddr = sg_dma_address(sg);
+			tcd->soff = 0;
+			tcd->doff = 4;
+		} else if (direction == DMA_MEM_TO_DEV) {
+			tcd->saddr = sg_dma_address(sg);
+			tcd->daddr = per_paddr;
+			tcd->soff = 4;
+			tcd->doff = 0;
+		} else
+			goto err_prep;
+
+		tcd->ssize = MPC_DMA_TSIZE_4;
+		tcd->dsize = MPC_DMA_TSIZE_4;
+
+		len = sg_dma_len(sg);
+		tcd->nbytes = tcd_nunits * 4;
+		if (!IS_ALIGNED(len, tcd->nbytes))
+			goto err_prep;
+
+		iter = len / tcd->nbytes;
+		if (iter >= 1 << 15) {
+			/* len is too big */
+			goto err_prep;
+		} else {
+			/* citer_linkch contains the high bits of iter */
+			tcd->biter = iter & 0x1ff;
+			tcd->biter_linkch = iter >> 9;
+			tcd->citer = tcd->biter;
+			tcd->citer_linkch = tcd->biter_linkch;
+		}
+
+		tcd->e_sg = 0;
+		tcd->d_req = 1;
+
+		/* Place descriptor in prepared list */
+		spin_lock_irqsave(&mchan->lock, iflags);
+		list_add_tail(&mdesc->node, &mchan->prepared);
+		spin_unlock_irqrestore(&mchan->lock, iflags);
+	}
+
+	return &mdesc->desc;
+
+err_prep:
+	/* Put the descriptor back */
+	spin_lock_irqsave(&mchan->lock, iflags);
+	list_add_tail(&mdesc->node, &mchan->free);
+	spin_unlock_irqrestore(&mchan->lock, iflags);
+
+	return NULL;
+}
+
+static int mpc_dma_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+							unsigned long arg)
+{
+	struct mpc_dma_chan *mchan;
+	struct mpc_dma *mdma;
+	struct dma_slave_config *cfg;
+	unsigned long flags;
+
+	mchan = dma_chan_to_mpc_dma_chan(chan);
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		/* disable channel requests */
+		mdma = dma_chan_to_mpc_dma(chan);
+
+		spin_lock_irqsave(&mchan->lock, flags);
+
+		out_8(&mdma->regs->dmacerq, chan->chan_id);
+		list_splice_tail_init(&mchan->prepared, &mchan->free);
+		list_splice_tail_init(&mchan->queued, &mchan->free);
+		list_splice_tail_init(&mchan->active, &mchan->free);
+
+		spin_unlock_irqrestore(&mchan->lock, flags);
+
+		return 0;
+	case DMA_SLAVE_CONFIG:
+		/* Constraints:
+		 *  - only transfers between a peripheral device and
+		 *     memory are supported;
+		 *  - minimal transfer chunk is 4 bytes and consequently
+		 *     source and destination addresses must be 4-byte aligned
+		 *     and transfer size must be aligned on (4 * maxburst)
+		 *     boundary;
+		 *  - during the transfer RAM address is being incremented by
+		 *     the size of minimal transfer chunk;
+		 *  - peripheral port's address is constant during the transfer.
+		 */
+
+		cfg = (void *)arg;
+
+		if (cfg->direction != DMA_DEV_TO_MEM &&
+			cfg->direction != DMA_MEM_TO_DEV)
+			return -EINVAL;
+
+		if (cfg->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES &&
+			cfg->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+			return -EINVAL;
+
+		spin_lock_irqsave(&mchan->lock, flags);
+
+		if (cfg->direction == DMA_DEV_TO_MEM) {
+			mchan->per_paddr = cfg->src_addr;
+			mchan->tcd_nunits = cfg->src_maxburst;
+		} else {
+			mchan->per_paddr = cfg->dst_addr;
+			mchan->tcd_nunits = cfg->dst_maxburst;
+		}
+
+		if (!IS_ALIGNED(mchan->per_paddr, 4)) {
+			spin_unlock_irqrestore(&mchan->lock, flags);
+			return -EINVAL;
+		}
+
+		if (mchan->tcd_nunits == 0)
+			mchan->tcd_nunits = 1;	/* apply default */
+
+		spin_unlock_irqrestore(&mchan->lock, flags);
+
+		return 0;
+	default:
+		return -ENOSYS;
+	}
+
+	return -EINVAL;
+}
+
 static int mpc_dma_probe(struct platform_device *op)
 {
 	struct device_node *dn = op->dev.of_node;
@@ -727,9 +949,12 @@ static int mpc_dma_probe(struct platform_device *op)
 	dma->device_issue_pending = mpc_dma_issue_pending;
 	dma->device_tx_status = mpc_dma_tx_status;
 	dma->device_prep_dma_memcpy = mpc_dma_prep_memcpy;
+	dma->device_prep_slave_sg = mpc_dma_prep_slave_sg;
+	dma->device_control = mpc_dma_device_control;
 
 	INIT_LIST_HEAD(&dma->channels);
 	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma->cap_mask);
 
 	for (i = 0; i < dma->chancnt; i++) {
 		mchan = &mdma->channels[i];
-- 
1.8.4.2

^ permalink raw reply related

* [PATCH RFC v8 0/5] MPC512x DMA slave s/g support, OF DMA lookup
From: Alexander Popov @ 2014-02-24 11:09 UTC (permalink / raw)
  To: Gerhard Sittig, Dan Williams, Vinod Koul, Lars-Peter Clausen,
	Arnd Bergmann, Anatolij Gustschin, Alexander Popov, linuxppc-dev,
	dmaengine
  Cc: devicetree

2013/7/14 Gerhard Sittig <gsi@denx.de>:
> this series
> - introduces slave s/g support (that's support for DMA transfers which
>    involve peripherals in contrast to mem-to-mem transfers)
> - adds device tree based lookup support for DMA channels
> - combines floating patches and related feedback which already covered
>    several aspects of what the suggested LPB driver needs, to demonstrate
>    how integration might be done
> - carries Q&D SD card support to enable another DMA client during test,
>    while this patch needs to get dropped upon pickup

Changes in v2:
> - re-order mpc8308 related code paths for improved readability, no
>    change in behaviour, introduction of symbolic channel names here
>    already
> - squash 'execute() start condition' and 'terminate all' into the
>    introduction of 'slave s/g prep' and 'device control' support; refuse
>    s/g lists with more than one item since slave support is operational
>    yet proper s/g support is missing (can get addressed later)
> - always start transfers from software on MPC8308 as there are no
>    external request lines for peripheral flow control
> - drop dt-bindings header file and symbolic channel names in OF nodes

Changes in v3 and v4:
 Part 1/5:
 - use #define instead of enum since individual channels don't require
    special handling.
 Part 2/5:
 - add a flag "will_access_peripheral" to the DMA transfer descriptor
    according to the recommendations of Gerhard Sittig.
    This flag is set in mpc_dma_prep_memcpy() and mpc_dma_prep_slave_sg()
    and is evaluated in mpc_dma_execute() to choose a type of start for
    the transfer.
 - prevent descriptors of transfers which involve peripherals from
    being chained together;
    each of such transfers needs hardware initiated start.
 - add locking while working with struct mpc_dma_chan
    according to the recommendations of Lars-Peter Clausen.
 - remove default nbytes value. Client kernel modules must set
    src_maxburst and dst_maxburst fields of struct dma_slave_config (dmaengine.h).

Changes in v5:
 Part 2/5:
 - add and improve comments;
 - improve the code moving transfer descriptors from 'queued' to 'active' list
    in mpc_dma_execute();
 - allow mpc_dma_prep_slave_sg() to run with non-empty 'active' list;
 - take 'mdesc' back to 'free' list in case of error in mpc_dma_prep_slave_sg();
 - improve checks of the transfer parameters;
 - provide the default value for 'maxburst' in mpc_dma_device_control().

Changes in v6:
 Part 2/5:
 - remove doubtful comment;
 - fix coding style issues;
 - set default value for 'maxburst' to 1 which applies to most cases;
 Part 3/5:
 - use dma_get_slave_channel() instead of dma_request_channel()
    in the new function of_dma_xlate_by_chan_id() according to the
    recommendations of Arnd Bergmann;
 Part 4/5:
 - set DMA_PRIVATE flag for MPC512x DMA controller since its driver relies on
    of_dma_xlate_by_chan_id() which doesn't use dma_request_channel()
    any more; (removed in v7)
 - resolve little patch conflict;
 Part 5/5:
 - resolve little patch conflict;

Changes in v7:
 Part 2:
 - improve comment;
 Part 4:
 - split in two separate patches. Part 4/6 contains device tree
    binding document and in part 5/6 MPC512x DMA controller is registered
    for device tree channel lookup;
 - remove setting DMA_PRIVATE flag for MPC512x DMA controller from part 5/6;

Changes in v8:
 Part 2:
 - improve comments;
 - fix style issues;
 Part 6:
 - remove since it has become obsolete;

> known issues:
> - it's yet to get confirmed whether MPC8308 can use slave support or
>    whether the DMA controller's driver shall actively reject it, the
>    information that's available so far suggests that peripheral transfers
>    to IP bus attached I/O is useful and shall not get blocked right away
 - adding support for transfers which don't increment the RAM address or
    do increment the peripheral "port's" address is easy with
    this implementation; but which options of the common API
    should be used for specifying such transfers?
2014/02/13 Gerhard Sittig <gsi@denx.de>:
> - The MPC512x DMA completely lacks a binding document, so one
>    should get added.
> - The MPC8308 hardware is similar and can re-use the MPC512x
>    binding, which should be stated.
> - The Linux implementation currently has no OF based channel
>    lookup support, so '#dma-cells' is "a future feature".  I guess
>    the binding can and should already discuss the feature,
>    regardless of whether all implementations support it.


Alexander Popov (3):
  dma: mpc512x: reorder mpc8308 specific instructions
  dma: mpc512x: add support for peripheral transfers
  dma: of: Add common xlate function for matching by channel id

Gerhard Sittig (2):
  dma: mpc512x: add device tree binding document
  dma: mpc512x: register for device tree channel lookup

 .../devicetree/bindings/dma/mpc512x-dma.txt        |  55 ++++
 arch/powerpc/boot/dts/mpc5121.dtsi                 |   1 +
 drivers/dma/mpc512x_dma.c                          | 298 +++++++++++++++++++--
 drivers/dma/of-dma.c                               |  35 +++
 include/linux/of_dma.h                             |   4 +
 5 files changed, 368 insertions(+), 25 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/dma/mpc512x-dma.txt

-- 
1.8.4.2

^ permalink raw reply

* [PATCH RFC v8 1/5] dma: mpc512x: reorder mpc8308 specific instructions
From: Alexander Popov @ 2014-02-24 11:09 UTC (permalink / raw)
  To: Gerhard Sittig, Dan Williams, Vinod Koul, Lars-Peter Clausen,
	Arnd Bergmann, Anatolij Gustschin, Alexander Popov, linuxppc-dev,
	dmaengine
In-Reply-To: <1393240172-18769-1-git-send-email-a13xp0p0v88@gmail.com>

Concentrate the specific code for MPC8308 in the 'if' branch
and handle MPC512x in the 'else' branch.
This modification only reorders instructions but doesn't change behaviour.

Signed-off-by: Alexander Popov <a13xp0p0v88@gmail.com>
Acked-by: Anatolij Gustschin <agust@denx.de>
Acked-by: Gerhard Sittig <gsi@denx.de>
---
 drivers/dma/mpc512x_dma.c | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 448750d..2ce248b 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -52,9 +52,17 @@
 #define MPC_DMA_DESCRIPTORS	64
 
 /* Macro definitions */
-#define MPC_DMA_CHANNELS	64
 #define MPC_DMA_TCD_OFFSET	0x1000
 
+/*
+ * Maximum channel counts for individual hardware variants
+ * and the maximum channel count over all supported controllers,
+ * used for data structure size
+ */
+#define MPC8308_DMACHAN_MAX	16
+#define MPC512x_DMACHAN_MAX	64
+#define MPC_DMA_CHANNELS	64
+
 /* Arbitration mode of group and channel */
 #define MPC_DMA_DMACR_EDCG	(1 << 31)
 #define MPC_DMA_DMACR_ERGA	(1 << 3)
@@ -710,10 +718,10 @@ static int mpc_dma_probe(struct platform_device *op)
 
 	dma = &mdma->dma;
 	dma->dev = dev;
-	if (!mdma->is_mpc8308)
-		dma->chancnt = MPC_DMA_CHANNELS;
+	if (mdma->is_mpc8308)
+		dma->chancnt = MPC8308_DMACHAN_MAX;
 	else
-		dma->chancnt = 16; /* MPC8308 DMA has only 16 channels */
+		dma->chancnt = MPC512x_DMACHAN_MAX;
 	dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = mpc_dma_free_chan_resources;
 	dma->device_issue_pending = mpc_dma_issue_pending;
@@ -747,7 +755,19 @@ static int mpc_dma_probe(struct platform_device *op)
 	 * - Round-robin group arbitration,
 	 * - Round-robin channel arbitration.
 	 */
-	if (!mdma->is_mpc8308) {
+	if (mdma->is_mpc8308) {
+		/* MPC8308 has 16 channels and lacks some registers */
+		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA);
+
+		/* enable snooping */
+		out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE);
+		/* Disable error interrupts */
+		out_be32(&mdma->regs->dmaeeil, 0);
+
+		/* Clear interrupts status */
+		out_be32(&mdma->regs->dmaintl, 0xFFFF);
+		out_be32(&mdma->regs->dmaerrl, 0xFFFF);
+	} else {
 		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG |
 					MPC_DMA_DMACR_ERGA | MPC_DMA_DMACR_ERCA);
 
@@ -768,18 +788,6 @@ static int mpc_dma_probe(struct platform_device *op)
 		/* Route interrupts to IPIC */
 		out_be32(&mdma->regs->dmaihsa, 0);
 		out_be32(&mdma->regs->dmailsa, 0);
-	} else {
-		/* MPC8308 has 16 channels and lacks some registers */
-		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA);
-
-		/* enable snooping */
-		out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE);
-		/* Disable error interrupts */
-		out_be32(&mdma->regs->dmaeeil, 0);
-
-		/* Clear interrupts status */
-		out_be32(&mdma->regs->dmaintl, 0xFFFF);
-		out_be32(&mdma->regs->dmaerrl, 0xFFFF);
 	}
 
 	/* Register DMA engine */
-- 
1.8.4.2

^ permalink raw reply related

* [PATCH 3/3] dt/bindings: fsl-fec: add "per" to clock properties
From: Gerhard Sittig @ 2014-02-24 10:25 UTC (permalink / raw)
  To: devicetree, linux-arm-kernel, linuxppc-dev
  Cc: Mark Rutland, Anatolij Gustschin, Mike Turquette, Pawel Moll,
	Ian Campbell, Gerhard Sittig, Rob Herring, Shawn Guo
In-Reply-To: <1393237557-31406-1-git-send-email-gsi@denx.de>

a recent FEC binding document update that was motivated by i.MX
development revealed that ARM and PowerPC implementations in Linux
did not agree on the clock names to use for the FEC nodes

update the FEC (fast ethernet controller) binding to document the
"per" clock name as an obsolete alias for "ipg"

Signed-off-by: Gerhard Sittig <gsi@denx.de>
---

this patch depends on "dt/bindings: fsl-fec: add clock properties"
by Shawn Guo which introduces the context of this patch

the patch only is necessary if the MPC5121 .dtsi update (switch
FEC nodes from "per" to "ipg") won't make it for v3.14

---
 Documentation/devicetree/bindings/net/fsl-fec.txt |    2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
index 468736d4323d..f59b58e29da6 100644
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -24,6 +24,8 @@ Optional properties:
      or external oscillator via pad depending on board design.
    - "enet_out": the phy reference clock provided by SoC via pad, which
      is available on SoC like i.MX28.
+   - "per": obsolete alias for "ipg" for compatibility with early
+     MPC5121 implementations, not recommended for new .dts files
 - clock-names: Must contain the clock names described just above
 
 Example:
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 2/3] dts: mpc512x: adjust clock specs for FEC nodes
From: Gerhard Sittig @ 2014-02-24 10:25 UTC (permalink / raw)
  To: devicetree, linux-arm-kernel, linuxppc-dev
  Cc: Mark Rutland, Anatolij Gustschin, Mike Turquette, Pawel Moll,
	Ian Campbell, Gerhard Sittig, Rob Herring, Shawn Guo
In-Reply-To: <1393237557-31406-1-git-send-email-gsi@denx.de>

a recent FEC binding document update that was motivated by i.MX
development revealed that ARM and PowerPC implementations in Linux
did not agree on the clock names to use for the FEC nodes

change clock names from "per" to "ipg" in the FEC nodes of the
mpc5121.dtsi include file such that the .dts specs comply with
the common FEC binding

this "incompatible" change does not break operation, because
- COMMON_CLK support for MPC5121/23/25 and adjusted .dts files
  were only introduced in Linux v3.14-rc1, no mainline release
  provided these specs before
- if this change won't make it for v3.14, the MPC512x CCF support
  provides full backwards compatibility, and keeps operating with
  device trees which lack clock specs or don't match in the names

Signed-off-by: Gerhard Sittig <gsi@denx.de>
---
 arch/powerpc/boot/dts/mpc5121.dtsi |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi
index 2c0e1552d20b..a5a375598ed8 100644
--- a/arch/powerpc/boot/dts/mpc5121.dtsi
+++ b/arch/powerpc/boot/dts/mpc5121.dtsi
@@ -281,7 +281,7 @@
 			#address-cells = <1>;
 			#size-cells = <0>;
 			clocks = <&clks MPC512x_CLK_FEC>;
-			clock-names = "per";
+			clock-names = "ipg";
 		};
 
 		eth0: ethernet@2800 {
@@ -291,7 +291,7 @@
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <4 0x8>;
 			clocks = <&clks MPC512x_CLK_FEC>;
-			clock-names = "per";
+			clock-names = "ipg";
 		};
 
 		/* USB1 using external ULPI PHY */
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 1/3] fs_enet: update clock names to comply with FEC binding
From: Gerhard Sittig @ 2014-02-24 10:25 UTC (permalink / raw)
  To: devicetree, linux-arm-kernel, linuxppc-dev
  Cc: Mark Rutland, Anatolij Gustschin, Mike Turquette, Pawel Moll,
	Ian Campbell, Gerhard Sittig, Rob Herring, Shawn Guo

a recent FEC binding document update that was motivated by i.MX
development revealed that ARM and PowerPC implementations in Linux
did not agree on the clock names to use for the FEC nodes

change the OF clock lookup to prefer "ipg" over "per", which
improves compliance with the binding, and keeps compatibility
with former device trees

Signed-off-by: Gerhard Sittig <gsi@denx.de>
---
 drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c |   13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 62f042d4aaa9..ce20184b96cb 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -1037,11 +1037,20 @@ static int fs_enet_probe(struct platform_device *ofdev)
 			fpi->use_rmii = 1;
 	}
 
-	/* make clock lookup non-fatal (the driver is shared among platforms),
+	/* the driver is shared across several PowerPC platforms, not all
+	 * of them provide COMMON_CLK support, and newer kernels are supposed
+	 * to keep working with older DT blobs, so clock lookup is non-fatal
+	 *
 	 * but require enable to succeed when a clock was specified/found,
 	 * keep a reference to the clock upon successful acquisition
+	 *
+	 * the FEC binding is shared with ARM platforms, so we lookup several
+	 * clock names to prefer the common naming convention yet support
+	 * names that were used before unification
 	 */
-	clk = devm_clk_get(&ofdev->dev, "per");
+	clk = devm_clk_get(&ofdev->dev, "ipg");
+	if (IS_ERR(clk))
+		clk = devm_clk_get(&ofdev->dev, "per");
 	if (!IS_ERR(clk)) {
 		err = clk_prepare_enable(clk);
 		if (err) {
-- 
1.7.10.4

^ permalink raw reply related

* Re: [patch 03/26] powerpc: eeh: Kill another abuse of irq_desc
From: Gavin Shan @ 2014-02-24  7:56 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Gavin Shan, Peter Zijlstra, LKML, Ingo Molnar, ppc
In-Reply-To: <20140223212736.562906212@linutronix.de>

On Sun, Feb 23, 2014 at 09:40:09PM -0000, Thomas Gleixner wrote:
>commit 91150af3a (powerpc/eeh: Fix unbalanced enable for IRQ) is
>another brilliant example of trainwreck engineering.
>
>The patch "fixes" the issue of an unbalanced call to irq_enable()
>which causes a prominent warning by checking the disabled state of the
>interrupt line and calling conditionally into the core code.
>
>This is wrong in two aspects:
>
>1) The warning is there to tell users, that they need to fix their
>   asymmetric enable/disable patterns by finding the root cause and
>   solving it there.
>
>   It's definitely not meant to work around it by conditionally
>   calling into the core code depending on the random state of the irq
>   line.
>
>   Asymmetric irq_disable/enable calls are a clear sign of wrong usage
>   of the interfaces which have to be cured at the root and not by
>   somehow hacking around it.
>
>2) The abuse of core internal data structure instead of using the
>   proper interfaces for retrieving the information for the 'hack
>   around'
>
>   irq_desc is core internal and it's clear enough stated.
>
>Replace at least the irq_desc abuse with the proper functions and add
>a big fat comment why this is absurd and completely wrong.
>

Thanks for pointing it out. I think we might have this patch for now
and I'll look into individual drivers to fix the unbalanced function
calls later one by one.

Thanks,
Gavin

>Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>Cc: Gavin Shan <shangw@linux.vnet.ibm.com>
>Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>Cc: ppc <linuxppc-dev@lists.ozlabs.org>
>---
> arch/powerpc/kernel/eeh_driver.c |   26 +++++++++++++++++++++-----
> 1 file changed, 21 insertions(+), 5 deletions(-)
>
>Index: tip/arch/powerpc/kernel/eeh_driver.c
>===================================================================
>--- tip.orig/arch/powerpc/kernel/eeh_driver.c
>+++ tip/arch/powerpc/kernel/eeh_driver.c
>@@ -143,15 +143,31 @@ static void eeh_disable_irq(struct pci_d
> static void eeh_enable_irq(struct pci_dev *dev)
> {
> 	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
>-	struct irq_desc *desc;
>
> 	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
> 		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
>-
>-		desc = irq_to_desc(dev->irq);
>-		if (desc && desc->depth > 0)
>+		/*
>+		 * FIXME !!!!!
>+		 *
>+		 * This is just ass backwards. This maze has
>+		 * unbalanced irq_enable/disable calls. So instead of
>+		 * finding the root cause it works around the warning
>+		 * in the irq_enable code by conditionally calling
>+		 * into it.
>+		 *
>+		 * That's just wrong. The warning in the core code is
>+		 * there to tell people to fix their asymmetries in
>+		 * their own code, not by abusing the core information
>+		 * to avoid it.
>+		 *
>+		 * I so wish that the asymmetry would be the other way
>+		 * round and a few more irq_disable calls render that
>+		 * shit unusable forever.
>+		 *
>+		 *	tglx
>+		 */
>+		if (irqd_irq_disabled(irq_get_irq_data(dev->irq))
> 			enable_irq(dev->irq);
>-	}
> }
>
> /**
>
>

^ permalink raw reply

* [PATCH] ppc476: Enable a linker work around for IBM errata #46
From: Alistair Popple @ 2014-02-24  7:00 UTC (permalink / raw)
  To: benh; +Cc: Alistair Popple, linuxppc-dev

This patch adds an option to enable a work around for an icache bug on
476 that can cause execution of stale instructions when falling
through pages (IBM errata #46). It requires a recent version of
binutils which supports the --ppc476-workaround option.

The work around enables the appropriate linker options and ensures
that all module output sections are aligned to 4K page boundaries. The
work around is only required when building modules.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
---
 arch/powerpc/Makefile                         |    5 +++++
 arch/powerpc/platforms/44x/Kconfig            |   14 ++++++++++++++
 arch/powerpc/platforms/44x/ppc476_modules.lds |   15 +++++++++++++++
 3 files changed, 34 insertions(+)
 create mode 100644 arch/powerpc/platforms/44x/ppc476_modules.lds

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 0f4344e..2b13616 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -158,6 +158,11 @@ CHECKFLAGS	+= -m$(CONFIG_WORD_SIZE) -D__powerpc__ -D__powerpc$(CONFIG_WORD_SIZE)
 
 KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
 
+ifeq ($(CONFIG_476FPE_ERR46),y)
+	KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
+		-T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds
+endif
+
 # No AltiVec or VSX instructions when building kernel
 KBUILD_CFLAGS += $(call cc-option,-mno-altivec)
 KBUILD_CFLAGS += $(call cc-option,-mno-vsx)
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index d6c7506..b817bf58 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -324,6 +324,20 @@ config APM821xx
 	select IBM_EMAC_EMAC4
 	select IBM_EMAC_TAH
 
+config 476FPE_ERR46
+	depends on 476FPE
+	bool "Enable linker work around for PPC476FPE errata #46"
+	help
+	  This option enables a work around for an icache bug on 476
+	  that can cause execution of stale instructions when falling
+	  through pages (IBM errata #46). It requires a recent version
+	  of binutils which supports the --ppc476-workaround option.
+
+	  The work around enables the appropriate linker options and
+	  ensures that all module output sections are aligned to 4K
+	  page boundaries. The work around is only required when
+	  building modules.
+
 # 44x errata/workaround config symbols, selected by the CPU models above
 config IBM440EP_ERR42
 	bool
diff --git a/arch/powerpc/platforms/44x/ppc476_modules.lds b/arch/powerpc/platforms/44x/ppc476_modules.lds
new file mode 100644
index 0000000..9fec5d3
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc476_modules.lds
@@ -0,0 +1,15 @@
+SECTIONS
+{
+	.text : ALIGN(4096)
+	{
+		*(.text .text.* .fixup)
+	}
+	.init.text : ALIGN(4096)
+	{
+		*(.init.text .init.text.*)
+	}
+	.exit.text : ALIGN(4096)
+	{
+		*(.exit.text .exit.text.*)
+	}
+}
-- 
1.7.10.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox