Linux Power Management development

Linux Power Management development
 help / color / mirror / Atom feed

* [PATCH v2 1/9] soc: samsung: Add exynos chipid driver support
From: Sylwester Nawrocki @ 2019-07-18 14:30 UTC (permalink / raw)
  To: krzk
  Cc: robh+dt, vireshk, devicetree, kgene, pankaj.dubey,
	linux-samsung-soc, linux-arm-kernel, linux-kernel, linux-pm,
	b.zolnierkie, m.szyprowski, Sylwester Nawrocki
In-Reply-To: <20190718143044.25066-1-s.nawrocki@samsung.com>

From: Pankaj Dubey <pankaj.dubey@samsung.com>

Exynos SoCs have Chipid, for identification of product IDs and SoC
revisions. This patch intends to provide initialization code for all
these functionalities, at the same time it provides some sysfs entries
for accessing these information to user-space.

This driver uses existing binding for exynos-chipid.

Changes by Bartlomiej:
- fixed return values on errors
- removed bogus kfree_const()
- added missing Exynos4210 EVT0 id
- converted code to use EXYNOS_MASK define
- fixed np use after of_node_put()
- fixed too early use of dev_info()
- made driver fail for unknown SoC-s
- added SPDX tag
- updated Copyrights

Signed-off-by: Pankaj Dubey <pankaj.dubey@samsung.com>
[m.szyprowski: for suggestion and code snippet of product_id_to_soc_id]
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
[s.nawrocki: updated copyright date]
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
---
 drivers/soc/samsung/Kconfig         |   5 ++
 drivers/soc/samsung/Makefile        |   2 +
 drivers/soc/samsung/exynos-chipid.c | 111 ++++++++++++++++++++++++++++
 3 files changed, 118 insertions(+)
 create mode 100644 drivers/soc/samsung/exynos-chipid.c

diff --git a/drivers/soc/samsung/Kconfig b/drivers/soc/samsung/Kconfig
index 2186285fda92..2905f5262197 100644
--- a/drivers/soc/samsung/Kconfig
+++ b/drivers/soc/samsung/Kconfig
@@ -7,6 +7,11 @@ menuconfig SOC_SAMSUNG
 
 if SOC_SAMSUNG
 
+config EXYNOS_CHIPID
+	bool "Exynos Chipid controller driver" if COMPILE_TEST
+	depends on ARCH_EXYNOS || COMPILE_TEST
+	select SOC_BUS
+
 config EXYNOS_PMU
 	bool "Exynos PMU controller driver" if COMPILE_TEST
 	depends on ARCH_EXYNOS || ((ARM || ARM64) && COMPILE_TEST)
diff --git a/drivers/soc/samsung/Makefile b/drivers/soc/samsung/Makefile
index 29f294baac6e..3b6a8797416c 100644
--- a/drivers/soc/samsung/Makefile
+++ b/drivers/soc/samsung/Makefile
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_EXYNOS_CHIPID)	+= exynos-chipid.o
 obj-$(CONFIG_EXYNOS_PMU)	+= exynos-pmu.o
 
 obj-$(CONFIG_EXYNOS_PMU_ARM_DRIVERS)	+= exynos3250-pmu.o exynos4-pmu.o \
diff --git a/drivers/soc/samsung/exynos-chipid.c b/drivers/soc/samsung/exynos-chipid.c
new file mode 100644
index 000000000000..78b123ee60c0
--- /dev/null
+++ b/drivers/soc/samsung/exynos-chipid.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd.
+ *	      http://www.samsung.com/
+ *
+ * EXYNOS - CHIP ID support
+ * Author: Pankaj Dubey <pankaj.dubey@samsung.com>
+ * Author: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+ */
+
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/sys_soc.h>
+
+#define EXYNOS_SUBREV_MASK	(0xF << 4)
+#define EXYNOS_MAINREV_MASK	(0xF << 0)
+#define EXYNOS_REV_MASK		(EXYNOS_SUBREV_MASK | EXYNOS_MAINREV_MASK)
+#define EXYNOS_MASK		0xFFFFF000
+
+static const struct exynos_soc_id {
+	const char *name;
+	unsigned int id;
+} soc_ids[] = {
+	{ "EXYNOS3250", 0xE3472000 },
+	{ "EXYNOS4210", 0x43200000 },	/* EVT0 revision */
+	{ "EXYNOS4210", 0x43210000 },
+	{ "EXYNOS4212", 0x43220000 },
+	{ "EXYNOS4412", 0xE4412000 },
+	{ "EXYNOS5250", 0x43520000 },
+	{ "EXYNOS5260", 0xE5260000 },
+	{ "EXYNOS5410", 0xE5410000 },
+	{ "EXYNOS5420", 0xE5420000 },
+	{ "EXYNOS5440", 0xE5440000 },
+	{ "EXYNOS5800", 0xE5422000 },
+	{ "EXYNOS7420", 0xE7420000 },
+	{ "EXYNOS5433", 0xE5433000 },
+};
+
+static const char * __init product_id_to_soc_id(unsigned int product_id)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(soc_ids); i++)
+		if ((product_id & EXYNOS_MASK) == soc_ids[i].id)
+			return soc_ids[i].name;
+	return NULL;
+}
+
+int __init exynos_chipid_early_init(void)
+{
+	struct soc_device_attribute *soc_dev_attr;
+	void __iomem *exynos_chipid_base;
+	struct soc_device *soc_dev;
+	struct device_node *root;
+	struct device_node *np;
+	u32 product_id;
+	u32 revision;
+
+	/* look up for chipid node */
+	np = of_find_compatible_node(NULL, NULL, "samsung,exynos4210-chipid");
+	if (!np)
+		return -ENODEV;
+
+	exynos_chipid_base = of_iomap(np, 0);
+	of_node_put(np);
+
+	if (!exynos_chipid_base) {
+		pr_err("Failed to map SoC chipid\n");
+		return -ENXIO;
+	}
+
+	product_id = readl_relaxed(exynos_chipid_base);
+	revision = product_id & EXYNOS_REV_MASK;
+	iounmap(exynos_chipid_base);
+
+	soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL);
+	if (!soc_dev_attr)
+		return -ENOMEM;
+
+	soc_dev_attr->family = "Samsung Exynos";
+
+	root = of_find_node_by_path("/");
+	of_property_read_string(root, "model", &soc_dev_attr->machine);
+	of_node_put(root);
+
+	soc_dev_attr->revision = kasprintf(GFP_KERNEL, "%x", revision);
+	soc_dev_attr->soc_id = product_id_to_soc_id(product_id);
+	if (!soc_dev_attr->soc_id) {
+		pr_err("Unknown SoC\n");
+		return -ENODEV;
+	}
+
+	/* please note that the actual registration will be deferred */
+	soc_dev = soc_device_register(soc_dev_attr);
+	if (IS_ERR(soc_dev)) {
+		kfree(soc_dev_attr->revision);
+		kfree(soc_dev_attr);
+		return PTR_ERR(soc_dev);
+	}
+
+	/* it is too early to use dev_info() here (soc_dev is NULL) */
+	pr_info("soc soc0: Exynos: CPU[%s] PRO_ID[0x%x] REV[0x%x] Detected\n",
+		soc_dev_attr->soc_id, product_id, revision);
+
+	return 0;
+}
+early_initcall(exynos_chipid_early_init);
-- 
2.17.1


^ permalink raw reply related

* [PATCH v2 0/9] Exynos Adaptive Supply Voltage support
From: Sylwester Nawrocki @ 2019-07-18 14:30 UTC (permalink / raw)
  To: krzk
  Cc: robh+dt, vireshk, devicetree, kgene, pankaj.dubey,
	linux-samsung-soc, linux-arm-kernel, linux-kernel, linux-pm,
	b.zolnierkie, m.szyprowski, Sylwester Nawrocki
In-Reply-To: <CGME20190718143117eucas1p1e534b9075d10fbbbe427c66192205eb1@eucas1p1.samsung.com>

This is second iteration of patch series adding ASV (Adaptive Supply 
Voltage) support for Exynos SoCs. The first one can be found at:
https://lore.kernel.org/lkml/20190404171735.12815-1-s.nawrocki@samsung.com

The main changes comparing to the first (RFC) version are:
 - moving ASV data tables from DT to the driver,
 - converting the chipid and the ASV drivers to use regmap,
 - converting the ASV driver to proper platform driver.

I tried the opp-supported-hw bitmask approach as in the Qualcomm CPUFreq
DT bindings but it resulted in too many OPPs and DT nodes, around 200
per CPU cluster. So the ASV OPP tables are now in the ASV driver, as in
downstream kernels.  I might give it a try and restrucure these tables
to avoid data repetition.

This patch set includes Exynos CHIPID driver posted by Pankaj Dubey and
futher improved by Bartlomiej Zolnierkiewicz [1].

Tested on Odroid XU3, XU3 Lite, XU4.

One of the things on TODO list is support for the Adaptive Body Bias.
This will require modifications on the cpufreq driver side in order to 
support multiple voltage regulators and changes in the OPP framework 
to support adding OPPs with multiple voltages.

[1] https://lkml.org/lkml/2018/11/15/908

Pankaj Dubey (3):
  soc: samsung: Add exynos chipid driver support
  ARM: EXYNOS: enable exynos_chipid for ARCH_EXYNOS
  ARM64: EXYNOS: enable exynos_chipid for ARCH_EXYNOS

Sylwester Nawrocki (6):
  soc: samsung: Convert exynos-chipid driver to use the regmap API
  soc: samsung: Add Exynos Adaptive Supply Voltage driver
  ARM: EXYNOS: Enable exynos-asv driver for ARCH_EXYNOS
  soc: samsung: Update the CHIP ID DT binding documentation
  ARM: dts: Add "syscon" compatible string to chipid node
  ARM: dts: Add samsung,asv-bin property for odroidxu3-lite

 .../bindings/arm/samsung/exynos-chipid.txt    |  10 +-
 arch/arm/boot/dts/exynos5.dtsi                |   4 +-
 .../boot/dts/exynos5422-odroidxu3-lite.dts    |   4 +
 arch/arm/mach-exynos/Kconfig                  |   2 +
 arch/arm64/Kconfig.platforms                  |   1 +
 drivers/soc/samsung/Kconfig                   |  16 +
 drivers/soc/samsung/Makefile                  |   5 +
 drivers/soc/samsung/exynos-asv.c              | 185 +++++++
 drivers/soc/samsung/exynos-asv.h              |  82 +++
 drivers/soc/samsung/exynos-chipid.c           | 104 ++++
 drivers/soc/samsung/exynos5422-asv.c          | 499 ++++++++++++++++++
 drivers/soc/samsung/exynos5422-asv.h          |  25 +
 include/linux/soc/samsung/exynos-chipid.h     |  48 ++
 13 files changed, 981 insertions(+), 4 deletions(-)
 create mode 100644 drivers/soc/samsung/exynos-asv.c
 create mode 100644 drivers/soc/samsung/exynos-asv.h
 create mode 100644 drivers/soc/samsung/exynos-chipid.c
 create mode 100644 drivers/soc/samsung/exynos5422-asv.c
 create mode 100644 drivers/soc/samsung/exynos5422-asv.h
 create mode 100644 include/linux/soc/samsung/exynos-chipid.h

-- 
2.17.1


^ permalink raw reply

* Re: [PATCH 10/18] drivers: firmware: psci: Add hierarchical domain idle states converter
From: Lorenzo Pieralisi @ 2019-07-18 13:36 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Sudeep Holla, Mark Rutland, Linux ARM, Rafael J . Wysocki,
	Daniel Lezcano, Raju P . L . S . S . S . N, Amit Kucheria,
	Bjorn Andersson, Stephen Boyd, Niklas Cassel, Tony Lindgren,
	Kevin Hilman, Lina Iyer, Viresh Kumar, Vincent Guittot,
	Geert Uytterhoeven, Souvik Chakravarty, Linux PM, linux-arm-msm,
	Linux Kernel Mailing List
In-Reply-To: <CAPDyKFoBm9vWUcX5pjMMseYs7SAYr9v0Uc48YqsgaG9omA2MRQ@mail.gmail.com>

On Thu, Jul 18, 2019 at 01:43:44PM +0200, Ulf Hansson wrote:

[...]

> > > Anyway, as a suggestion to address your concern, how about this:
> > >
> > > 1. Move some things out to a PSCI cpuidle driver. We need to decide
> > > more exactly on what to move and find the right level for the
> > > interfaces.
> >
> > I will do it and post patches asap.
> 
> Okay, so I will wait for you to converting the cpuidle-arm driver into
> a cpuidle-psci driver (and all the changes that comes with it) and
> then base my re-base my series on top.
> 
> Then, would you mind sharing (even in an early phase) a
> branch/git-tree so I can start re-basing my series on top?

Sure, I should be able to post at -rc1 and will publish a branch
here [1].

> > > 2. Don't attach the CPU to the PM domain topology in case the PSCI PC
> > > mode is used. I think this makes it easier, at least as a first step,
> > > to understand when runtime PM needs to be used/enabled.
> >
> > In the PSCI CPUidle driver we can have two distinct struct
> > cpuidle_state->enter functions for PC and OSI, no overhead
> > for PC, runtime PM for OSI, decoupling done.
> 
> Good idea!
> 
> >
> > We can choose one or the other depending on whether:
> >
> > OSI iff:
> > - OSI is available
> > - hierarchical idle states are present in DT
> >
> > otherwise PC.
> >
> > That's what this patch does but we will do it in a unified file.
> 
> Sure, it makes sense.
> 
> >
> > > 3. Would it help if I volunteer to help you guys as a maintainer for
> > > PSCI. At least for the part of the new code that becomes introduced?
> >
> > We will do as described above if that makes sense.
> 
> Yep, I am okay with your suggestions, assuming I have understood them correctly.
> 
> BTW, have you considered to host a git tree for PSCI so we can have
> changes pre-integrated and tested in Stephen Rothwell's linux-next
> tree?

I will ask Stephen to pull when needed a branch in the tree below[1]

[1] https://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/linux.git/

^ permalink raw reply

* Re: [PATCH 14/18] drivers: firmware: psci: Manage runtime PM in the idle path for CPUs
From: Lorenzo Pieralisi @ 2019-07-18 13:30 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Sudeep Holla, Mark Rutland, Linux ARM, Rafael J . Wysocki,
	Daniel Lezcano, Raju P . L . S . S . S . N, Amit Kucheria,
	Bjorn Andersson, Stephen Boyd, Niklas Cassel, Tony Lindgren,
	Kevin Hilman, Lina Iyer, Viresh Kumar, Vincent Guittot,
	Geert Uytterhoeven, Souvik Chakravarty, Linux PM, linux-arm-msm,
	Linux Kernel Mailing List
In-Reply-To: <CAPDyKFrJ75mo+s6GuUCTQ-nVv7C+9YJyTVmwuBZ2RKFOvOi3Nw@mail.gmail.com>

On Thu, Jul 18, 2019 at 12:35:07PM +0200, Ulf Hansson wrote:
> On Tue, 16 Jul 2019 at 17:53, Lorenzo Pieralisi
> <lorenzo.pieralisi@arm.com> wrote:
> >
> > On Mon, May 13, 2019 at 09:22:56PM +0200, Ulf Hansson wrote:
> > > When the hierarchical CPU topology layout is used in DT, let's allow the
> > > CPU to be power managed through its PM domain, via deploying runtime PM
> > > support.
> > >
> > > To know for which idle states runtime PM reference counting is needed,
> > > let's store the index of deepest idle state for the CPU, in a per CPU
> > > variable. This allows psci_cpu_suspend_enter() to compare this index with
> > > the requested idle state index and then act accordingly.
> >
> > I do not see why a system with two CPU CPUidle states, say CPU retention
> > and CPU shutdown, should not be calling runtime PM on CPU retention
> > entry.
> 
> If the CPU idle governor did select the CPU retention for the CPU, it
> was probably because the target residency for the CPU shutdown state
> could not be met.

The kernel does not know what those cpu states represent, so, this is an
assumption you are making and it must be made clear that this code works
as long as your assumption is valid.

If eg a "cluster" retention state has lower target_residency than
the deepest CPU idle state this assumption is wrong.

And CPUidle and genPD governor decisions are not synced anyway so,
again, this is an assumption, not a certainty.

> In this case, there is no point in allowing any other deeper idle
> states for cluster/package/system, since those have even greater
> residencies, hence calling runtime PM doesn't make sense.

On the systems you are testing on.

Lorenzo

> > The question then is what cluster/package/system states
> > are allowed for a given CPU idle state, to understand
> > what idle states can be actually entered at any hierarchy
> > level given the choice made for the CPU idle state.
> >
> > In the case above, a CPU entering retention state should prevent
> > runtime PM selecting a cluster shutdown state; most likely firmware
> > would demote the request to cluster retention but still, we should
> > find a way to describe these dependencies.
> 
> See above.
> 
> [...]
> 
> Kind regards
> Uffe

^ permalink raw reply

* Re: [PATCH 09/18] drivers: firmware: psci: Add support for PM domains using genpd
From: Sudeep Holla @ 2019-07-18 13:19 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Lorenzo Pieralisi, Mark Rutland, Linux ARM, Rafael J . Wysocki,
	Daniel Lezcano, Raju P . L . S . S . S . N, Amit Kucheria,
	Bjorn Andersson, Stephen Boyd, Niklas Cassel, Tony Lindgren,
	Kevin Hilman, Lina Iyer, Viresh Kumar, Vincent Guittot,
	Geert Uytterhoeven, Souvik Chakravarty, Linux PM, linux-arm-msm,
	Linux Kernel Mailing List, Sudeep Holla, Lina Iyer
In-Reply-To: <CAPDyKFqaE2L419siFY=LGDsotAnpBt+H_vpmG62AqQw8UQJZJA@mail.gmail.com>

On Thu, Jul 18, 2019 at 01:04:03PM +0200, Ulf Hansson wrote:
> On Tue, 16 Jul 2019 at 17:05, Sudeep Holla <sudeep.holla@arm.com> wrote:
> >
> > On Mon, May 13, 2019 at 09:22:51PM +0200, Ulf Hansson wrote:
> > > When the hierarchical CPU topology layout is used in DT, we need to setup
> > > the corresponding PM domain data structures, as to allow a CPU and a group
> > > of CPUs to be power managed accordingly. Let's enable this by deploying
> > > support through the genpd interface.
> > >
> > > Additionally, when the OS initiated mode is supported by the PSCI FW, let's
> > > also parse the domain idle states DT bindings as to make genpd responsible
> > > for the state selection, when the states are compatible with
> > > "domain-idle-state". Otherwise, when only Platform Coordinated mode is
> > > supported, we rely solely on the state selection to be managed through the
> > > regular cpuidle framework.
> > >
> > > If the initialization of the PM domain data structures succeeds and the OS
> > > initiated mode is supported, we try to switch to it. In case it fails,
> > > let's fall back into a degraded mode, rather than bailing out and returning
> > > an error code.
> > >
> > > Due to that the OS initiated mode may become enabled, we need to adjust to
> > > maintain backwards compatibility for a kernel started through a kexec call.
> > > Do this by explicitly switch to Platform Coordinated mode during boot.
> > >
> > > Finally, the actual initialization of the PM domain data structures, is
> > > done via calling the new shared function, psci_dt_init_pm_domains().
> > > However, this is implemented by subsequent changes.
> > >
> > > Co-developed-by: Lina Iyer <lina.iyer@linaro.org>
> > > Signed-off-by: Lina Iyer <lina.iyer@linaro.org>
> > > Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
> > > ---
> > >
> > > Changes:
> > >       - Simplify code setting domain_state at power off.
> > >       - Use the genpd ->free_state() callback to manage freeing of states.
> > >       - Fixup a bogus while loop.
> > >
> > > ---
> > >  drivers/firmware/psci/Makefile         |   2 +-
> > >  drivers/firmware/psci/psci.c           |   7 +-
> > >  drivers/firmware/psci/psci.h           |   5 +
> > >  drivers/firmware/psci/psci_pm_domain.c | 268 +++++++++++++++++++++++++
> > >  4 files changed, 280 insertions(+), 2 deletions(-)
> > >  create mode 100644 drivers/firmware/psci/psci_pm_domain.c
> > >
> >
> > [...]
> >
> > >  #endif /* __PSCI_H */
> > > diff --git a/drivers/firmware/psci/psci_pm_domain.c b/drivers/firmware/psci/psci_pm_domain.c
> > > new file mode 100644
> > > index 000000000000..3c6ca846caf4
> > > --- /dev/null
> > > +++ b/drivers/firmware/psci/psci_pm_domain.c
> > > @@ -0,0 +1,268 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +/*
> > > + * PM domains for CPUs via genpd - managed by PSCI.
> > > + *
> > > + * Copyright (C) 2019 Linaro Ltd.
> > > + * Author: Ulf Hansson <ulf.hansson@linaro.org>
> > > + *
> > > + */
> > > +
> >
> > [...]
> >
> > > +static int psci_pd_power_off(struct generic_pm_domain *pd)
> > > +{
> > > +     struct genpd_power_state *state = &pd->states[pd->state_idx];
> > > +     u32 *pd_state;
> > > +
> > > +     /* If we have failed to enable OSI mode, then abort power off. */
> > > +     if (psci_has_osi_support() && !osi_mode_enabled)
> > > +             return -EBUSY;
> > > +
> > > +     if (!state->data)
> > > +             return 0;
> > > +
> > > +     /* When OSI mode is enabled, set the corresponding domain state. */
> > > +     pd_state = state->data;
> > > +     psci_set_domain_state(*pd_state);
> >
> > I trying to understand how would this scale to level 2(cluster of
> > clusters or for simply system). The current code for psci_set_domain_state
> > just stores the value @pd_state into per-cpu domain_state. E.g.: Now if
> > the system level pd is getting called after cluster PD, it will set the
> > domain state to system level PD state. It won't work with original
> > format and it may work with extended format if it's carefully crafted.
> > In short, the point is just over-writing domain_state is asking for
> > troubles IMO.
>
> Thanks for spotting this!
>
> While walking upwards in the PM domain topology, I thought I was ORing
> the domain states, but clearly the code isn't doing that.
>
> In principle we need to do the below instead.
>
> pd_state = state->data;
> composite_pd_state = *pd_state | psci_get_domain_state();
> psci_set_domain_state(composite_pd_state);
>

Yes 2 different issues here:
1. The direct assignment overwriting the value is problem which you agree.
2. The OR logic on it's own is bit not so clear from the specification.
   Since firmware authors need to be aware of this to make all these
   work. So it's not implicit, either we set this requirement in form
   of binding. We were also thinking of stating composite state in the
   DT, still just a thought, need to come up with examples/illustrations.

--
Regards,
Sudeep

^ permalink raw reply

* Re: [PATCH 18/18] arm64: dts: hikey: Convert to the hierarchical CPU topology layout
From: Sudeep Holla @ 2019-07-18 13:11 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Lorenzo Pieralisi, Mark Rutland, Linux ARM, Rafael J . Wysocki,
	Daniel Lezcano, Raju P . L . S . S . S . N, Amit Kucheria,
	Bjorn Andersson, Stephen Boyd, Niklas Cassel, Tony Lindgren,
	Kevin Hilman, Lina Iyer, Viresh Kumar, Vincent Guittot,
	Geert Uytterhoeven, Souvik Chakravarty, Linux PM, linux-arm-msm,
	Linux Kernel Mailing List, Wei Xu
In-Reply-To: <CAPDyKFpc26yL6rOnfwawL=eL649NsgTMrF1WrMHZv7AVd=3PCA@mail.gmail.com>

On Thu, Jul 18, 2019 at 12:48:14PM +0200, Ulf Hansson wrote:
> On Tue, 16 Jul 2019 at 16:47, Sudeep Holla <sudeep.holla@arm.com> wrote:
> >
> > On Mon, May 13, 2019 at 09:23:00PM +0200, Ulf Hansson wrote:
> > > To enable the OS to manage last-man standing activities for a CPU, while an
> > > idle state for a group of CPUs is selected, let's convert the Hikey
> > > platform into using the hierarchical CPU topology layout.
> > >
> > > Cc: Wei Xu <xuwei5@hisilicon.com>
> > > Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
> > > ---
> > >
> > > Changes:
> > >       - None.
> > >
> > > ---
> > >  arch/arm64/boot/dts/hisilicon/hi6220.dtsi | 87 ++++++++++++++++++++---
> > >  1 file changed, 76 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
> > > index 108e2a4227f6..36ff460f428f 100644
> > > --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
> > > +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
> > >       cpus {
> >
> > [...]
> >
> > > @@ -70,9 +128,8 @@
> > >                       };
> > >
> > >                       CLUSTER_SLEEP: cluster-sleep {
> > > -                             compatible = "arm,idle-state";
> > > -                             local-timer-stop;
> > > -                             arm,psci-suspend-param = <0x1010000>;
> > > +                             compatible = "domain-idle-state";
> > > +                             arm,psci-suspend-param = <0x1000000>;
> > >                               entry-latency-us = <1000>;
> > >                               exit-latency-us = <700>;
> > >                               min-residency-us = <2700>;
> >
> > Again this must be original format and as per PSCI spec, your patch
> > changes this cluster sleep state into cluster retention state which I
> > think is not what you intended.
>
> If the hierarchical topology is used, the parameter for cluster states
> are ORed with the deepest idle state for the CPU.
>
> CPU_SLEEP: 0x0010000
> CLUSTER_SLEEP: 0x1000000
>
> After the ORed operation
> CLUSTER_SLEEP: 0x1010000
>
> So, this indeed works as expected.
>

Yes, it works. But we are not XOR-ing so what's wrong in keeping the
StateType as required and be compliant to specification. Why do we need
to make the state param on it's own non-complaint.

What's wrong in retaining CLUSTER_SLEEP as 0x1010000 so that it reflects
the state level and type correctly on it's own ?

> However, are you saying that ORing the state parameters like above has
> other problems? I am reading your other replies...
>

Yes OR-ing may have other problems but the point I made here was more on
PSCI spec compliance for each suspend parameter values independently.

--
Regards,
Sudeep

^ permalink raw reply

* Re: [PATCH 10/18] drivers: firmware: psci: Add hierarchical domain idle states converter
From: Ulf Hansson @ 2019-07-18 11:43 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: Sudeep Holla, Mark Rutland, Linux ARM, Rafael J . Wysocki,
	Daniel Lezcano, Raju P . L . S . S . S . N, Amit Kucheria,
	Bjorn Andersson, Stephen Boyd, Niklas Cassel, Tony Lindgren,
	Kevin Hilman, Lina Iyer, Viresh Kumar, Vincent Guittot,
	Geert Uytterhoeven, Souvik Chakravarty, Linux PM, linux-arm-msm,
	Linux Kernel Mailing List
In-Reply-To: <20190716145121.GA32490@e121166-lin.cambridge.arm.com>

On Tue, 16 Jul 2019 at 16:51, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
>
> On Tue, Jul 16, 2019 at 10:45:49AM +0200, Ulf Hansson wrote:
>
> [...]
>
> > > > +static void psci_pd_convert_states(struct cpuidle_state *idle_state,
> > > > +                     u32 *psci_state, struct genpd_power_state *state)
> > > > +{
> > > > +     u32 *state_data = state->data;
> > > > +     u64 target_residency_us = state->residency_ns;
> > > > +     u64 exit_latency_us = state->power_on_latency_ns +
> > > > +                     state->power_off_latency_ns;
> > > > +
> > > > +     *psci_state = *state_data;
> > > > +     do_div(target_residency_us, 1000);
> > > > +     idle_state->target_residency = target_residency_us;
> > > > +     do_div(exit_latency_us, 1000);
> > > > +     idle_state->exit_latency = exit_latency_us;
> > > > +     idle_state->enter = &psci_pd_enter_pc;
> > > > +     idle_state->enter_s2idle = &psci_pd_enter_s2idle_pc;
> > > > +     idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
> > >
> > > This is arbitrary and not necessarily true.
> >
> > The arbitrary thing you refer to here, is that the
> > CPUIDLE_FLAG_TIMER_STOP? Or are you referring to the complete function
> > psci_pd_convert_states()?
>
> I refer to CPUIDLE_FLAG_TIMER_STOP. I think that on platform coordinated
> system we should not bother about the hierarchical representation of the
> states (I understand I asked you to make it work but it has become too
> complex, I would rather focus on making the hierarchical representation
> work for all idle states combination in OSI mode).
>
> Plus side, another level of complexity removed.

Oh, well, that's why I left it out in the first place, to start simple. :-)

Anyway, no problem, I will revert back to that option.

>
> > > I think that this patch is useful to represent my reservations about the
> > > current approach. As a matter of fact, idle state entry will always be a
> > > CPUidle decision.
> > >
> > > You only need PM domain information to understand when all CPUs
> > > in a power domain are actually idle but that's all genPD can do
> > > in this respect.
> > >
> > > I think this patchset would be much simpler if both CPUidle and
> > > genPD governor would work on *one* set of idle states, globally
> > > indexed (and that would be true for PSCI suspend parameters too).
> > >
> > > To work with a unified set of idle states between CPUidle and genPD
> > > (tossing some ideas around):
> > >
> > > - We can implement a genPD CPUidle governor that in its select method
> > >   takes into account genPD information (for instance by avoiding
> > >   selection of idle states that require multiple cpus to be in idle
> > >   to be effectively active)
> > > - We can use genPD to enable/disable CPUidle states through runtime
> > >   PM information
> >
> > I don't understand how to make this work.
> >
> > The CPUidle governor works on per CPU basis. The genpd governor works
> > on per PM domain basis, which typically can be a group of CPUs (and
> > even other devices) via subdomains, for example.
> >
> > 1.
> > In case of Linux being in *charge* of what idle state to pick for a
> > group of CPUs, that decision is best done by the genpd governor as it
> > operates on "groups" of CPUs. This is used for PSCI OSI mode. Of
> > course, there are idle states also per CPU, which potentially could be
> > managed by the genpd governor as well, but at this point I decided to
> > re-use the cpuidle governor as it's already doing the job.
> >
> > 2. In case the decision of what idle state to enter for the group is
> > done by the FW, we can rely solely on the cpuidle governor and let it
> > select states per CPU basis. This is used for PSCI PC mode.
> >
> > >
> > > There may be other ways. My point is that current code, with two (or
> > > more if the hierarchy grows) sets of idle states across two subsystems
> > > (CPUidle and genPD) is not very well defined and honestly very hard to
> > > grasp and prone to errors.
> >
> > The complexity is there, I admit that.
> >
> > I guess some deeper insight about genpd+its governor+runtime PM are
> > needed when reviewing this, of course. As an option, you may also have
> > a look at my slides [1] from OSPM (Pisa) in May this year, which via
> > flow charts try to describes things in more detail.
> >
> > In our offlist meeting, we discussed that perhaps moving some of the
> > new PSCI code introduced in this series, into a cpuidle driver
> > instead, could make things more clear. For sure, I can explore that
> > option, but before I go there, I think we should agree on it publicly.
>
> I will do it but given that the generic idle infrastructure basically
> is there for PSCI and:
>
> drivers/soc/qcom/spm.c
>
> if we create a PSCI CPUidle driver we can write one for qcom-spm and
> remove the generic idle infrastructure, there would not be much
> point in keeping it in the kernel; at least on ARM64 not using
> PSCI for CPUidle is not even an option.

To make it clear, I definitely like this idea!

I am not really fond of current cpuidle backend infrastructure for
ARM/ARM64, it's really hard to follow all the things that happens in
those corresponding callbacks.

>
> > In principle what it means is to invent a special cpuidle driver for
> > PSCI, so we would need access to some of the PSCI internal functions,
> > for example.
>
> Yes.
>
> > One thing though, the initialization of the PSCI PM domain topology is
> > a separate step, managed via the new initcall, psci_dt_topology_init()
> > (introduced in patch 11). That part still seems to be belong to the
> > PSCI code, don't you think?
>
> Yes but at least we can call it from a sensible place (well, sensible,
> most likely from an initcall given how idle drivers are initialized).

Okay.

>
> > > > +     strncpy(idle_state->name, to_of_node(state->fwnode)->name,
> > > > +             CPUIDLE_NAME_LEN - 1);
> > > > +     strncpy(idle_state->desc, to_of_node(state->fwnode)->name,
> > > > +             CPUIDLE_NAME_LEN - 1);
> > > > +}
> > > > +
> > > > +static bool psci_pd_is_provider(struct device_node *np)
> > > > +{
> > > > +     struct psci_pd_provider *pd_prov, *it;
> > > > +
> > > > +     list_for_each_entry_safe(pd_prov, it, &psci_pd_providers, link) {
> > > > +             if (pd_prov->node == np)
> > > > +                     return true;
> > > > +     }
> > > > +
> > > > +     return false;
> > > > +}
> > > > +
> > > >  static int psci_pd_init(struct device_node *np)
> > > >  {
> > > >       struct generic_pm_domain *pd;
> > > > @@ -265,4 +316,71 @@ int psci_dt_init_pm_domains(struct device_node *np)
> > > >       pr_err("failed to create CPU PM domains ret=%d\n", ret);
> > > >       return ret;
> > > >  }
> > > > +
> > > > +int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
> > > > +                     struct device_node *cpu_node, u32 *psci_states)
> > > > +{
> > > > +     struct genpd_power_state *pd_states;
> > > > +     struct of_phandle_args args;
> > > > +     int ret, pd_state_count, i, state_idx, psci_idx;
> > > > +     u32 cpu_psci_state = psci_states[drv->state_count - 2];
> > >
> > > This (-2) is very dodgy and I doubt it would work on hierarchies going
> > > above "cluster" level.
> > >
> > > As I say above, I think we should work towards a single array of
> > > idle states to be selected by a CPUidle governor using genPD
> > > runtime information to bias the results according to the number
> > > of CPUs in a genPD that entered/exit idle.
> > >
> > > To be more precise, all idles states should be "domain-idle-state"
> > > compatible, even the CPU ones, the distinction between what CPUidle
> > > and genPD manage is a bit stretched IMO in this patchset.
> > >
> > > We will have a chance to talk about this but I thought I would
> > > comment publically if anyone else is willing to chime in, this
> > > is not a PSCI problem at all, it is a CPUidle/genPD coexistence
> > > design problem which is much broader.
> >
> > To move this forward, I think we need to move from vague ideas to
> > clear and distinct plans. Whatever that means. :-)
>
> See above.
>
> > I understand you are concerned about the level of changes introduced
> > to the PSCI code. As I stated somewhere in earlier replies, I picked
> > that approach as I thought it was better to implement things in a PSCI
> > specific manner to start with, then we could move things around, when
> > we realize that it make sense.
>
> I am also concerned about how the idle states are managed in
> this patchset and I am pretty certain it will break when we
> move away from a simple hierarchy with one CPU state and one
> cluster state, we will comment on the specifics.

The intent with the series is that this should be supported, no matter
of the number of states or the level of hierarchy.

Well, there are some limitations/bugs in genpd (and the genpd
governor) to support a greater level than 2, but that is on my TODO
list to fix. Again, some of my slides from OSPM Pisa explains this.

More importantly, the current deployment to PSCI should remain
unchanged after this series (unless there is a bug somewhere, as also
was pointed out by Sudeep in another reply).

>
> Moving PSCI code into a CPUidle driver will cater for the rest.

Great news, we seems to have a plan!

>
> > Anyway, as a suggestion to address your concern, how about this:
> >
> > 1. Move some things out to a PSCI cpuidle driver. We need to decide
> > more exactly on what to move and find the right level for the
> > interfaces.
>
> I will do it and post patches asap.

Okay, so I will wait for you to converting the cpuidle-arm driver into
a cpuidle-psci driver (and all the changes that comes with it) and
then base my re-base my series on top.

Then, would you mind sharing (even in an early phase) a
branch/git-tree so I can start re-basing my series on top?

>
> > 2. Don't attach the CPU to the PM domain topology in case the PSCI PC
> > mode is used. I think this makes it easier, at least as a first step,
> > to understand when runtime PM needs to be used/enabled.
>
> In the PSCI CPUidle driver we can have two distinct struct
> cpuidle_state->enter functions for PC and OSI, no overhead
> for PC, runtime PM for OSI, decoupling done.

Good idea!

>
> We can choose one or the other depending on whether:
>
> OSI iff:
> - OSI is available
> - hierarchical idle states are present in DT
>
> otherwise PC.
>
> That's what this patch does but we will do it in a unified file.

Sure, it makes sense.

>
> > 3. Would it help if I volunteer to help you guys as a maintainer for
> > PSCI. At least for the part of the new code that becomes introduced?
>
> We will do as described above if that makes sense.

Yep, I am okay with your suggestions, assuming I have understood them correctly.

BTW, have you considered to host a git tree for PSCI so we can have
changes pre-integrated and tested in Stephen Rothwell's linux-next
tree?

Kind regards
Uffe

^ permalink raw reply

* Re: [PATCH 09/18] drivers: firmware: psci: Add support for PM domains using genpd
From: Ulf Hansson @ 2019-07-18 11:04 UTC (permalink / raw)
  To: Sudeep Holla
  Cc: Lorenzo Pieralisi, Mark Rutland, Linux ARM, Rafael J . Wysocki,
	Daniel Lezcano, Raju P . L . S . S . S . N, Amit Kucheria,
	Bjorn Andersson, Stephen Boyd, Niklas Cassel, Tony Lindgren,
	Kevin Hilman, Lina Iyer, Viresh Kumar, Vincent Guittot,
	Geert Uytterhoeven, Souvik Chakravarty, Linux PM, linux-arm-msm,
	Linux Kernel Mailing List, Lina Iyer
In-Reply-To: <20190716150533.GD7250@e107155-lin>

On Tue, 16 Jul 2019 at 17:05, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> On Mon, May 13, 2019 at 09:22:51PM +0200, Ulf Hansson wrote:
> > When the hierarchical CPU topology layout is used in DT, we need to setup
> > the corresponding PM domain data structures, as to allow a CPU and a group
> > of CPUs to be power managed accordingly. Let's enable this by deploying
> > support through the genpd interface.
> >
> > Additionally, when the OS initiated mode is supported by the PSCI FW, let's
> > also parse the domain idle states DT bindings as to make genpd responsible
> > for the state selection, when the states are compatible with
> > "domain-idle-state". Otherwise, when only Platform Coordinated mode is
> > supported, we rely solely on the state selection to be managed through the
> > regular cpuidle framework.
> >
> > If the initialization of the PM domain data structures succeeds and the OS
> > initiated mode is supported, we try to switch to it. In case it fails,
> > let's fall back into a degraded mode, rather than bailing out and returning
> > an error code.
> >
> > Due to that the OS initiated mode may become enabled, we need to adjust to
> > maintain backwards compatibility for a kernel started through a kexec call.
> > Do this by explicitly switch to Platform Coordinated mode during boot.
> >
> > Finally, the actual initialization of the PM domain data structures, is
> > done via calling the new shared function, psci_dt_init_pm_domains().
> > However, this is implemented by subsequent changes.
> >
> > Co-developed-by: Lina Iyer <lina.iyer@linaro.org>
> > Signed-off-by: Lina Iyer <lina.iyer@linaro.org>
> > Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
> > ---
> >
> > Changes:
> >       - Simplify code setting domain_state at power off.
> >       - Use the genpd ->free_state() callback to manage freeing of states.
> >       - Fixup a bogus while loop.
> >
> > ---
> >  drivers/firmware/psci/Makefile         |   2 +-
> >  drivers/firmware/psci/psci.c           |   7 +-
> >  drivers/firmware/psci/psci.h           |   5 +
> >  drivers/firmware/psci/psci_pm_domain.c | 268 +++++++++++++++++++++++++
> >  4 files changed, 280 insertions(+), 2 deletions(-)
> >  create mode 100644 drivers/firmware/psci/psci_pm_domain.c
> >
>
> [...]
>
> >  #endif /* __PSCI_H */
> > diff --git a/drivers/firmware/psci/psci_pm_domain.c b/drivers/firmware/psci/psci_pm_domain.c
> > new file mode 100644
> > index 000000000000..3c6ca846caf4
> > --- /dev/null
> > +++ b/drivers/firmware/psci/psci_pm_domain.c
> > @@ -0,0 +1,268 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * PM domains for CPUs via genpd - managed by PSCI.
> > + *
> > + * Copyright (C) 2019 Linaro Ltd.
> > + * Author: Ulf Hansson <ulf.hansson@linaro.org>
> > + *
> > + */
> > +
>
> [...]
>
> > +static int psci_pd_power_off(struct generic_pm_domain *pd)
> > +{
> > +     struct genpd_power_state *state = &pd->states[pd->state_idx];
> > +     u32 *pd_state;
> > +
> > +     /* If we have failed to enable OSI mode, then abort power off. */
> > +     if (psci_has_osi_support() && !osi_mode_enabled)
> > +             return -EBUSY;
> > +
> > +     if (!state->data)
> > +             return 0;
> > +
> > +     /* When OSI mode is enabled, set the corresponding domain state. */
> > +     pd_state = state->data;
> > +     psci_set_domain_state(*pd_state);
>
> I trying to understand how would this scale to level 2(cluster of
> clusters or for simply system). The current code for psci_set_domain_state
> just stores the value @pd_state into per-cpu domain_state. E.g.: Now if
> the system level pd is getting called after cluster PD, it will set the
> domain state to system level PD state. It won't work with original
> format and it may work with extended format if it's carefully crafted.
> In short, the point is just over-writing domain_state is asking for
> troubles IMO.

Thanks for spotting this!

While walking upwards in the PM domain topology, I thought I was ORing
the domain states, but clearly the code isn't doing that.

In principle we need to do the below instead.

pd_state = state->data;
composite_pd_state = *pd_state | psci_get_domain_state();
psci_set_domain_state(composite_pd_state);

Kind regards
Uffe

^ permalink raw reply

* Re: [PATCH 18/18] arm64: dts: hikey: Convert to the hierarchical CPU topology layout
From: Ulf Hansson @ 2019-07-18 10:48 UTC (permalink / raw)
  To: Sudeep Holla
  Cc: Lorenzo Pieralisi, Mark Rutland, Linux ARM, Rafael J . Wysocki,
	Daniel Lezcano, Raju P . L . S . S . S . N, Amit Kucheria,
	Bjorn Andersson, Stephen Boyd, Niklas Cassel, Tony Lindgren,
	Kevin Hilman, Lina Iyer, Viresh Kumar, Vincent Guittot,
	Geert Uytterhoeven, Souvik Chakravarty, Linux PM, linux-arm-msm,
	Linux Kernel Mailing List, Wei Xu
In-Reply-To: <20190716144744.GB7250@e107155-lin>

On Tue, 16 Jul 2019 at 16:47, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> On Mon, May 13, 2019 at 09:23:00PM +0200, Ulf Hansson wrote:
> > To enable the OS to manage last-man standing activities for a CPU, while an
> > idle state for a group of CPUs is selected, let's convert the Hikey
> > platform into using the hierarchical CPU topology layout.
> >
> > Cc: Wei Xu <xuwei5@hisilicon.com>
> > Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
> > ---
> >
> > Changes:
> >       - None.
> >
> > ---
> >  arch/arm64/boot/dts/hisilicon/hi6220.dtsi | 87 ++++++++++++++++++++---
> >  1 file changed, 76 insertions(+), 11 deletions(-)
> >
> > diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
> > index 108e2a4227f6..36ff460f428f 100644
> > --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
> > +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
> >       cpus {
>
> [...]
>
> > @@ -70,9 +128,8 @@
> >                       };
> >
> >                       CLUSTER_SLEEP: cluster-sleep {
> > -                             compatible = "arm,idle-state";
> > -                             local-timer-stop;
> > -                             arm,psci-suspend-param = <0x1010000>;
> > +                             compatible = "domain-idle-state";
> > +                             arm,psci-suspend-param = <0x1000000>;
> >                               entry-latency-us = <1000>;
> >                               exit-latency-us = <700>;
> >                               min-residency-us = <2700>;
>
> Again this must be original format and as per PSCI spec, your patch
> changes this cluster sleep state into cluster retention state which I
> think is not what you intended.

If the hierarchical topology is used, the parameter for cluster states
are ORed with the deepest idle state for the CPU.

CPU_SLEEP: 0x0010000
CLUSTER_SLEEP: 0x1000000

After the ORed operation
CLUSTER_SLEEP: 0x1010000

So, this indeed works as expected.

However, are you saying that ORing the state parameters like above has
other problems? I am reading your other replies...

Kind regards
Uffe

^ permalink raw reply

* Re: [PATCH 14/18] drivers: firmware: psci: Manage runtime PM in the idle path for CPUs
From: Ulf Hansson @ 2019-07-18 10:35 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: Sudeep Holla, Mark Rutland, Linux ARM, Rafael J . Wysocki,
	Daniel Lezcano, Raju P . L . S . S . S . N, Amit Kucheria,
	Bjorn Andersson, Stephen Boyd, Niklas Cassel, Tony Lindgren,
	Kevin Hilman, Lina Iyer, Viresh Kumar, Vincent Guittot,
	Geert Uytterhoeven, Souvik Chakravarty, Linux PM, linux-arm-msm,
	Linux Kernel Mailing List
In-Reply-To: <20190716155317.GB32490@e121166-lin.cambridge.arm.com>

On Tue, 16 Jul 2019 at 17:53, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
>
> On Mon, May 13, 2019 at 09:22:56PM +0200, Ulf Hansson wrote:
> > When the hierarchical CPU topology layout is used in DT, let's allow the
> > CPU to be power managed through its PM domain, via deploying runtime PM
> > support.
> >
> > To know for which idle states runtime PM reference counting is needed,
> > let's store the index of deepest idle state for the CPU, in a per CPU
> > variable. This allows psci_cpu_suspend_enter() to compare this index with
> > the requested idle state index and then act accordingly.
>
> I do not see why a system with two CPU CPUidle states, say CPU retention
> and CPU shutdown, should not be calling runtime PM on CPU retention
> entry.

If the CPU idle governor did select the CPU retention for the CPU, it
was probably because the target residency for the CPU shutdown state
could not be met.

In this case, there is no point in allowing any other deeper idle
states for cluster/package/system, since those have even greater
residencies, hence calling runtime PM doesn't make sense.

>
> The question then is what cluster/package/system states
> are allowed for a given CPU idle state, to understand
> what idle states can be actually entered at any hierarchy
> level given the choice made for the CPU idle state.
>
> In the case above, a CPU entering retention state should prevent
> runtime PM selecting a cluster shutdown state; most likely firmware
> would demote the request to cluster retention but still, we should
> find a way to describe these dependencies.

See above.

[...]

Kind regards
Uffe

^ permalink raw reply

* Re: [PATCH] Revert "cpufreq: schedutil: Don't set next_freq to UINT_MAX"
From: Viresh Kumar @ 2019-07-18 10:28 UTC (permalink / raw)
  To: Doug Smythies; +Cc: rjw, joel, linux-kernel, linux-pm, dsmythies
In-Reply-To: <1563431200-3042-1-git-send-email-dsmythies@telus.net>

On 17-07-19, 23:26, Doug Smythies wrote:
> This reverts commit ecd2884291261e3fddbc7651ee11a20d596bb514.
> 
> The commit caused a regression whereby reducing the maximum
> CPU clock frequency is ineffective while busy, and the CPU
> clock remains unchanged. Once the system has experienced
> some idle time, the new limit is implemented.

Can you explain why this patch caused that issue ? I am sorry but I couldn't
understand it from your email. How are we trying to reduce the frequency? Is
clk_set_rate() getting called with that finally and not working ?

> A consequence is that any thermal throttling monitoring
> and control based on max freq limits fail to respond
> in a timely manor, if at all, to a thermal temperature
> trip on a busy system.

-- 
viresh

^ permalink raw reply

* Re: [PATCH v4 05/24] PM / devfreq: tegra30: Set up watermarks properly
From: Chanwoo Choi @ 2019-07-18 10:17 UTC (permalink / raw)
  To: Dmitry Osipenko, Thierry Reding, MyungJoo Ham, Kyungmin Park,
	Jonathan Hunter, Tomeu Vizoso
  Cc: linux-pm, linux-tegra, linux-kernel
In-Reply-To: <20190707223303.6755-6-digetx@gmail.com>

On 19. 7. 8. 오전 7:32, Dmitry Osipenko wrote:
> The current implementation is inaccurate and results in very intensive
> interrupt activity, which neglects the whole idea of polling offload to
> hardware. The reason of the shortcoming is that watermarks are not set
> up correctly and this results in ACTMON constantly asking to change freq
> and then these requests are ignored. The end result of this patch is that
> there are few hundreds of ACTMON's interrupts instead of tens thousands
> after few minutes of a working devfreq, meanwhile the transitions activity
> stays about the same and governor becomes more reactive.
> 
> Since watermarks are set precisely correct now, the boosting logic is
> changed a tad to accommodate the change. The "average sustain coefficient"
> multiplier is gone now since there is no need to compensate the improper
> watermarks and EMC frequency-bump happens once boosting hits the upper
> watermark enough times, depending on the per-device boosting threshold.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/devfreq/tegra30-devfreq.c | 293 +++++++++++++++++++++---------
>  1 file changed, 209 insertions(+), 84 deletions(-)
> 
> diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
> index 4be7858c33bc..16f7e6cf3b99 100644
> --- a/drivers/devfreq/tegra30-devfreq.c
> +++ b/drivers/devfreq/tegra30-devfreq.c
> @@ -47,6 +47,8 @@
>  
>  #define ACTMON_DEV_INTR_CONSECUTIVE_UPPER			BIT(31)
>  #define ACTMON_DEV_INTR_CONSECUTIVE_LOWER			BIT(30)
> +#define ACTMON_DEV_INTR_AVG_BELOW_WMARK				BIT(25)
> +#define ACTMON_DEV_INTR_AVG_ABOVE_WMARK				BIT(24)
>  
>  #define ACTMON_ABOVE_WMARK_WINDOW				1
>  #define ACTMON_BELOW_WMARK_WINDOW				3
> @@ -63,9 +65,8 @@
>   * ACTMON_AVERAGE_WINDOW_LOG2: default value for @DEV_CTRL_K_VAL, which
>   * translates to 2 ^ (K_VAL + 1). ex: 2 ^ (6 + 1) = 128
>   */
> -#define ACTMON_AVERAGE_WINDOW_LOG2			6
> -#define ACTMON_SAMPLING_PERIOD				12 /* ms */
> -#define ACTMON_DEFAULT_AVG_BAND				6  /* 1/10 of % */
> +#define ACTMON_AVERAGE_WINDOW_LOG2				6
> +#define ACTMON_SAMPLING_PERIOD					12 /* ms */
>  
>  #define KHZ							1000
>  
> @@ -142,9 +143,6 @@ struct tegra_devfreq_device {
>  	 * watermark breaches.
>  	 */
>  	unsigned long boost_freq;
> -
> -	/* Optimal frequency calculated from the stats for this device */
> -	unsigned long target_freq;
>  };
>  
>  struct tegra_devfreq {
> @@ -156,7 +154,6 @@ struct tegra_devfreq {
>  
>  	struct clk		*emc_clock;
>  	unsigned long		max_freq;
> -	unsigned long		cur_freq;
>  	struct notifier_block	rate_change_nb;
>  
>  	struct tegra_devfreq_device devices[ARRAY_SIZE(actmon_device_configs)];
> @@ -205,42 +202,182 @@ static unsigned long do_percent(unsigned long val, unsigned int pct)
>  	return val * pct / 100;
>  }
>  
> +static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra)
> +{
> +	struct tegra_actmon_emc_ratio *ratio = actmon_emc_ratios;
> +	unsigned int cpu_freq = cpufreq_get(0);
> +	unsigned int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(actmon_emc_ratios); i++, ratio++) {
> +		if (cpu_freq >= ratio->cpu_freq) {
> +			if (ratio->emc_freq >= tegra->max_freq)
> +				return tegra->max_freq;
> +			else
> +				return ratio->emc_freq;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static unsigned long
> +tegra_actmon_account_cpu_freq(struct tegra_devfreq *tegra,
> +			      struct tegra_devfreq_device *dev,
> +			      unsigned long target_freq)
> +{
> +	unsigned long static_cpu_emc_freq;
> +
> +	if (dev->config->avg_dependency_threshold &&
> +	    dev->config->avg_dependency_threshold < dev->avg_count) {
> +		static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra);
> +		target_freq = max(target_freq, static_cpu_emc_freq);
> +	}
> +
> +	return target_freq;
> +}
> +
> +static unsigned long tegra_actmon_lower_freq(struct tegra_devfreq *tegra,
> +					     unsigned long target_freq)
> +{
> +	unsigned long lower = target_freq;
> +	struct dev_pm_opp *opp;
> +
> +	opp = dev_pm_opp_find_freq_floor(tegra->devfreq->dev.parent, &lower);
> +	if (IS_ERR(opp))
> +		lower = 0;
> +	else
> +		dev_pm_opp_put(opp);
> +
> +	return lower;
> +}
> +
> +static unsigned long tegra_actmon_upper_freq(struct tegra_devfreq *tegra,
> +					     unsigned long target_freq)
> +{
> +	unsigned long upper = target_freq + 1;
> +	struct dev_pm_opp *opp;
> +
> +	opp = dev_pm_opp_find_freq_ceil(tegra->devfreq->dev.parent, &upper);
> +	if (IS_ERR(opp))
> +		upper = ULONG_MAX;
> +	else
> +		dev_pm_opp_put(opp);
> +
> +	return upper;
> +}
> +
> +static void tegra_actmon_get_lower_upper(struct tegra_devfreq *tegra,
> +					 struct tegra_devfreq_device *dev,
> +					 unsigned long target_freq,
> +					 unsigned long *lower,
> +					 unsigned long *upper)
> +{
> +	/*
> +	 * Memory frequencies are guaranteed to have 1MHz granularity
> +	 * and thus we need this rounding down to get a proper watermarks
> +	 * range in a case where target_freq falls into a range of
> +	 * next_possible_opp_freq - 1MHz.
> +	 */
> +	target_freq = round_down(target_freq, 1000000);
> +
> +	/* watermarks are set at the borders of the corresponding OPPs */
> +	*lower = tegra_actmon_lower_freq(tegra, target_freq);
> +	*upper = tegra_actmon_upper_freq(tegra, target_freq);
> +
> +	*lower /= KHZ;
> +	*upper /= KHZ;
> +
> +	/*
> +	 * The upper watermark should take into account CPU's frequency
> +	 * because cpu_to_emc_rate() may override the target_freq with
> +	 * a higher value and thus upper watermark need to be set up
> +	 * accordingly to avoid parasitic upper-events.
> +	 */
> +	*upper = tegra_actmon_account_cpu_freq(tegra, dev, *upper);
> +
> +	*lower *= ACTMON_SAMPLING_PERIOD;
> +	*upper *= ACTMON_SAMPLING_PERIOD;
> +}
> +
>  static void tegra_devfreq_update_avg_wmark(struct tegra_devfreq *tegra,
>  					   struct tegra_devfreq_device *dev)
>  {
> -	u32 avg = dev->avg_count;
> -	u32 avg_band_freq = tegra->max_freq * ACTMON_DEFAULT_AVG_BAND / KHZ;
> -	u32 band = avg_band_freq * ACTMON_SAMPLING_PERIOD;
> +	unsigned long lower, upper, freq;
>  
> -	device_writel(dev, avg + band, ACTMON_DEV_AVG_UPPER_WMARK);
> +	freq = dev->avg_count / ACTMON_SAMPLING_PERIOD * KHZ;
> +	tegra_actmon_get_lower_upper(tegra, dev, freq, &lower, &upper);
>  
> -	avg = max(dev->avg_count, band);
> -	device_writel(dev, avg - band, ACTMON_DEV_AVG_LOWER_WMARK);
> +	/*
> +	 * We want to get interrupts when MCCPU client crosses the
> +	 * dependency threshold in order to take into / out of account
> +	 * the CPU's freq.
> +	 */
> +	if (lower < dev->config->avg_dependency_threshold &&
> +	    upper > dev->config->avg_dependency_threshold) {
> +		if (dev->avg_count < dev->config->avg_dependency_threshold)
> +			upper = dev->config->avg_dependency_threshold;
> +		else
> +			lower = dev->config->avg_dependency_threshold;
> +	}
> +
> +	device_writel(dev, lower, ACTMON_DEV_AVG_LOWER_WMARK);
> +	device_writel(dev, upper, ACTMON_DEV_AVG_UPPER_WMARK);
>  }
>  
>  static void tegra_devfreq_update_wmark(struct tegra_devfreq *tegra,
> -				       struct tegra_devfreq_device *dev)
> +				       struct tegra_devfreq_device *dev,
> +				       unsigned long freq)
>  {
> -	u32 val = tegra->cur_freq * ACTMON_SAMPLING_PERIOD;
> +	unsigned long lower, upper, delta;
> +
> +	/*
> +	 * Boosting logic kicks-in once lower / upper watermark is hit.
> +	 * The watermarks are based on the updated EMC rate and the
> +	 * average activity.
> +	 *
> +	 * The higher watermark is set in accordance to the EMC rate
> +	 * because we want to set it to the highest mark here and EMC rate
> +	 * represents that mark. The consecutive-upper interrupts are
> +	 * always enabled and we don't want to receive them if they won't
> +	 * do anything useful, hence the upper watermark is capped to maximum.
> +	 * Note that the EMC rate is changed once boosting pushed the rate
> +	 * too high, in that case boosting-up will be stopped because
> +	 * upper watermark is much higher now and it is *important* to
> +	 * stop the unwanted interrupts.
> +	 */
> +	tegra_actmon_get_lower_upper(tegra, dev, freq - 1, &lower, &upper);
> +
> +	delta = do_percent(upper - lower, dev->config->boost_up_threshold);
> +	device_writel(dev, lower + delta, ACTMON_DEV_UPPER_WMARK);
>  
> -	device_writel(dev, do_percent(val, dev->config->boost_up_threshold),
> -		      ACTMON_DEV_UPPER_WMARK);
> +	/*
> +	 * Meanwhile the lower mark is based on the average value
> +	 * because it is the lowest possible consecutive-mark for this
> +	 * device. Once that mark is hit and boosting is stopped, the
> +	 * interrupt is disabled by ISR.
> +	 */
> +	freq = dev->avg_count / ACTMON_SAMPLING_PERIOD * KHZ;
> +	tegra_actmon_get_lower_upper(tegra, dev, freq, &lower, &upper);
>  
> -	device_writel(dev, do_percent(val, dev->config->boost_down_threshold),
> -		      ACTMON_DEV_LOWER_WMARK);
> +	delta = do_percent(upper - lower, dev->config->boost_down_threshold);
> +	device_writel(dev, lower + delta, ACTMON_DEV_LOWER_WMARK);
>  }
>  
>  static void actmon_isr_device(struct tegra_devfreq *tegra,
>  			      struct tegra_devfreq_device *dev)
>  {
> -	u32 intr_status, dev_ctrl;
> +	u32 intr_status, dev_ctrl, avg_intr_mask;
>  
>  	dev->avg_count = device_readl(dev, ACTMON_DEV_AVG_COUNT);
> -	tegra_devfreq_update_avg_wmark(tegra, dev);
> -
>  	intr_status = device_readl(dev, ACTMON_DEV_INTR_STATUS);
>  	dev_ctrl = device_readl(dev, ACTMON_DEV_CTRL);
>  
> +	avg_intr_mask = ACTMON_DEV_INTR_AVG_BELOW_WMARK |
> +			ACTMON_DEV_INTR_AVG_ABOVE_WMARK;
> +
> +	if (intr_status & avg_intr_mask)
> +		tegra_devfreq_update_avg_wmark(tegra, dev);
> +
>  	if (intr_status & ACTMON_DEV_INTR_CONSECUTIVE_UPPER) {
>  		/*
>  		 * new_boost = min(old_boost * up_coef + step, max_freq)
> @@ -253,8 +390,6 @@ static void actmon_isr_device(struct tegra_devfreq *tegra,
>  
>  		if (dev->boost_freq >= tegra->max_freq)
>  			dev->boost_freq = tegra->max_freq;
> -		else
> -			dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
>  	} else if (intr_status & ACTMON_DEV_INTR_CONSECUTIVE_LOWER) {
>  		/*
>  		 * new_boost = old_boost * down_coef
> @@ -263,63 +398,37 @@ static void actmon_isr_device(struct tegra_devfreq *tegra,
>  		dev->boost_freq = do_percent(dev->boost_freq,
>  					     dev->config->boost_down_coeff);
>  
> -		dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_ABOVE_WMARK_EN;
> -
>  		if (dev->boost_freq < (ACTMON_BOOST_FREQ_STEP >> 1))
>  			dev->boost_freq = 0;
> -		else
> -			dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
>  	}
>  
> -	if (dev->config->avg_dependency_threshold) {
> -		if (dev->avg_count >= dev->config->avg_dependency_threshold)
> -			dev_ctrl |= ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
> -		else if (dev->boost_freq == 0)
> -			dev_ctrl &= ~ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
> +	if (intr_status & avg_intr_mask) {
> +		/*
> +		 * Once average watermark is hit, it means that the memory
> +		 * activity changed significantly and thus boosting-up shall
> +		 * be reset because EMC clock rate will be changed and
> +		 * boosting will restart in this case.
> +		 */
> +		dev->boost_freq = 0;
>  	}
>  
> -	device_writel(dev, dev_ctrl, ACTMON_DEV_CTRL);
> +	/* no boosting => no need for consecutive-down interrupt */
> +	if (dev->boost_freq == 0)
> +		dev_ctrl &= ~ACTMON_DEV_CTRL_CONSECUTIVE_BELOW_WMARK_EN;
>  
> +	device_writel(dev, dev_ctrl, ACTMON_DEV_CTRL);
>  	device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS);
>  }
>  
> -static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra,
> -					    unsigned long cpu_freq)
> -{
> -	unsigned int i;
> -	struct tegra_actmon_emc_ratio *ratio = actmon_emc_ratios;
> -
> -	for (i = 0; i < ARRAY_SIZE(actmon_emc_ratios); i++, ratio++) {
> -		if (cpu_freq >= ratio->cpu_freq) {
> -			if (ratio->emc_freq >= tegra->max_freq)
> -				return tegra->max_freq;
> -			else
> -				return ratio->emc_freq;
> -		}
> -	}
> -
> -	return 0;
> -}
> -
> -static void actmon_update_target(struct tegra_devfreq *tegra,
> -				 struct tegra_devfreq_device *dev)
> +static unsigned long actmon_update_target(struct tegra_devfreq *tegra,
> +					  struct tegra_devfreq_device *dev)
>  {
> -	unsigned long cpu_freq = 0;
> -	unsigned long static_cpu_emc_freq = 0;
> -	unsigned int avg_sustain_coef;
> -
> -	if (dev->config->avg_dependency_threshold) {
> -		cpu_freq = cpufreq_get(0);
> -		static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq);
> -	}
> +	unsigned long target_freq;
>  
> -	dev->target_freq = dev->avg_count / ACTMON_SAMPLING_PERIOD;
> -	avg_sustain_coef = 100 * 100 / dev->config->boost_up_threshold;
> -	dev->target_freq = do_percent(dev->target_freq, avg_sustain_coef);
> -	dev->target_freq += dev->boost_freq;
> +	target_freq = dev->avg_count / ACTMON_SAMPLING_PERIOD + dev->boost_freq;
> +	target_freq = tegra_actmon_account_cpu_freq(tegra, dev, target_freq);
>  
> -	if (dev->avg_count >= dev->config->avg_dependency_threshold)
> -		dev->target_freq = max(dev->target_freq, static_cpu_emc_freq);
> +	return target_freq;
>  }
>  
>  static irqreturn_t actmon_thread_isr(int irq, void *data)
> @@ -351,8 +460,8 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
>  				       unsigned long action, void *ptr)
>  {
>  	struct clk_notifier_data *data = ptr;
> -	struct tegra_devfreq *tegra;
>  	struct tegra_devfreq_device *dev;
> +	struct tegra_devfreq *tegra;
>  	unsigned int i;
>  
>  	if (action != POST_RATE_CHANGE)
> @@ -360,12 +469,28 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
>  
>  	tegra = container_of(nb, struct tegra_devfreq, rate_change_nb);
>  
> -	tegra->cur_freq = data->new_rate / KHZ;
> -
> +	/*
> +	 * EMC rate could change due to three reasons:
> +	 *
> +	 *    1. Average watermark hit
> +	 *    2. Boosting overflow
> +	 *    3. CPU freq change
> +	 *
> +	 * Once rate is changed, the consecutive watermarks need to be
> +	 * updated in order for boosting to work properly and to avoid
> +	 * unnecessary interrupts. Note that the consecutive range is set for
> +	 * all of devices using the same rate, hence if CPU is doing much
> +	 * less than the other memory clients, then its upper watermark will
> +	 * be very high in comparison to the actual activity (lower watermark)
> +	 * and thus unnecessary upper-interrupts will be suppressed.
> +	 *
> +	 * The average watermarks also should be updated because of 3.
> +	 */
>  	for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) {
>  		dev = &tegra->devices[i];
>  
> -		tegra_devfreq_update_wmark(tegra, dev);
> +		tegra_devfreq_update_avg_wmark(tegra, dev);
> +		tegra_devfreq_update_wmark(tegra, dev, data->new_rate);
>  	}
>  
>  	return NOTIFY_OK;
> @@ -374,15 +499,14 @@ static int tegra_actmon_rate_notify_cb(struct notifier_block *nb,
>  static void tegra_actmon_configure_device(struct tegra_devfreq *tegra,
>  					  struct tegra_devfreq_device *dev)
>  {
> -	u32 val = 0;
> -
> -	dev->target_freq = tegra->cur_freq;
> +	u32 val = 0, target_freq;
>  
> -	dev->avg_count = tegra->cur_freq * ACTMON_SAMPLING_PERIOD;
> +	target_freq = clk_get_rate(tegra->emc_clock) / KHZ;
> +	dev->avg_count = target_freq * ACTMON_SAMPLING_PERIOD;
>  	device_writel(dev, dev->avg_count, ACTMON_DEV_INIT_AVG);
>  
>  	tegra_devfreq_update_avg_wmark(tegra, dev);
> -	tegra_devfreq_update_wmark(tegra, dev);
> +	tegra_devfreq_update_wmark(tegra, dev, target_freq);
>  
>  	device_writel(dev, ACTMON_COUNT_WEIGHT, ACTMON_DEV_COUNT_WEIGHT);
>  	device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS);
> @@ -469,13 +593,13 @@ static int tegra_devfreq_get_dev_status(struct device *dev,
>  	struct tegra_devfreq_device *actmon_dev;
>  	unsigned long cur_freq;
>  
> -	cur_freq = READ_ONCE(tegra->cur_freq);
> +	cur_freq = clk_get_rate(tegra->emc_clock);
>  
>  	/* To be used by the tegra governor */
>  	stat->private_data = tegra;
>  
>  	/* The below are to be used by the other governors */
> -	stat->current_frequency = cur_freq * KHZ;
> +	stat->current_frequency = cur_freq;
>  
>  	actmon_dev = &tegra->devices[MCALL];
>  
> @@ -486,7 +610,7 @@ static int tegra_devfreq_get_dev_status(struct device *dev,
>  	stat->busy_time *= 100 / BUS_SATURATION_RATIO;
>  
>  	/* Number of cycles in a sampling period */
> -	stat->total_time = ACTMON_SAMPLING_PERIOD * cur_freq;
> +	stat->total_time = cur_freq / KHZ * ACTMON_SAMPLING_PERIOD;
>  
>  	stat->busy_time = min(stat->busy_time, stat->total_time);
>  
> @@ -505,6 +629,7 @@ static int tegra_governor_get_target(struct devfreq *devfreq,
>  	struct devfreq_dev_status *stat;
>  	struct tegra_devfreq *tegra;
>  	struct tegra_devfreq_device *dev;
> +	unsigned long dev_target_freq;
>  	unsigned long target_freq = 0;
>  	unsigned int i;
>  	int err;
> @@ -520,9 +645,9 @@ static int tegra_governor_get_target(struct devfreq *devfreq,
>  	for (i = 0; i < ARRAY_SIZE(tegra->devices); i++) {
>  		dev = &tegra->devices[i];
>  
> -		actmon_update_target(tegra, dev);
> +		dev_target_freq = actmon_update_target(tegra, dev);
>  
> -		target_freq = max(target_freq, dev->target_freq);
> +		target_freq = max(target_freq, dev_target_freq);
>  	}
>  
>  	*freq = target_freq * KHZ;
> @@ -642,7 +767,6 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
>  		return rate;
>  	}
>  
> -	tegra->cur_freq = clk_get_rate(tegra->emc_clock) / KHZ;
>  	tegra->max_freq = rate / KHZ;
>  
>  	for (i = 0; i < ARRAY_SIZE(actmon_device_configs); i++) {
> @@ -671,7 +795,8 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
>  	platform_set_drvdata(pdev, tegra);
>  
>  	tegra->rate_change_nb.notifier_call = tegra_actmon_rate_notify_cb;
> -	err = clk_notifier_register(tegra->emc_clock, &tegra->rate_change_nb);
> +	err = clk_notifier_register(tegra->emc_clock,
> +				    &tegra->rate_change_nb);
>  	if (err) {
>  		dev_err(&pdev->dev,
>  			"Failed to register rate change notifier\n");
> 


Maybe, it is possible to merge patch4/patch19/patch20 to one patch.


-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply

* Re: [PATCH v4 21/24] PM / devfreq: tegra30: Synchronize average count on target's update
From: Chanwoo Choi @ 2019-07-18 10:15 UTC (permalink / raw)
  To: Dmitry Osipenko, Thierry Reding, MyungJoo Ham, Kyungmin Park,
	Jonathan Hunter, Tomeu Vizoso
  Cc: linux-pm, linux-tegra, linux-kernel
In-Reply-To: <20190707223303.6755-22-digetx@gmail.com>

On 19. 7. 8. 오전 7:33, Dmitry Osipenko wrote:
> The average count may get out of sync if interrupt was disabled / avoided
> for a long time due to upper watermark optimization, hence it should be
> re-synced on each target's update to ensure that watermarks are set up
> correctly on EMC rate-change notification and that a correct frequency
> is selected for device.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/devfreq/tegra30-devfreq.c | 30 ++++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
> index 4d582809acb6..8a674fad26be 100644
> --- a/drivers/devfreq/tegra30-devfreq.c
> +++ b/drivers/devfreq/tegra30-devfreq.c
> @@ -466,11 +466,41 @@ static void actmon_isr_device(struct tegra_devfreq *tegra,
>  			      dev->boost_freq, cpufreq_get(0));
>  }
>  
> +static void tegra_devfreq_sync_avg_count(struct tegra_devfreq *tegra,
> +					 struct tegra_devfreq_device *dev)
> +{
> +	u32 avg_count, avg_freq, old_upper, new_upper;
> +
> +	avg_count = device_readl(dev, ACTMON_DEV_AVG_COUNT);
> +	avg_freq = avg_count / ACTMON_SAMPLING_PERIOD;
> +
> +	old_upper = tegra_actmon_upper_freq(tegra, dev->avg_freq);
> +	new_upper = tegra_actmon_upper_freq(tegra, avg_freq);
> +
> +	/* similar to ISR, see comments in actmon_isr_device() */
> +	if (old_upper != new_upper) {
> +		dev->avg_freq = avg_freq;
> +		dev->boost_freq = 0;
> +	}
> +}
> +
>  static unsigned long actmon_update_target(struct tegra_devfreq *tegra,
>  					  struct tegra_devfreq_device *dev)
>  {
>  	unsigned long target_freq;
>  
> +	/*
> +	 * The avg_count / avg_freq is getting snapshoted on device's
> +	 * interrupt, but there are cases where actual value need to
> +	 * be utilized on target's update, like CPUFreq boosting and
> +	 * overriding the min freq via /sys/class/devfreq/devfreq0/min_freq
> +	 * because we're optimizing the upper watermark based on the
> +	 * actual EMC frequency. This means that interrupt may be
> +	 * inactive for a long time and thus making snapshoted value
> +	 * outdated.
> +	 */
> +	tegra_devfreq_sync_avg_count(tegra, dev);

I think that you don't need to add the separate function to calculate
the 'dev->avg_freq'. It is enough with your detailed comment to add
this code in this function.

> +
>  	target_freq = min(dev->avg_freq + dev->boost_freq, KHZ_MAX);
>  	target_freq = tegra_actmon_account_cpu_freq(tegra, dev, target_freq);
>  
> 

And also, is it impossible to squash this patch with patch19/patch20?

-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply

* Re: [PATCH v4 23/24] PM / devfreq: tegra30: Increase sampling period to 16ms
From: Chanwoo Choi @ 2019-07-18 10:00 UTC (permalink / raw)
  To: Dmitry Osipenko, Thierry Reding, MyungJoo Ham, Kyungmin Park,
	Jonathan Hunter, Tomeu Vizoso
  Cc: linux-pm, linux-tegra, linux-kernel
In-Reply-To: <20190707223303.6755-24-digetx@gmail.com>

On 19. 7. 8. 오전 7:33, Dmitry Osipenko wrote:
> Increase sampling period by 4ms to get a nicer pow2 value, converting
> diving into shifts in the code. That's more preferable for Tegra30 that
> doesn't have hardware divider instructions because of older Cortex-A9 CPU.
> In a result boosting events are delayed by 4ms, which is not sensible in
> practice at all.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/devfreq/tegra30-devfreq.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
> index 19e872a64148..bde19b758643 100644
> --- a/drivers/devfreq/tegra30-devfreq.c
> +++ b/drivers/devfreq/tegra30-devfreq.c
> @@ -71,7 +71,7 @@
>   * translates to 2 ^ (K_VAL + 1). ex: 2 ^ (6 + 1) = 128
>   */
>  #define ACTMON_AVERAGE_WINDOW_LOG2				6
> -#define ACTMON_SAMPLING_PERIOD					12 /* ms */
> +#define ACTMON_SAMPLING_PERIOD					16 /* ms */
>  
>  #define KHZ							1000
>  
> 

Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>

-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply

* Re: [PATCH v4 22/24] PM / devfreq: tegra30: Include appropriate header
From: Chanwoo Choi @ 2019-07-18  9:58 UTC (permalink / raw)
  To: Dmitry Osipenko, Thierry Reding, MyungJoo Ham, Kyungmin Park,
	Jonathan Hunter, Tomeu Vizoso
  Cc: linux-pm, linux-tegra, linux-kernel
In-Reply-To: <20190707223303.6755-23-digetx@gmail.com>

On 19. 7. 8. 오전 7:33, Dmitry Osipenko wrote:
> It's not very correct to include mod_devicetable.h for the OF device
> drivers and of_device.h should be included instead.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/devfreq/tegra30-devfreq.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
> index 8a674fad26be..19e872a64148 100644
> --- a/drivers/devfreq/tegra30-devfreq.c
> +++ b/drivers/devfreq/tegra30-devfreq.c
> @@ -13,7 +13,7 @@
>  #include <linux/io.h>
>  #include <linux/irq.h>
>  #include <linux/module.h>
> -#include <linux/mod_devicetable.h>
> +#include <linux/of_device.h>
>  #include <linux/platform_device.h>
>  #include <linux/pm_opp.h>
>  #include <linux/reset.h>
> 

Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>

Is not there other unused header file anymore?

-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply

* Re: [PATCH v4 24/24] PM / devfreq: tegra20/30: Add Dmitry as a maintainer
From: Chanwoo Choi @ 2019-07-18  9:56 UTC (permalink / raw)
  To: Dmitry Osipenko, Thierry Reding, MyungJoo Ham, Kyungmin Park,
	Jonathan Hunter, Tomeu Vizoso
  Cc: linux-pm, linux-tegra, linux-kernel
In-Reply-To: <20190707223303.6755-25-digetx@gmail.com>

On 19. 7. 8. 오전 7:33, Dmitry Osipenko wrote:
> I was contributing to the NVIDIA Tegra20+ devfreq drivers recently and
> want to help keep them working and evolving in the future.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  MAINTAINERS | 9 +++++++++
>  1 file changed, 9 insertions(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 95d4bd85df44..4e47ce737376 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -10356,6 +10356,15 @@ F:	include/linux/memblock.h
>  F:	mm/memblock.c
>  F:	Documentation/core-api/boot-time-mm.rst
>  
> +MEMORY FREQUENCY SCALING DRIVERS FOR NVIDIA TEGRA
> +M:	Dmitry Osipenko <digetx@gmail.com>
> +L:	linux-pm@vger.kernel.org
> +L:	linux-tegra@vger.kernel.org
> +T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
> +S:	Maintained
> +F:	drivers/devfreq/tegra20-devfreq.c
> +F:	drivers/devfreq/tegra30-devfreq.c
> +
>  MEMORY MANAGEMENT
>  L:	linux-mm@kvack.org
>  W:	http://www.linux-mm.org
> 

Acked-by: Chanwoo Choi <cw00.choi@samsung.com>


-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply

* Re: [PATCH v4 19/24] PM / devfreq: tegra30: Optimize upper consecutive watermark selection
From: Chanwoo Choi @ 2019-07-18  9:51 UTC (permalink / raw)
  To: Dmitry Osipenko, Thierry Reding, MyungJoo Ham, Kyungmin Park,
	Jonathan Hunter, Tomeu Vizoso
  Cc: linux-pm, linux-tegra, linux-kernel
In-Reply-To: <20190707223303.6755-20-digetx@gmail.com>

On 19. 7. 8. 오전 7:32, Dmitry Osipenko wrote:
> The memory activity counter may get a bit higher than a watermark which
> is selected based on OPP that corresponds to a highest EMC rate, in this
> case watermark is lower than the actual memory activity is and thus
> results in unwanted "upper" interrupts.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/devfreq/tegra30-devfreq.c | 13 ++++++++++++-
>  1 file changed, 12 insertions(+), 1 deletion(-)

It seems that you can combine patch19 with patch20.

> 
> diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
> index 8d6bf6e9f1ae..c3cf87231d25 100644
> --- a/drivers/devfreq/tegra30-devfreq.c
> +++ b/drivers/devfreq/tegra30-devfreq.c
> @@ -363,7 +363,18 @@ static void tegra_devfreq_update_wmark(struct tegra_devfreq *tegra,
>  	tegra_actmon_get_lower_upper(tegra, dev, freq - 1, &lower, &upper);
>  
>  	delta = do_percent(upper - lower, dev->config->boost_up_threshold);
> -	device_writel(dev, lower + delta, ACTMON_DEV_UPPER_WMARK);
> +
> +	/*
> +	 * The memory events count could go a bit higher than the maximum
> +	 * defined by the OPPs, hence make the upper watermark infinitely
> +	 * high to avoid unnecessary upper interrupts in that case.
> +	 */
> +	if (freq == tegra->max_freq)
> +		upper = ULONG_MAX;
> +	else
> +		upper = lower + delta;
> +
> +	device_writel(dev, upper, ACTMON_DEV_UPPER_WMARK);
>  
>  	/*
>  	 * Meanwhile the lower mark is based on the average value
> 


-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply

* Re: [PATCH v4 18/24] PM / devfreq: tegra30: Optimize CPUFreq notifier
From: Chanwoo Choi @ 2019-07-18  9:48 UTC (permalink / raw)
  To: Dmitry Osipenko, Thierry Reding, MyungJoo Ham, Kyungmin Park,
	Jonathan Hunter, Tomeu Vizoso
  Cc: linux-pm, linux-tegra, linux-kernel
In-Reply-To: <20190707223303.6755-19-digetx@gmail.com>

On 19. 7. 8. 오전 7:32, Dmitry Osipenko wrote:
> When CPU's memory activity is low or memory activity is high such that
> CPU's frequency contribution to the boosting is not taken into account,
> then there is no need to schedule devfreq's update. This eliminates
> unnecessary CPU activity during of idling caused by the scheduled work.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/devfreq/tegra30-devfreq.c | 73 +++++++++++++++++++++++++++----
>  1 file changed, 64 insertions(+), 9 deletions(-)

Patch4 add the 'cpufreq notifier' and this patch optimize the cpufreq notifier.
I think t hat you can combine two patches.

> 
> diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
> index 43c9c5fbfe91..8d6bf6e9f1ae 100644
> --- a/drivers/devfreq/tegra30-devfreq.c
> +++ b/drivers/devfreq/tegra30-devfreq.c
> @@ -216,10 +216,10 @@ static inline unsigned long do_percent(unsigned long val, unsigned int pct)
>  	return val * pct / 100;
>  }
>  
> -static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra)
> +static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra,
> +					    unsigned int cpu_freq)
>  {
>  	const struct tegra_actmon_emc_ratio *ratio = actmon_emc_ratios;
> -	unsigned int cpu_freq = cpufreq_get(0);
>  	unsigned int i;
>  
>  	for (i = 0; i < ARRAY_SIZE(actmon_emc_ratios); i++, ratio++) {
> @@ -239,15 +239,15 @@ tegra_actmon_account_cpu_freq(struct tegra_devfreq *tegra,
>  			      struct tegra_devfreq_device *dev,
>  			      unsigned long target_freq)
>  {
> -	unsigned long static_cpu_emc_freq;
> +	unsigned long cpu_emc_freq = 0;
>  
> -	if (dev->config->avg_dependency_threshold &&
> -	    dev->config->avg_dependency_threshold < dev->avg_freq) {
> -		static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra);
> -		target_freq = max(target_freq, static_cpu_emc_freq);
> -	}
> +	if (!dev->config->avg_dependency_threshold)
> +		return target_freq;
>  
> -	return target_freq;
> +	if (dev->avg_freq > dev->config->avg_dependency_threshold)
> +		cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpufreq_get(0));
> +
> +	return max(target_freq, cpu_emc_freq);
>  }
>  
>  static unsigned long tegra_actmon_lower_freq(struct tegra_devfreq *tegra,
> @@ -531,16 +531,71 @@ static void tegra_actmon_delayed_update(struct work_struct *work)
>  	mutex_unlock(&tegra->devfreq->lock);
>  }
>  
> +static unsigned long
> +tegra_actmon_cpufreq_contribution(struct tegra_devfreq *tegra,
> +				  unsigned int cpu_freq)
> +{
> +	unsigned long freq, static_cpu_emc_freq;
> +
> +	/* check whether CPU's freq is taken into account at all */
> +	if (tegra->devices[MCCPU].avg_freq <=
> +	    tegra->devices[MCCPU].config->avg_dependency_threshold)
> +		return 0;
> +
> +	static_cpu_emc_freq = actmon_cpu_to_emc_rate(tegra, cpu_freq);
> +
> +	/* compare static CPU-EMC freq with MCALL */
> +	freq = tegra->devices[MCALL].avg_freq +
> +	       tegra->devices[MCALL].boost_freq;
> +
> +	freq = tegra_actmon_upper_freq(tegra, freq);
> +
> +	if (freq == tegra->max_freq || freq >= static_cpu_emc_freq)
> +		return 0;
> +
> +	/* compare static CPU-EMC freq with MCCPU */
> +	freq = tegra->devices[MCCPU].avg_freq +
> +	       tegra->devices[MCCPU].boost_freq;
> +
> +	freq = tegra_actmon_upper_freq(tegra, freq);
> +
> +	if (freq == tegra->max_freq || freq >= static_cpu_emc_freq)
> +		return 0;
> +
> +	return static_cpu_emc_freq;
> +}
> +
>  static int tegra_actmon_cpu_notify_cb(struct notifier_block *nb,
>  				      unsigned long action, void *ptr)
>  {
> +	struct cpufreq_freqs *freqs = ptr;
>  	struct tegra_devfreq *tegra;
> +	unsigned long old, new;
>  
>  	if (action != CPUFREQ_POSTCHANGE)
>  		return NOTIFY_OK;
>  
>  	tegra = container_of(nb, struct tegra_devfreq, cpu_rate_change_nb);
>  
> +	/*
> +	 * Quickly check whether CPU frequency should be taken into account
> +	 * at all, without blocking CPUFreq's core.
> +	 */
> +	if (mutex_trylock(&tegra->devfreq->lock)) {
> +		old = tegra_actmon_cpufreq_contribution(tegra, freqs->old);
> +		new = tegra_actmon_cpufreq_contribution(tegra, freqs->new);
> +		mutex_unlock(&tegra->devfreq->lock);
> +
> +		/*
> +		 * If CPU's frequency shouldn't be taken into account at
> +		 * the moment, then there is no need to update the devfreq's
> +		 * state because ISR will re-check CPU's frequency on the
> +		 * next interrupt.
> +		 */
> +		if (old == new)
> +			return NOTIFY_OK;
> +	}
> +
>  	/*
>  	 * CPUFreq driver should support CPUFREQ_ASYNC_NOTIFICATION in order
>  	 * to allow asynchronous notifications. This means we can't block
> 


-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply

* Re: [PATCH v4 17/24] PM / devfreq: tegra30: Use tracepoints for debugging
From: Chanwoo Choi @ 2019-07-18  9:47 UTC (permalink / raw)
  To: Dmitry Osipenko, Thierry Reding, MyungJoo Ham, Kyungmin Park,
	Jonathan Hunter, Tomeu Vizoso
  Cc: linux-pm, linux-tegra, linux-kernel
In-Reply-To: <20190707223303.6755-18-digetx@gmail.com>

On 19. 7. 8. 오전 7:32, Dmitry Osipenko wrote:
> Debug messages create too much CPU and memory activity by themselves,
> so it's difficult to debug lower rates and catch unwanted interrupts
> that happen rarely. Tracepoints are ideal in that regards because they
> do not contribute to the sampled date at all. This allowed me to catch
> few problems which are fixed by the followup patches, without tracepoints
> it would be much harder to do.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/devfreq/tegra30-devfreq.c      |  43 +++-------
>  include/trace/events/tegra30_devfreq.h | 105 +++++++++++++++++++++++++
>  2 files changed, 117 insertions(+), 31 deletions(-)
>  create mode 100644 include/trace/events/tegra30_devfreq.h

As I knew, 'include/trace/events' don't include the header file
for only one device driver. Usually, the trace event is provided
by framework instead of each devic driver.

> 
> diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
> index 6ebf0f505767..43c9c5fbfe91 100644
> --- a/drivers/devfreq/tegra30-devfreq.c
> +++ b/drivers/devfreq/tegra30-devfreq.c
> @@ -19,6 +19,9 @@
>  #include <linux/reset.h>
>  #include <linux/workqueue.h>
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/tegra30_devfreq.h>
> +
>  #include "governor.h"
>  
>  #define ACTMON_GLB_STATUS					0x0
> @@ -283,9 +286,6 @@ static void tegra_actmon_get_lower_upper(struct tegra_devfreq *tegra,
>  					 unsigned long *lower,
>  					 unsigned long *upper)
>  {
> -	struct device *ddev = tegra->devfreq->dev.parent;
> -	u32 offset = dev->config->offset;
> -
>  	/*
>  	 * Memory frequencies are guaranteed to have 1MHz granularity
>  	 * and thus we need this rounding down to get a proper watermarks
> @@ -298,8 +298,8 @@ static void tegra_actmon_get_lower_upper(struct tegra_devfreq *tegra,
>  	*lower = tegra_actmon_lower_freq(tegra, target_freq);
>  	*upper = tegra_actmon_upper_freq(tegra, target_freq);
>  
> -	dev_dbg(ddev, "%03x: target_freq %lu lower freq %lu upper freq %lu\n",
> -		offset, target_freq, *lower, *upper);
> +	trace_device_lower_upper(dev->config->offset, target_freq,
> +				 *lower, *upper);
>  
>  	/*
>  	 * The upper watermark should take into account CPU's frequency
> @@ -377,30 +377,13 @@ static void tegra_devfreq_update_wmark(struct tegra_devfreq *tegra,
>  	device_writel(dev, lower + delta, ACTMON_DEV_LOWER_WMARK);
>  }
>  
> -static void actmon_device_debug(struct tegra_devfreq *tegra,
> -				struct tegra_devfreq_device *dev,
> -				const char *prefix)
> -{
> -	dev_dbg(tegra->devfreq->dev.parent,
> -		"%03x: %s: 0x%08x 0x%08x a %u %u %u c %u %u %u b %lu cpu %u\n",
> -		dev->config->offset, prefix,
> -		device_readl(dev, ACTMON_DEV_INTR_STATUS),
> -		device_readl(dev, ACTMON_DEV_CTRL),
> -		device_readl(dev, ACTMON_DEV_AVG_COUNT),
> -		device_readl(dev, ACTMON_DEV_AVG_LOWER_WMARK),
> -		device_readl(dev, ACTMON_DEV_AVG_UPPER_WMARK),
> -		device_readl(dev, ACTMON_DEV_COUNT),
> -		device_readl(dev, ACTMON_DEV_LOWER_WMARK),
> -		device_readl(dev, ACTMON_DEV_UPPER_WMARK),
> -		dev->boost_freq, cpufreq_get(0));
> -}
> -
>  static void actmon_isr_device(struct tegra_devfreq *tegra,
>  			      struct tegra_devfreq_device *dev)
>  {
>  	u32 intr_status, dev_ctrl, avg_intr_mask, avg_count;
>  
> -	actmon_device_debug(tegra, dev, "isr+");
> +	trace_device_isr_enter(tegra->regs, dev->config->offset,
> +			       dev->boost_freq, cpufreq_get(0));
>  
>  	intr_status = device_readl(dev, ACTMON_DEV_INTR_STATUS);
>  	avg_count = device_readl(dev, ACTMON_DEV_AVG_COUNT);
> @@ -455,7 +438,8 @@ static void actmon_isr_device(struct tegra_devfreq *tegra,
>  	device_writel(dev, dev_ctrl, ACTMON_DEV_CTRL);
>  	device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS);
>  
> -	actmon_device_debug(tegra, dev, "isr-");
> +	trace_device_isr_exit(tegra->regs, dev->config->offset,
> +			      dev->boost_freq, cpufreq_get(0));
>  }
>  
>  static unsigned long actmon_update_target(struct tegra_devfreq *tegra,
> @@ -749,7 +733,6 @@ static struct devfreq_dev_profile tegra_devfreq_profile = {
>  static int tegra_governor_get_target(struct devfreq *devfreq,
>  				     unsigned long *freq)
>  {
> -	struct device *ddev = devfreq->dev.parent;
>  	struct devfreq_dev_status *stat;
>  	struct tegra_devfreq *tegra;
>  	struct tegra_devfreq_device *dev;
> @@ -770,13 +753,11 @@ static int tegra_governor_get_target(struct devfreq *devfreq,
>  		dev = &tegra->devices[i];
>  
>  		dev_target_freq = actmon_update_target(tegra, dev);
> -
>  		target_freq = max(target_freq, dev_target_freq);
>  
> -		dev_dbg(ddev, "%03x: upd: dev_target_freq %lu\n",
> -			dev->config->offset, dev_target_freq);
> -
> -		actmon_device_debug(tegra, dev, "upd");
> +		trace_device_target_freq(dev->config->offset, dev_target_freq);
> +		trace_device_target_update(tegra->regs, dev->config->offset,
> +					   dev->boost_freq, cpufreq_get(0));
>  	}
>  
>  	*freq = target_freq;
> diff --git a/include/trace/events/tegra30_devfreq.h b/include/trace/events/tegra30_devfreq.h
> new file mode 100644
> index 000000000000..8f264a489daf
> --- /dev/null
> +++ b/include/trace/events/tegra30_devfreq.h
> @@ -0,0 +1,105 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM tegra30_devfreq
> +
> +#if !defined(_TRACE_TEGRA30_DEVFREQ_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_TEGRA30_DEVFREQ_H
> +
> +#include <linux/io.h>
> +#include <linux/tracepoint.h>
> +#include <linux/types.h>
> +
> +DECLARE_EVENT_CLASS(device_state,
> +	TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq),
> +	TP_ARGS(base, offset, boost, cpufreq),
> +	TP_STRUCT__entry(
> +		__field(u32, offset)
> +		__field(u32, intr_status)
> +		__field(u32, ctrl)
> +		__field(u32, avg_count)
> +		__field(u32, avg_lower)
> +		__field(u32, avg_upper)
> +		__field(u32, count)
> +		__field(u32, lower)
> +		__field(u32, upper)
> +		__field(u32, boost_freq)
> +		__field(u32, cpu_freq)
> +	),
> +	TP_fast_assign(
> +		__entry->offset		= offset;
> +		__entry->intr_status	= readl_relaxed(base + offset + 0x24);
> +		__entry->ctrl		= readl_relaxed(base + offset + 0x0);
> +		__entry->avg_count	= readl_relaxed(base + offset + 0x20);
> +		__entry->avg_lower	= readl_relaxed(base + offset + 0x14);
> +		__entry->avg_upper	= readl_relaxed(base + offset + 0x10);
> +		__entry->count		= readl_relaxed(base + offset + 0x1c);
> +		__entry->lower		= readl_relaxed(base + offset + 0x8);
> +		__entry->upper		= readl_relaxed(base + offset + 0x4);
> +		__entry->boost_freq	= boost;
> +		__entry->cpu_freq	= cpufreq;
> +	),
> +	TP_printk("%03x: intr 0x%08x ctrl 0x%08x avg %010u %010u %010u cnt %010u %010u %010u boost %010u cpu %u",
> +		__entry->offset,
> +		__entry->intr_status,
> +		__entry->ctrl,
> +		__entry->avg_count,
> +		__entry->avg_lower,
> +		__entry->avg_upper,
> +		__entry->count,
> +		__entry->lower,
> +		__entry->upper,
> +		__entry->boost_freq,
> +		__entry->cpu_freq)
> +);
> +
> +DEFINE_EVENT(device_state, device_isr_enter,
> +	TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq),
> +	TP_ARGS(base, offset, boost, cpufreq));
> +
> +DEFINE_EVENT(device_state, device_isr_exit,
> +	TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq),
> +	TP_ARGS(base, offset, boost, cpufreq));
> +
> +DEFINE_EVENT(device_state, device_target_update,
> +	TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq),
> +	TP_ARGS(base, offset, boost, cpufreq));
> +
> +TRACE_EVENT(device_lower_upper,
> +	TP_PROTO(u32 offset, u32 target, u32 lower, u32 upper),
> +	TP_ARGS(offset, target, lower, upper),
> +	TP_STRUCT__entry(
> +		__field(u32, offset)
> +		__field(u32, target)
> +		__field(u32, lower)
> +		__field(u32, upper)
> +	),
> +	TP_fast_assign(
> +		__entry->offset = offset;
> +		__entry->target = target;
> +		__entry->lower = lower;
> +		__entry->upper = upper;
> +	),
> +	TP_printk("%03x: freq %010u lower freq %010u upper freq %010u",
> +		__entry->offset,
> +		__entry->target,
> +		__entry->lower,
> +		__entry->upper)
> +);
> +
> +TRACE_EVENT(device_target_freq,
> +	TP_PROTO(u32 offset, u32 target),
> +	TP_ARGS(offset, target),
> +	TP_STRUCT__entry(
> +		__field(u32, offset)
> +		__field(u32, target)
> +	),
> +	TP_fast_assign(
> +		__entry->offset = offset;
> +		__entry->target = target;
> +	),
> +	TP_printk("%03x: freq %010u", __entry->offset, __entry->target)
> +);
> +#endif /* _TRACE_TEGRA30_DEVFREQ_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> 


-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply

* [GIT PULL] More power management updates for v5.3-rc1
From: Rafael J. Wysocki @ 2019-07-18  9:06 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Linux PM, ACPI Devel Maling List, Linux Kernel Mailing List

Hi Linus,

Please pull from the tag

 git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git \
 pm-5.3-rc1-2

with top-most commit 918e162e6a71e924a343b41f71789ad14e1e3229

 Merge branch 'pm-cpufreq'

on top of commit cf2d213e49fdf47e4c10dc629a3659e0026a54b8

 Merge tag 'pm-5.3-rc1' of
git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

to receive more power management updates for 5.3-rc1.

These modify the Intel RAPL driver to allow it to use an MMIO
interface to the hardware, make the int340X thermal driver provide
such an interface for it, add Intel Ice Lake CPU IDs to the RAPL
driver (these changes depend on the previously merged x86 arch
changes), update cpufreq to use the PM QoS framework for managing
the min and max frequency limits, and add update the imx-cpufreq-dt
cpufreq driver to support i.MX8MN.

Specifics:

 - Add MMIO interface support to the Intel RAPL power capping
   driver and update the int340X thermal driver to provide a
   RAPL MMIO interface (Zhang Rui, Stephen Rothwell).

 - Add Intel Ice Lake CPU IDs to the RAPL driver (Zhang Rui,
   Rajneesh Bhardwaj).

 - Make cpufreq use the PM QoS framework (instead of notifiers) for
   managing the min and max frequency constraints (Viresh Kumar).

 - Add i.MX8MN support to the imx-cpufreq-dt cpufreq driver (Anson
   Huang).

Thanks!


---------------

Anson Huang (1):
      cpufreq: imx-cpufreq-dt: Add i.MX8MN support

Rajneesh Bhardwaj (1):
      powercap/rapl: Add Ice Lake NNPI support to RAPL driver

Stephen Rothwell (1):
      intel_rapl: need linux/cpuhotplug.h for enum cpuhp_state

Viresh Kumar (8):
      PM / QOS: Pass request type to dev_pm_qos_{add|remove}_notifier()
      PM / QOS: Rename __dev_pm_qos_read_value() and dev_pm_qos_raw_read_value()
      PM / QOS: Pass request type to dev_pm_qos_read_value()
      PM / QoS: Add support for MIN/MAX frequency constraints
      cpufreq: Register notifiers with the PM QoS framework
      cpufreq: intel_pstate: Reuse refresh_frequency_limits()
      cpufreq: Add QoS requests for userspace constraints
      cpufreq: Make cpufreq_generic_init() return void

Zhang Rui (16):
      intel_rapl: use reg instead of msr
      intel_rapl: remove hardcoded register index
      intel_rapl: introduce intel_rapl.h
      intel_rapl: introduce struct rapl_if_private
      intel_rapl: abstract register address
      intel_rapl: abstract register access operations
      intel_rapl: cleanup some functions
      intel_rapl: cleanup hardcoded MSR access
      intel_rapl: abstract RAPL common code
      intel_rapl: support 64 bit register
      intel_rapl: support two power limits for every RAPL domain
      int340X/processor_thermal_device: add support for MMIO RAPL
      intel_rapl: Fix module autoloading issue
      powercap/intel_rapl: add support for IceLake desktop
      powercap/intel_rapl: add support for ICX
      powercap/intel_rapl: add support for ICX-D

---------------

 Documentation/power/pm_qos_interface.txt           |  12 +-
 MAINTAINERS                                        |   1 +
 drivers/base/power/domain.c                        |   8 +-
 drivers/base/power/domain_governor.c               |   4 +-
 drivers/base/power/qos.c                           | 135 +++-
 drivers/base/power/runtime.c                       |   2 +-
 drivers/cpufreq/bmips-cpufreq.c                    |  17 +-
 drivers/cpufreq/cpufreq.c                          | 216 ++++--
 drivers/cpufreq/davinci-cpufreq.c                  |   3 +-
 drivers/cpufreq/imx-cpufreq-dt.c                   |   3 +-
 drivers/cpufreq/imx6q-cpufreq.c                    |   6 +-
 drivers/cpufreq/intel_pstate.c                     |   7 +-
 drivers/cpufreq/kirkwood-cpufreq.c                 |   3 +-
 drivers/cpufreq/loongson1-cpufreq.c                |   8 +-
 drivers/cpufreq/loongson2_cpufreq.c                |   3 +-
 drivers/cpufreq/maple-cpufreq.c                    |   3 +-
 drivers/cpufreq/omap-cpufreq.c                     |  15 +-
 drivers/cpufreq/pasemi-cpufreq.c                   |   3 +-
 drivers/cpufreq/pmac32-cpufreq.c                   |   3 +-
 drivers/cpufreq/pmac64-cpufreq.c                   |   3 +-
 drivers/cpufreq/s3c2416-cpufreq.c                  |   9 +-
 drivers/cpufreq/s3c64xx-cpufreq.c                  |  15 +-
 drivers/cpufreq/s5pv210-cpufreq.c                  |   3 +-
 drivers/cpufreq/sa1100-cpufreq.c                   |   3 +-
 drivers/cpufreq/sa1110-cpufreq.c                   |   3 +-
 drivers/cpufreq/spear-cpufreq.c                    |   3 +-
 drivers/cpufreq/tegra20-cpufreq.c                  |   8 +-
 drivers/cpuidle/governor.c                         |   2 +-
 drivers/powercap/Kconfig                           |  11 +-
 drivers/powercap/Makefile                          |   3 +-
 .../powercap/{intel_rapl.c => intel_rapl_common.c} | 801 ++++++++-------------
 drivers/powercap/intel_rapl_msr.c                  | 183 +++++
 drivers/thermal/intel/int340x_thermal/Kconfig      |   6 +
 .../int340x_thermal/processor_thermal_device.c     | 173 ++++-
 include/linux/cpufreq.h                            |  14 +-
 include/linux/intel_rapl.h                         | 155 ++++
 include/linux/pm_qos.h                             |  48 +-
 37 files changed, 1196 insertions(+), 699 deletions(-)

^ permalink raw reply

* Re: [PATCH v4 12/24] PM / devfreq: tegra30: Inline all one-line functions
From: Chanwoo Choi @ 2019-07-18  9:09 UTC (permalink / raw)
  To: Dmitry Osipenko, Thierry Reding, MyungJoo Ham, Kyungmin Park,
	Jonathan Hunter, Tomeu Vizoso
  Cc: linux-pm, linux-tegra, linux-kernel
In-Reply-To: <b7da3fa2-00d1-5bd6-408c-202c85be917d@gmail.com>

On 19. 7. 16. 오후 10:35, Dmitry Osipenko wrote:
> 16.07.2019 15:26, Chanwoo Choi пишет:
>> Hi Dmitry,
>>
>> I'm not sure that it is necessary.
>> As I knew, usally, the 'inline' is used on header file
>> to define the empty functions.
>>
>> Do we have to change it with 'inline' keyword?
> 
> The 'inline' attribute tells compiler that instead of jumping into the
> function, it should take the function's code and replace the function's
> invocation with that code. This is done in order to help compiler
> optimize code properly, please see [1]. There is absolutely no need to
> create a function call into a function that consists of a single
> instruction.
> 
> [1] https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Inline.html
> 

If you want to add 'inline' keyword, I recommend that 
you better to remove the modified function in this patch
and then just call the 'write_relaxed or read_relaxed' function
directly. It is same result when using inline keyword.


-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply

* Re: [PATCH v4 11/24] PM / devfreq: tegra30: Add debug messages
From: Chanwoo Choi @ 2019-07-18  9:07 UTC (permalink / raw)
  To: Dmitry Osipenko, Thierry Reding, MyungJoo Ham, Kyungmin Park,
	Jonathan Hunter, Tomeu Vizoso
  Cc: linux-pm, linux-tegra, linux-kernel
In-Reply-To: <f819c226-4328-c85d-5da3-932391fa6747@gmail.com>

On 19. 7. 18. 오전 12:46, Dmitry Osipenko wrote:
> 17.07.2019 9:45, Chanwoo Choi пишет:
>> On 19. 7. 16. 오후 10:26, Dmitry Osipenko wrote:
>>> 16.07.2019 15:23, Chanwoo Choi пишет:
>>>> Hi Dmitry,
>>>>
>>>> Usually, the kernel log print for all users
>>>> such as changing the frequency, fail or success.
>>>>
>>>> But, if the log just show the register dump,
>>>> it is not useful for all users. It is just used
>>>> for only specific developer.
>>>>
>>>> I recommend that you better to add more exception handling
>>>> code on many points instead of just showing the register dump.
>>>
>>> The debug messages are not users, but for developers. Yes, I primarily
>>> made the debugging to be useful for myself and will be happy to change
>>> the way debugging is done if there will be any other active developer
>>> for this driver. The registers dump is more than enough in order to
>>> understand what's going on, I don't see any real need to change anything
>>> here for now.
>>
>> Basically, we have to develop code and add the log for anyone.
>> As you commented, even if there are no other developer, we never
>> guarantee this assumption forever. And also, if added debug message
>> for only you, you can add them when testing it temporarily.
>>
>> If you want to add the just register dump log for you,
>> I can't agree. Once again, I hope that anyone understand
>> the meaning of debug message as much possible as.
>>
> 
> The registers dump should be good for everyone because it's a
> self-explanatory information for anyone who is familiar with the
> hardware. I don't think there is a need for anything else than what is
> proposed in this patch, at least for now. I also simply don't see any
> other better way to debug the state of this particular hardware, again
> this logging is for the driver developers and not for users.
> 
> Initially, I was temporarily adding the debug messages. Now they are
> pretty much mandatory for verifying that driver is working properly. And
> of course the debugging messages got into the shape of this patch after
> several iterations of refinements. So again, I suppose that this should
> be good enough for everyone who is familiar with the hardware. And of
> course I'm open to the constructive suggestions, the debugging aid is
> not an ABI and could be changed/improved at any time.
> 
> You're suggesting to break down the debugging into several smaller
> pieces, but I'm finding that as not a constructive suggestion because
> the information about the full hardware state is actually necessary for
> the productive debugging.
> 
> 

Sorry for that as I saie, I cannot agree this patch. In my case,
I don't understand what is meaning of register dump of this patch.
I knew that just register dump are useful for real developer.

If you want to show the register dump, you better to add some feature
with debugfs for devfreq framework in order to read the register dump.
As I knew, sound framework (alsa) has the similar feature for checking
the register dump.

-- 
Best Regards,
Chanwoo Choi
Samsung Electronics

^ permalink raw reply

* [PATCH] cpuidle: menu: Allow tick to be stopped if PM QoS is used
From: Rafael J. Wysocki @ 2019-07-18  8:53 UTC (permalink / raw)
  To: Linux PM; +Cc: Thomas Lindroth, LKML, Peter Zijlstra, Frederic Weisbecker

From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

After commit 554c8aa8ecad ("sched: idle: Select idle state before
stopping the tick") the menu governor prevents the scheduler tick from
being stopped (unless stopped already) if there is a PM QoS latency
constraint for the given CPU and the target residency of the deepest
idle state matching that constraint is below the tick boundary.

However, that is problematic if CPUs with PM QoS latency constraints
are idle for long times, because it effectively causes the tick to
run on them all the time which is wasteful.  [It is also confusing
and questionable if they are full dynticks CPUs.]

To address that issue, make the menu governor allow the tick to be
stopped only if the idle duration predicted by it is beyond the tick
boundary, except when the shallowest idle state is selected upfront
and it is not a "polling" one.

Fixes: 554c8aa8ecad ("sched: idle: Select idle state before stopping the tick")
Link: https://lore.kernel.org/lkml/79b247b3-e056-610e-9a07-e685dfdaa6c9@gmail.com/
Reported-by: Thomas Lindroth <thomas.lindroth@gmail.com>
Tested-by: Thomas Lindroth <thomas.lindroth@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/menu.c |   16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

Index: linux-pm/drivers/cpuidle/governors/menu.c
===================================================================
--- linux-pm.orig/drivers/cpuidle/governors/menu.c
+++ linux-pm/drivers/cpuidle/governors/menu.c
@@ -302,9 +302,10 @@ static int menu_select(struct cpuidle_dr
 	     !drv->states[0].disabled && !dev->states_usage[0].disable)) {
 		/*
 		 * In this case state[0] will be used no matter what, so return
-		 * it right away and keep the tick running.
+		 * it right away and keep the tick running if state[0] is a
+		 * polling one.
 		 */
-		*stop_tick = false;
+		*stop_tick = !(drv->states[0].flags & CPUIDLE_FLAG_POLLING);
 		return 0;
 	}

@@ -395,16 +396,9 @@ static int menu_select(struct cpuidle_dr

 			return idx;
 		}
-		if (s->exit_latency > latency_req) {
-			/*
-			 * If we break out of the loop for latency reasons, use
-			 * the target residency of the selected state as the
-			 * expected idle duration so that the tick is retained
-			 * as long as that target residency is low enough.
-			 */
-			predicted_us = drv->states[idx].target_residency;
+		if (s->exit_latency > latency_req)
 			break;
-		}
+
 		idx = i;
 	}

^ permalink raw reply

* Re: [PATCH V3] cpufreq: Make cpufreq_generic_init() return void
From: Rafael J. Wysocki @ 2019-07-18  8:47 UTC (permalink / raw)
  To: Viresh Kumar
  Cc: Markus Mayer, bcm-kernel-feedback-list, Shawn Guo, Sascha Hauer,
	Pengutronix Kernel Team, Fabio Estevam, NXP Linux Team,
	Keguang Zhang, Jiaxun Yang, Kevin Hilman, Benjamin Herrenschmidt,
	Paul Mackerras, Michael Ellerman, Kukjin Kim, Krzysztof Kozlowski,
	Thierry Reding, Jonathan Hunter, linux-pm, Vincent Guittot,
	linux-kernel, linux-arm-kernel, linux-mips, linux-omap,
	linuxppc-dev, linux-samsung-soc, linux-tegra
In-Reply-To: <770b46d99e2fa88bc8cdfd95388374284c8b3cf8.1563249700.git.viresh.kumar@linaro.org>

On Tuesday, July 16, 2019 6:06:08 AM CEST Viresh Kumar wrote:
> It always returns 0 (success) and its return type should really be void.
> Over that, many drivers have added error handling code based on its
> return value, which is not required at all.
> 
> change its return type to void and update all the callers.
> 
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
> V2->V3:
> - Update bmips cpufreq driver to avoid "warning: 'ret' may be used
>   uninitialized".
> - Build bot reported this issue almost after 4 days of posting this
>   patch, I was expecting this a lot earlier :)
> 
>  drivers/cpufreq/bmips-cpufreq.c     | 17 ++++++-----------
>  drivers/cpufreq/cpufreq.c           |  4 +---
>  drivers/cpufreq/davinci-cpufreq.c   |  3 ++-
>  drivers/cpufreq/imx6q-cpufreq.c     |  6 ++----
>  drivers/cpufreq/kirkwood-cpufreq.c  |  3 ++-
>  drivers/cpufreq/loongson1-cpufreq.c |  8 +++-----
>  drivers/cpufreq/loongson2_cpufreq.c |  3 ++-
>  drivers/cpufreq/maple-cpufreq.c     |  3 ++-
>  drivers/cpufreq/omap-cpufreq.c      | 15 +++++----------
>  drivers/cpufreq/pasemi-cpufreq.c    |  3 ++-
>  drivers/cpufreq/pmac32-cpufreq.c    |  3 ++-
>  drivers/cpufreq/pmac64-cpufreq.c    |  3 ++-
>  drivers/cpufreq/s3c2416-cpufreq.c   |  9 ++-------
>  drivers/cpufreq/s3c64xx-cpufreq.c   | 15 +++------------
>  drivers/cpufreq/s5pv210-cpufreq.c   |  3 ++-
>  drivers/cpufreq/sa1100-cpufreq.c    |  3 ++-
>  drivers/cpufreq/sa1110-cpufreq.c    |  3 ++-
>  drivers/cpufreq/spear-cpufreq.c     |  3 ++-
>  drivers/cpufreq/tegra20-cpufreq.c   |  8 +-------
>  include/linux/cpufreq.h             |  2 +-
>  20 files changed, 46 insertions(+), 71 deletions(-)
> 
> diff --git a/drivers/cpufreq/bmips-cpufreq.c b/drivers/cpufreq/bmips-cpufreq.c
> index 56a4ebbf00e0..f7c23fa468f0 100644
> --- a/drivers/cpufreq/bmips-cpufreq.c
> +++ b/drivers/cpufreq/bmips-cpufreq.c
> @@ -131,23 +131,18 @@ static int bmips_cpufreq_exit(struct cpufreq_policy *policy)
>  static int bmips_cpufreq_init(struct cpufreq_policy *policy)
>  {
>  	struct cpufreq_frequency_table *freq_table;
> -	int ret;
>  
>  	freq_table = bmips_cpufreq_get_freq_table(policy);
>  	if (IS_ERR(freq_table)) {
> -		ret = PTR_ERR(freq_table);
> -		pr_err("%s: couldn't determine frequency table (%d).\n",
> -			BMIPS_CPUFREQ_NAME, ret);
> -		return ret;
> +		pr_err("%s: couldn't determine frequency table (%ld).\n",
> +			BMIPS_CPUFREQ_NAME, PTR_ERR(freq_table));
> +		return PTR_ERR(freq_table);
>  	}
>  
> -	ret = cpufreq_generic_init(policy, freq_table, TRANSITION_LATENCY);
> -	if (ret)
> -		bmips_cpufreq_exit(policy);
> -	else
> -		pr_info("%s: registered\n", BMIPS_CPUFREQ_NAME);
> +	cpufreq_generic_init(policy, freq_table, TRANSITION_LATENCY);
> +	pr_info("%s: registered\n", BMIPS_CPUFREQ_NAME);
>  
> -	return ret;
> +	return 0;
>  }
>  
>  static struct cpufreq_driver bmips_cpufreq_driver = {
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 4d6043ee7834..8dda62367816 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(arch_set_freq_scale);
>   * - set policies transition latency
>   * - policy->cpus with all possible CPUs
>   */
> -int cpufreq_generic_init(struct cpufreq_policy *policy,
> +void cpufreq_generic_init(struct cpufreq_policy *policy,
>  		struct cpufreq_frequency_table *table,
>  		unsigned int transition_latency)
>  {
> @@ -171,8 +171,6 @@ int cpufreq_generic_init(struct cpufreq_policy *policy,
>  	 * share the clock and voltage and clock.
>  	 */
>  	cpumask_setall(policy->cpus);
> -
> -	return 0;
>  }
>  EXPORT_SYMBOL_GPL(cpufreq_generic_init);
>  
> diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c
> index 3de48ae60c29..297d23cad8b5 100644
> --- a/drivers/cpufreq/davinci-cpufreq.c
> +++ b/drivers/cpufreq/davinci-cpufreq.c
> @@ -90,7 +90,8 @@ static int davinci_cpu_init(struct cpufreq_policy *policy)
>  	 * Setting the latency to 2000 us to accommodate addition of drivers
>  	 * to pre/post change notification list.
>  	 */
> -	return cpufreq_generic_init(policy, freq_table, 2000 * 1000);
> +	cpufreq_generic_init(policy, freq_table, 2000 * 1000);
> +	return 0;
>  }
>  
>  static struct cpufreq_driver davinci_driver = {
> diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
> index 47ccfa6b17b7..648a09a1778a 100644
> --- a/drivers/cpufreq/imx6q-cpufreq.c
> +++ b/drivers/cpufreq/imx6q-cpufreq.c
> @@ -190,14 +190,12 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index)
>  
>  static int imx6q_cpufreq_init(struct cpufreq_policy *policy)
>  {
> -	int ret;
> -
>  	policy->clk = clks[ARM].clk;
> -	ret = cpufreq_generic_init(policy, freq_table, transition_latency);
> +	cpufreq_generic_init(policy, freq_table, transition_latency);
>  	policy->suspend_freq = max_freq;
>  	dev_pm_opp_of_register_em(policy->cpus);
>  
> -	return ret;
> +	return 0;
>  }
>  
>  static struct cpufreq_driver imx6q_cpufreq_driver = {
> diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c
> index 7ab564c1f7ae..cb74bdc5baaa 100644
> --- a/drivers/cpufreq/kirkwood-cpufreq.c
> +++ b/drivers/cpufreq/kirkwood-cpufreq.c
> @@ -85,7 +85,8 @@ static int kirkwood_cpufreq_target(struct cpufreq_policy *policy,
>  /* Module init and exit code */
>  static int kirkwood_cpufreq_cpu_init(struct cpufreq_policy *policy)
>  {
> -	return cpufreq_generic_init(policy, kirkwood_freq_table, 5000);
> +	cpufreq_generic_init(policy, kirkwood_freq_table, 5000);
> +	return 0;
>  }
>  
>  static struct cpufreq_driver kirkwood_cpufreq_driver = {
> diff --git a/drivers/cpufreq/loongson1-cpufreq.c b/drivers/cpufreq/loongson1-cpufreq.c
> index 21c9ce8526c0..0ea88778882a 100644
> --- a/drivers/cpufreq/loongson1-cpufreq.c
> +++ b/drivers/cpufreq/loongson1-cpufreq.c
> @@ -81,7 +81,7 @@ static int ls1x_cpufreq_init(struct cpufreq_policy *policy)
>  	struct device *cpu_dev = get_cpu_device(policy->cpu);
>  	struct cpufreq_frequency_table *freq_tbl;
>  	unsigned int pll_freq, freq;
> -	int steps, i, ret;
> +	int steps, i;
>  
>  	pll_freq = clk_get_rate(cpufreq->pll_clk) / 1000;
>  
> @@ -103,11 +103,9 @@ static int ls1x_cpufreq_init(struct cpufreq_policy *policy)
>  	freq_tbl[i].frequency = CPUFREQ_TABLE_END;
>  
>  	policy->clk = cpufreq->clk;
> -	ret = cpufreq_generic_init(policy, freq_tbl, 0);
> -	if (ret)
> -		kfree(freq_tbl);
> +	cpufreq_generic_init(policy, freq_tbl, 0);
>  
> -	return ret;
> +	return 0;
>  }
>  
>  static int ls1x_cpufreq_exit(struct cpufreq_policy *policy)
> diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
> index da344696beed..890813e0bb76 100644
> --- a/drivers/cpufreq/loongson2_cpufreq.c
> +++ b/drivers/cpufreq/loongson2_cpufreq.c
> @@ -95,7 +95,8 @@ static int loongson2_cpufreq_cpu_init(struct cpufreq_policy *policy)
>  	}
>  
>  	policy->clk = cpuclk;
> -	return cpufreq_generic_init(policy, &loongson2_clockmod_table[0], 0);
> +	cpufreq_generic_init(policy, &loongson2_clockmod_table[0], 0);
> +	return 0;
>  }
>  
>  static int loongson2_cpufreq_exit(struct cpufreq_policy *policy)
> diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c
> index f5220b3d4ec5..28d346062166 100644
> --- a/drivers/cpufreq/maple-cpufreq.c
> +++ b/drivers/cpufreq/maple-cpufreq.c
> @@ -140,7 +140,8 @@ static unsigned int maple_cpufreq_get_speed(unsigned int cpu)
>  
>  static int maple_cpufreq_cpu_init(struct cpufreq_policy *policy)
>  {
> -	return cpufreq_generic_init(policy, maple_cpu_freqs, 12000);
> +	cpufreq_generic_init(policy, maple_cpu_freqs, 12000);
> +	return 0;
>  }
>  
>  static struct cpufreq_driver maple_cpufreq_driver = {
> diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
> index 29643f06a3c3..8d14b42a8c6f 100644
> --- a/drivers/cpufreq/omap-cpufreq.c
> +++ b/drivers/cpufreq/omap-cpufreq.c
> @@ -122,23 +122,18 @@ static int omap_cpu_init(struct cpufreq_policy *policy)
>  			dev_err(mpu_dev,
>  				"%s: cpu%d: failed creating freq table[%d]\n",
>  				__func__, policy->cpu, result);
> -			goto fail;
> +			clk_put(policy->clk);
> +			return result;
>  		}
>  	}
>  
>  	atomic_inc_return(&freq_table_users);
>  
>  	/* FIXME: what's the actual transition time? */
> -	result = cpufreq_generic_init(policy, freq_table, 300 * 1000);
> -	if (!result) {
> -		dev_pm_opp_of_register_em(policy->cpus);
> -		return 0;
> -	}
> +	cpufreq_generic_init(policy, freq_table, 300 * 1000);
> +	dev_pm_opp_of_register_em(policy->cpus);
>  
> -	freq_table_free();
> -fail:
> -	clk_put(policy->clk);
> -	return result;
> +	return 0;
>  }
>  
>  static int omap_cpu_exit(struct cpufreq_policy *policy)
> diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
> index 6b1e4abe3248..93f39a1d4c3d 100644
> --- a/drivers/cpufreq/pasemi-cpufreq.c
> +++ b/drivers/cpufreq/pasemi-cpufreq.c
> @@ -196,7 +196,8 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy)
>  	policy->cur = pas_freqs[cur_astate].frequency;
>  	ppc_proc_freq = policy->cur * 1000ul;
>  
> -	return cpufreq_generic_init(policy, pas_freqs, get_gizmo_latency());
> +	cpufreq_generic_init(policy, pas_freqs, get_gizmo_latency());
> +	return 0;
>  
>  out_unmap_sdcpwr:
>  	iounmap(sdcpwr_mapbase);
> diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
> index 650104d729f3..73621bc11976 100644
> --- a/drivers/cpufreq/pmac32-cpufreq.c
> +++ b/drivers/cpufreq/pmac32-cpufreq.c
> @@ -372,7 +372,8 @@ static int pmac_cpufreq_target(	struct cpufreq_policy *policy,
>  
>  static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy)
>  {
> -	return cpufreq_generic_init(policy, pmac_cpu_freqs, transition_latency);
> +	cpufreq_generic_init(policy, pmac_cpu_freqs, transition_latency);
> +	return 0;
>  }
>  
>  static u32 read_gpio(struct device_node *np)
> diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
> index 1af3492a000d..d7542a106e6b 100644
> --- a/drivers/cpufreq/pmac64-cpufreq.c
> +++ b/drivers/cpufreq/pmac64-cpufreq.c
> @@ -321,7 +321,8 @@ static unsigned int g5_cpufreq_get_speed(unsigned int cpu)
>  
>  static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy)
>  {
> -	return cpufreq_generic_init(policy, g5_cpu_freqs, transition_latency);
> +	cpufreq_generic_init(policy, g5_cpu_freqs, transition_latency);
> +	return 0;
>  }
>  
>  static struct cpufreq_driver g5_cpufreq_driver = {
> diff --git a/drivers/cpufreq/s3c2416-cpufreq.c b/drivers/cpufreq/s3c2416-cpufreq.c
> index f7ff1ed7fef1..106910351c41 100644
> --- a/drivers/cpufreq/s3c2416-cpufreq.c
> +++ b/drivers/cpufreq/s3c2416-cpufreq.c
> @@ -447,21 +447,16 @@ static int s3c2416_cpufreq_driver_init(struct cpufreq_policy *policy)
>  	/* Datasheet says PLL stabalisation time must be at least 300us,
>  	 * so but add some fudge. (reference in LOCKCON0 register description)
>  	 */
> -	ret = cpufreq_generic_init(policy, s3c_freq->freq_table,
> +	cpufreq_generic_init(policy, s3c_freq->freq_table,
>  			(500 * 1000) + s3c_freq->regulator_latency);
> -	if (ret)
> -		goto err_freq_table;
> -
>  	register_reboot_notifier(&s3c2416_cpufreq_reboot_notifier);
>  
>  	return 0;
>  
> -err_freq_table:
>  #ifdef CONFIG_ARM_S3C2416_CPUFREQ_VCORESCALE
> -	regulator_put(s3c_freq->vddarm);
>  err_vddarm:
> -#endif
>  	clk_put(s3c_freq->armclk);
> +#endif
>  err_armclk:
>  	clk_put(s3c_freq->hclk);
>  err_hclk:
> diff --git a/drivers/cpufreq/s3c64xx-cpufreq.c b/drivers/cpufreq/s3c64xx-cpufreq.c
> index 37df2d892eb0..af0c00dabb22 100644
> --- a/drivers/cpufreq/s3c64xx-cpufreq.c
> +++ b/drivers/cpufreq/s3c64xx-cpufreq.c
> @@ -144,7 +144,6 @@ static void s3c64xx_cpufreq_config_regulator(void)
>  
>  static int s3c64xx_cpufreq_driver_init(struct cpufreq_policy *policy)
>  {
> -	int ret;
>  	struct cpufreq_frequency_table *freq;
>  
>  	if (policy->cpu != 0)
> @@ -165,8 +164,7 @@ static int s3c64xx_cpufreq_driver_init(struct cpufreq_policy *policy)
>  #ifdef CONFIG_REGULATOR
>  	vddarm = regulator_get(NULL, "vddarm");
>  	if (IS_ERR(vddarm)) {
> -		ret = PTR_ERR(vddarm);
> -		pr_err("Failed to obtain VDDARM: %d\n", ret);
> +		pr_err("Failed to obtain VDDARM: %ld\n", PTR_ERR(vddarm));
>  		pr_err("Only frequency scaling available\n");
>  		vddarm = NULL;
>  	} else {
> @@ -196,16 +194,9 @@ static int s3c64xx_cpufreq_driver_init(struct cpufreq_policy *policy)
>  	 * the PLLs, which we don't currently) is ~300us worst case,
>  	 * but add some fudge.
>  	 */
> -	ret = cpufreq_generic_init(policy, s3c64xx_freq_table,
> +	cpufreq_generic_init(policy, s3c64xx_freq_table,
>  			(500 * 1000) + regulator_latency);
> -	if (ret != 0) {
> -		pr_err("Failed to configure frequency table: %d\n",
> -		       ret);
> -		regulator_put(vddarm);
> -		clk_put(policy->clk);
> -	}
> -
> -	return ret;
> +	return 0;
>  }
>  
>  static struct cpufreq_driver s3c64xx_cpufreq_driver = {
> diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
> index e5cb17d4be7b..5d10030f2560 100644
> --- a/drivers/cpufreq/s5pv210-cpufreq.c
> +++ b/drivers/cpufreq/s5pv210-cpufreq.c
> @@ -541,7 +541,8 @@ static int s5pv210_cpu_init(struct cpufreq_policy *policy)
>  	s5pv210_dram_conf[1].freq = clk_get_rate(dmc1_clk);
>  
>  	policy->suspend_freq = SLEEP_FREQ;
> -	return cpufreq_generic_init(policy, s5pv210_freq_table, 40000);
> +	cpufreq_generic_init(policy, s5pv210_freq_table, 40000);
> +	return 0;
>  
>  out_dmc1:
>  	clk_put(dmc0_clk);
> diff --git a/drivers/cpufreq/sa1100-cpufreq.c b/drivers/cpufreq/sa1100-cpufreq.c
> index ab5cab93e638..5c075ef6adc0 100644
> --- a/drivers/cpufreq/sa1100-cpufreq.c
> +++ b/drivers/cpufreq/sa1100-cpufreq.c
> @@ -181,7 +181,8 @@ static int sa1100_target(struct cpufreq_policy *policy, unsigned int ppcr)
>  
>  static int __init sa1100_cpu_init(struct cpufreq_policy *policy)
>  {
> -	return cpufreq_generic_init(policy, sa11x0_freq_table, 0);
> +	cpufreq_generic_init(policy, sa11x0_freq_table, 0);
> +	return 0;
>  }
>  
>  static struct cpufreq_driver sa1100_driver __refdata = {
> diff --git a/drivers/cpufreq/sa1110-cpufreq.c b/drivers/cpufreq/sa1110-cpufreq.c
> index dab54e051c0e..d9d04d935b3a 100644
> --- a/drivers/cpufreq/sa1110-cpufreq.c
> +++ b/drivers/cpufreq/sa1110-cpufreq.c
> @@ -303,7 +303,8 @@ static int sa1110_target(struct cpufreq_policy *policy, unsigned int ppcr)
>  
>  static int __init sa1110_cpu_init(struct cpufreq_policy *policy)
>  {
> -	return cpufreq_generic_init(policy, sa11x0_freq_table, 0);
> +	cpufreq_generic_init(policy, sa11x0_freq_table, 0);
> +	return 0;
>  }
>  
>  /* sa1110_driver needs __refdata because it must remain after init registers
> diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c
> index 4074e2615522..73bd8dc47074 100644
> --- a/drivers/cpufreq/spear-cpufreq.c
> +++ b/drivers/cpufreq/spear-cpufreq.c
> @@ -153,8 +153,9 @@ static int spear_cpufreq_target(struct cpufreq_policy *policy,
>  static int spear_cpufreq_init(struct cpufreq_policy *policy)
>  {
>  	policy->clk = spear_cpufreq.clk;
> -	return cpufreq_generic_init(policy, spear_cpufreq.freq_tbl,
> +	cpufreq_generic_init(policy, spear_cpufreq.freq_tbl,
>  			spear_cpufreq.transition_latency);
> +	return 0;
>  }
>  
>  static struct cpufreq_driver spear_cpufreq_driver = {
> diff --git a/drivers/cpufreq/tegra20-cpufreq.c b/drivers/cpufreq/tegra20-cpufreq.c
> index 3c32cc7b0671..f84ecd22f488 100644
> --- a/drivers/cpufreq/tegra20-cpufreq.c
> +++ b/drivers/cpufreq/tegra20-cpufreq.c
> @@ -118,17 +118,11 @@ static int tegra_target(struct cpufreq_policy *policy, unsigned int index)
>  static int tegra_cpu_init(struct cpufreq_policy *policy)
>  {
>  	struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data();
> -	int ret;
>  
>  	clk_prepare_enable(cpufreq->cpu_clk);
>  
>  	/* FIXME: what's the actual transition time? */
> -	ret = cpufreq_generic_init(policy, freq_table, 300 * 1000);
> -	if (ret) {
> -		clk_disable_unprepare(cpufreq->cpu_clk);
> -		return ret;
> -	}
> -
> +	cpufreq_generic_init(policy, freq_table, 300 * 1000);
>  	policy->clk = cpufreq->cpu_clk;
>  	policy->suspend_freq = freq_table[0].frequency;
>  	return 0;
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index d757a56a74dc..536a049d7ecc 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -992,7 +992,7 @@ extern struct freq_attr *cpufreq_generic_attr[];
>  int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy);
>  
>  unsigned int cpufreq_generic_get(unsigned int cpu);
> -int cpufreq_generic_init(struct cpufreq_policy *policy,
> +void cpufreq_generic_init(struct cpufreq_policy *policy,
>  		struct cpufreq_frequency_table *table,
>  		unsigned int transition_latency);
>  #endif /* _LINUX_CPUFREQ_H */
> 

Applied, thanks!




^ permalink raw reply

* Re: [PATCH] thermal: intel: int340x_thermal: Remove unnecessary acpi_has_method() uses
From: Rafael J. Wysocki @ 2019-07-18  8:36 UTC (permalink / raw)
  To: Kelsey Skunberg
  Cc: rui.zhang, edubezval, daniel.lezcano, linux-pm, linux-kernel,
	bjorn, skhan, linux-kernel-mentees
In-Reply-To: <20190717192639.90092-1-skunberg.kelsey@gmail.com>

On Wednesday, July 17, 2019 9:26:39 PM CEST Kelsey Skunberg wrote:
> acpi_evaluate_object() will already return in error if the method does not
> exist. Checking if the method is absent before the acpi_evaluate_object()
> call is not needed. Remove acpi_has_method() calls to avoid additional
> work.
> 
> Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>

LGTM:

Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

> ---
>  drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c | 6 ------
>  1 file changed, 6 deletions(-)
> 
> diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
> index 9716bc3abaf9..7130e90773ed 100644
> --- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
> +++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
> @@ -77,9 +77,6 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
>  	struct acpi_buffer element = { 0, NULL };
>  	struct acpi_buffer trt_format = { sizeof("RRNNNNNN"), "RRNNNNNN" };
>  
> -	if (!acpi_has_method(handle, "_TRT"))
> -		return -ENODEV;
> -
>  	status = acpi_evaluate_object(handle, "_TRT", NULL, &buffer);
>  	if (ACPI_FAILURE(status))
>  		return -ENODEV;
> @@ -158,9 +155,6 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
>  	struct acpi_buffer art_format =	{
>  		sizeof("RRNNNNNNNNNNN"), "RRNNNNNNNNNNN" };
>  
> -	if (!acpi_has_method(handle, "_ART"))
> -		return -ENODEV;
> -
>  	status = acpi_evaluate_object(handle, "_ART", NULL, &buffer);
>  	if (ACPI_FAILURE(status))
>  		return -ENODEV;
> 





^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox