linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 09/10] thermal: Add PCIe cooling driver
       [not found] <20230817121708.53213-1-ilpo.jarvinen@linux.intel.com>
@ 2023-08-17 12:17 ` Ilpo Järvinen
  2023-08-23 19:47   ` Rafael J. Wysocki
       [not found] ` <fa5a20d0-77db-58bd-3956-ac664dffa587@quicinc.com>
  1 sibling, 1 reply; 7+ messages in thread
From: Ilpo Järvinen @ 2023-08-17 12:17 UTC (permalink / raw)
  To: linux-pci, Bjorn Helgaas, Lorenzo Pieralisi, Rob Herring,
	Krzysztof Wilczyński, Lukas Wunner, Alexandru Gagniuc,
	Ilpo Järvinen, Bjorn Helgaas, Rafael J. Wysocki,
	Daniel Lezcano, Amit Kucheria, Zhang Rui, linux-kernel, linux-pm
  Cc: Krishna chaitanya chundru, Srinivas Pandruvada, Alex Deucher

Add a thermal cooling driver to provide path to access PCIe bandwidth
controller using the usual thermal interfaces.

A cooling device is instantiated for controllable PCIe ports from the
bwctrl service driver.

The thermal side state 0 means no throttling, i.e., maximum supported
PCIe speed.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
---
 MAINTAINERS                    |   1 +
 drivers/pci/pcie/bwctrl.c      |  11 ++++
 drivers/thermal/Kconfig        |  10 +++
 drivers/thermal/Makefile       |   2 +
 drivers/thermal/pcie_cooling.c | 107 +++++++++++++++++++++++++++++++++
 include/linux/pci-bwctrl.h     |  15 +++++
 6 files changed, 146 insertions(+)
 create mode 100644 drivers/thermal/pcie_cooling.c

diff --git a/MAINTAINERS b/MAINTAINERS
index d2eed2883a43..a0b40253fd5a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16421,6 +16421,7 @@ PCIE BANDWIDTH CONTROLLER
 M:	Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
 S:	Supported
 F:	drivers/pci/pcie/bwctrl.c
+F:	drivers/thermal/pcie_cooling.c
 F:	include/linux/pci-bwctrl.h
 
 PCIE DRIVER FOR AMAZON ANNAPURNA LABS
diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c
index e3172d69476f..13c73546244e 100644
--- a/drivers/pci/pcie/bwctrl.c
+++ b/drivers/pci/pcie/bwctrl.c
@@ -34,9 +34,11 @@
 /**
  * struct bwctrl_service_data - PCIe Port Bandwidth Controller
  * @set_speed_mutex: serializes link speed changes
+ * @cdev: thermal cooling device associated with the port
  */
 struct bwctrl_service_data {
 	struct mutex set_speed_mutex;
+	struct thermal_cooling_device *cdev;
 };
 
 static bool bwctrl_valid_pcie_speed(enum pci_bus_speed speed)
@@ -253,8 +255,16 @@ static int pcie_bandwidth_notification_probe(struct pcie_device *srv)
 	pcie_enable_link_bandwidth_notification(port);
 	pci_info(port, "enabled with IRQ %d\n", srv->irq);
 
+	data->cdev = pcie_cooling_device_register(port, srv);
+	if (IS_ERR(data->cdev)) {
+		ret = PTR_ERR(data->cdev);
+		goto disable_notifications;
+	}
 	return 0;
 
+disable_notifications:
+	pcie_disable_link_bandwidth_notification(srv->port);
+	kfree(data);
 free_irq:
 	free_irq(srv->irq, srv);
 	return ret;
@@ -264,6 +274,7 @@ static void pcie_bandwidth_notification_remove(struct pcie_device *srv)
 {
 	struct bwctrl_service_data *data = get_service_data(srv);
 
+	pcie_cooling_device_unregister(data->cdev);
 	pcie_disable_link_bandwidth_notification(srv->port);
 	free_irq(srv->irq, srv);
 	mutex_destroy(&data->set_speed_mutex);
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 19a4b33cb564..7deda3a0237d 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -219,6 +219,16 @@ config DEVFREQ_THERMAL
 
 	  If you want this support, you should say Y here.
 
+config PCIE_THERMAL
+	bool "PCIe cooling support"
+	depends on PCIEPORTBUS
+	select PCIE_BW
+	help
+	  This implements PCIe cooling mechanism through bandwidth reduction
+	  for PCIe devices.
+
+	  If you want this support, you should say Y here.
+
 config THERMAL_EMULATION
 	bool "Thermal emulation mode support"
 	help
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index 058664bc3ec0..065972a08c84 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -30,6 +30,8 @@ thermal_sys-$(CONFIG_CPU_IDLE_THERMAL)	+= cpuidle_cooling.o
 # devfreq cooling
 thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
 
+thermal_sys-$(CONFIG_PCIE_THERMAL) += pcie_cooling.o
+
 obj-$(CONFIG_K3_THERMAL)	+= k3_bandgap.o k3_j72xx_bandgap.o
 # platform thermal drivers
 obj-y				+= broadcom/
diff --git a/drivers/thermal/pcie_cooling.c b/drivers/thermal/pcie_cooling.c
new file mode 100644
index 000000000000..d86265c03e80
--- /dev/null
+++ b/drivers/thermal/pcie_cooling.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PCIe cooling device
+ *
+ * Copyright (C) 2023 Intel Corporation.
+ */
+
+#include <linux/build_bug.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/pci-bwctrl.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/thermal.h>
+
+#define COOLING_DEV_TYPE_PREFIX		"PCIe_Port_"
+
+struct pcie_cooling_device {
+	struct pci_dev *port;
+	struct pcie_device *pdev;
+};
+
+static int pcie_cooling_get_max_level(struct thermal_cooling_device *cdev, unsigned long *state)
+{
+	struct pcie_cooling_device *pcie_cdev = cdev->devdata;
+
+	/* cooling state 0 is same as the maximum PCIe speed */
+	*state = pcie_cdev->port->subordinate->max_bus_speed - PCIE_SPEED_2_5GT;
+
+	return 0;
+}
+
+static int pcie_cooling_get_cur_level(struct thermal_cooling_device *cdev, unsigned long *state)
+{
+	struct pcie_cooling_device *pcie_cdev = cdev->devdata;
+
+	/* cooling state 0 is same as the maximum PCIe speed */
+	*state = cdev->max_state -
+		 (pcie_cdev->port->subordinate->cur_bus_speed - PCIE_SPEED_2_5GT);
+
+	return 0;
+}
+
+static int pcie_cooling_set_cur_level(struct thermal_cooling_device *cdev, unsigned long state)
+{
+	struct pcie_cooling_device *pcie_cdev = cdev->devdata;
+	enum pci_bus_speed speed;
+
+	/* cooling state 0 is same as the maximum PCIe speed */
+	speed = (cdev->max_state - state) + PCIE_SPEED_2_5GT;
+
+	return bwctrl_set_current_speed(pcie_cdev->pdev, speed);
+}
+
+static struct thermal_cooling_device_ops pcie_cooling_ops = {
+	.get_max_state = pcie_cooling_get_max_level,
+	.get_cur_state = pcie_cooling_get_cur_level,
+	.set_cur_state = pcie_cooling_set_cur_level,
+};
+
+struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port,
+							    struct pcie_device *pdev)
+{
+	struct pcie_cooling_device *pcie_cdev;
+	struct thermal_cooling_device *cdev;
+	size_t name_len;
+	char *name;
+
+	pcie_cdev = kzalloc(sizeof(*pcie_cdev), GFP_KERNEL);
+	if (!pcie_cdev)
+		return ERR_PTR(-ENOMEM);
+
+	pcie_cdev->port = port;
+	pcie_cdev->pdev = pdev;
+
+	name_len = strlen(COOLING_DEV_TYPE_PREFIX) + strlen(pci_name(port)) + 1;
+	name = kzalloc(name_len, GFP_KERNEL);
+	if (!name) {
+		kfree(pcie_cdev);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	snprintf(name, name_len, COOLING_DEV_TYPE_PREFIX "%s", pci_name(port));
+	cdev = thermal_cooling_device_register(name, pcie_cdev, &pcie_cooling_ops);
+	kfree(name);
+
+	return cdev;
+}
+
+void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev)
+{
+	struct pcie_cooling_device *pcie_cdev = cdev->devdata;
+
+	thermal_cooling_device_unregister(cdev);
+	kfree(pcie_cdev);
+}
+
+/* For bus_speed <-> state arithmetic */
+static_assert(PCIE_SPEED_2_5GT + 1 == PCIE_SPEED_5_0GT);
+static_assert(PCIE_SPEED_5_0GT + 1 == PCIE_SPEED_8_0GT);
+static_assert(PCIE_SPEED_8_0GT + 1 == PCIE_SPEED_16_0GT);
+static_assert(PCIE_SPEED_16_0GT + 1 == PCIE_SPEED_32_0GT);
+static_assert(PCIE_SPEED_32_0GT + 1 == PCIE_SPEED_64_0GT);
+
+MODULE_AUTHOR("Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>");
+MODULE_DESCRIPTION("PCIe cooling driver");
diff --git a/include/linux/pci-bwctrl.h b/include/linux/pci-bwctrl.h
index 46026fa25deb..366445517b72 100644
--- a/include/linux/pci-bwctrl.h
+++ b/include/linux/pci-bwctrl.h
@@ -15,4 +15,19 @@ struct thermal_cooling_device;
 
 int bwctrl_set_current_speed(struct pcie_device *srv, enum pci_bus_speed speed);
 
+#ifdef CONFIG_PCIE_THERMAL
+struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port,
+							    struct pcie_device *pdev);
+void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev);
+#else
+static inline struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port,
+									  struct pcie_device *pdev)
+{
+	return NULL;
+}
+static inline void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev)
+{
+}
+#endif
+
 #endif
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 09/10] thermal: Add PCIe cooling driver
  2023-08-17 12:17 ` [PATCH 09/10] thermal: Add PCIe cooling driver Ilpo Järvinen
@ 2023-08-23 19:47   ` Rafael J. Wysocki
  0 siblings, 0 replies; 7+ messages in thread
From: Rafael J. Wysocki @ 2023-08-23 19:47 UTC (permalink / raw)
  To: Ilpo Järvinen
  Cc: linux-pci, Bjorn Helgaas, Lorenzo Pieralisi, Rob Herring,
	Krzysztof Wilczyński, Lukas Wunner, Alexandru Gagniuc,
	Bjorn Helgaas, Rafael J. Wysocki, Daniel Lezcano, Amit Kucheria,
	Zhang Rui, linux-kernel, linux-pm, Krishna chaitanya chundru,
	Srinivas Pandruvada, Alex Deucher

On Thu, Aug 17, 2023 at 2:18 PM Ilpo Järvinen
<ilpo.jarvinen@linux.intel.com> wrote:
>
> Add a thermal cooling driver to provide path to access PCIe bandwidth
> controller using the usual thermal interfaces.
>
> A cooling device is instantiated for controllable PCIe ports from the
> bwctrl service driver.
>
> The thermal side state 0 means no throttling, i.e., maximum supported
> PCIe speed.
>
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>

From the cooling device interface perspective

Acked-by: Rafael J. Wysocki <rafael@kernel.org>

> ---
>  MAINTAINERS                    |   1 +
>  drivers/pci/pcie/bwctrl.c      |  11 ++++
>  drivers/thermal/Kconfig        |  10 +++
>  drivers/thermal/Makefile       |   2 +
>  drivers/thermal/pcie_cooling.c | 107 +++++++++++++++++++++++++++++++++
>  include/linux/pci-bwctrl.h     |  15 +++++
>  6 files changed, 146 insertions(+)
>  create mode 100644 drivers/thermal/pcie_cooling.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index d2eed2883a43..a0b40253fd5a 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -16421,6 +16421,7 @@ PCIE BANDWIDTH CONTROLLER
>  M:     Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
>  S:     Supported
>  F:     drivers/pci/pcie/bwctrl.c
> +F:     drivers/thermal/pcie_cooling.c
>  F:     include/linux/pci-bwctrl.h
>
>  PCIE DRIVER FOR AMAZON ANNAPURNA LABS
> diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c
> index e3172d69476f..13c73546244e 100644
> --- a/drivers/pci/pcie/bwctrl.c
> +++ b/drivers/pci/pcie/bwctrl.c
> @@ -34,9 +34,11 @@
>  /**
>   * struct bwctrl_service_data - PCIe Port Bandwidth Controller
>   * @set_speed_mutex: serializes link speed changes
> + * @cdev: thermal cooling device associated with the port
>   */
>  struct bwctrl_service_data {
>         struct mutex set_speed_mutex;
> +       struct thermal_cooling_device *cdev;
>  };
>
>  static bool bwctrl_valid_pcie_speed(enum pci_bus_speed speed)
> @@ -253,8 +255,16 @@ static int pcie_bandwidth_notification_probe(struct pcie_device *srv)
>         pcie_enable_link_bandwidth_notification(port);
>         pci_info(port, "enabled with IRQ %d\n", srv->irq);
>
> +       data->cdev = pcie_cooling_device_register(port, srv);
> +       if (IS_ERR(data->cdev)) {
> +               ret = PTR_ERR(data->cdev);
> +               goto disable_notifications;
> +       }
>         return 0;
>
> +disable_notifications:
> +       pcie_disable_link_bandwidth_notification(srv->port);
> +       kfree(data);
>  free_irq:
>         free_irq(srv->irq, srv);
>         return ret;
> @@ -264,6 +274,7 @@ static void pcie_bandwidth_notification_remove(struct pcie_device *srv)
>  {
>         struct bwctrl_service_data *data = get_service_data(srv);
>
> +       pcie_cooling_device_unregister(data->cdev);
>         pcie_disable_link_bandwidth_notification(srv->port);
>         free_irq(srv->irq, srv);
>         mutex_destroy(&data->set_speed_mutex);
> diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> index 19a4b33cb564..7deda3a0237d 100644
> --- a/drivers/thermal/Kconfig
> +++ b/drivers/thermal/Kconfig
> @@ -219,6 +219,16 @@ config DEVFREQ_THERMAL
>
>           If you want this support, you should say Y here.
>
> +config PCIE_THERMAL
> +       bool "PCIe cooling support"
> +       depends on PCIEPORTBUS
> +       select PCIE_BW
> +       help
> +         This implements PCIe cooling mechanism through bandwidth reduction
> +         for PCIe devices.
> +
> +         If you want this support, you should say Y here.
> +
>  config THERMAL_EMULATION
>         bool "Thermal emulation mode support"
>         help
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index 058664bc3ec0..065972a08c84 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -30,6 +30,8 @@ thermal_sys-$(CONFIG_CPU_IDLE_THERMAL)        += cpuidle_cooling.o
>  # devfreq cooling
>  thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
>
> +thermal_sys-$(CONFIG_PCIE_THERMAL) += pcie_cooling.o
> +
>  obj-$(CONFIG_K3_THERMAL)       += k3_bandgap.o k3_j72xx_bandgap.o
>  # platform thermal drivers
>  obj-y                          += broadcom/
> diff --git a/drivers/thermal/pcie_cooling.c b/drivers/thermal/pcie_cooling.c
> new file mode 100644
> index 000000000000..d86265c03e80
> --- /dev/null
> +++ b/drivers/thermal/pcie_cooling.c
> @@ -0,0 +1,107 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * PCIe cooling device
> + *
> + * Copyright (C) 2023 Intel Corporation.
> + */
> +
> +#include <linux/build_bug.h>
> +#include <linux/err.h>
> +#include <linux/kernel.h>
> +#include <linux/pci.h>
> +#include <linux/pci-bwctrl.h>
> +#include <linux/slab.h>
> +#include <linux/string.h>
> +#include <linux/thermal.h>
> +
> +#define COOLING_DEV_TYPE_PREFIX                "PCIe_Port_"
> +
> +struct pcie_cooling_device {
> +       struct pci_dev *port;
> +       struct pcie_device *pdev;
> +};
> +
> +static int pcie_cooling_get_max_level(struct thermal_cooling_device *cdev, unsigned long *state)
> +{
> +       struct pcie_cooling_device *pcie_cdev = cdev->devdata;
> +
> +       /* cooling state 0 is same as the maximum PCIe speed */
> +       *state = pcie_cdev->port->subordinate->max_bus_speed - PCIE_SPEED_2_5GT;
> +
> +       return 0;
> +}
> +
> +static int pcie_cooling_get_cur_level(struct thermal_cooling_device *cdev, unsigned long *state)
> +{
> +       struct pcie_cooling_device *pcie_cdev = cdev->devdata;
> +
> +       /* cooling state 0 is same as the maximum PCIe speed */
> +       *state = cdev->max_state -
> +                (pcie_cdev->port->subordinate->cur_bus_speed - PCIE_SPEED_2_5GT);
> +
> +       return 0;
> +}
> +
> +static int pcie_cooling_set_cur_level(struct thermal_cooling_device *cdev, unsigned long state)
> +{
> +       struct pcie_cooling_device *pcie_cdev = cdev->devdata;
> +       enum pci_bus_speed speed;
> +
> +       /* cooling state 0 is same as the maximum PCIe speed */
> +       speed = (cdev->max_state - state) + PCIE_SPEED_2_5GT;
> +
> +       return bwctrl_set_current_speed(pcie_cdev->pdev, speed);
> +}
> +
> +static struct thermal_cooling_device_ops pcie_cooling_ops = {
> +       .get_max_state = pcie_cooling_get_max_level,
> +       .get_cur_state = pcie_cooling_get_cur_level,
> +       .set_cur_state = pcie_cooling_set_cur_level,
> +};
> +
> +struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port,
> +                                                           struct pcie_device *pdev)
> +{
> +       struct pcie_cooling_device *pcie_cdev;
> +       struct thermal_cooling_device *cdev;
> +       size_t name_len;
> +       char *name;
> +
> +       pcie_cdev = kzalloc(sizeof(*pcie_cdev), GFP_KERNEL);
> +       if (!pcie_cdev)
> +               return ERR_PTR(-ENOMEM);
> +
> +       pcie_cdev->port = port;
> +       pcie_cdev->pdev = pdev;
> +
> +       name_len = strlen(COOLING_DEV_TYPE_PREFIX) + strlen(pci_name(port)) + 1;
> +       name = kzalloc(name_len, GFP_KERNEL);
> +       if (!name) {
> +               kfree(pcie_cdev);
> +               return ERR_PTR(-ENOMEM);
> +       }
> +
> +       snprintf(name, name_len, COOLING_DEV_TYPE_PREFIX "%s", pci_name(port));
> +       cdev = thermal_cooling_device_register(name, pcie_cdev, &pcie_cooling_ops);
> +       kfree(name);
> +
> +       return cdev;
> +}
> +
> +void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev)
> +{
> +       struct pcie_cooling_device *pcie_cdev = cdev->devdata;
> +
> +       thermal_cooling_device_unregister(cdev);
> +       kfree(pcie_cdev);
> +}
> +
> +/* For bus_speed <-> state arithmetic */
> +static_assert(PCIE_SPEED_2_5GT + 1 == PCIE_SPEED_5_0GT);
> +static_assert(PCIE_SPEED_5_0GT + 1 == PCIE_SPEED_8_0GT);
> +static_assert(PCIE_SPEED_8_0GT + 1 == PCIE_SPEED_16_0GT);
> +static_assert(PCIE_SPEED_16_0GT + 1 == PCIE_SPEED_32_0GT);
> +static_assert(PCIE_SPEED_32_0GT + 1 == PCIE_SPEED_64_0GT);
> +
> +MODULE_AUTHOR("Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>");
> +MODULE_DESCRIPTION("PCIe cooling driver");
> diff --git a/include/linux/pci-bwctrl.h b/include/linux/pci-bwctrl.h
> index 46026fa25deb..366445517b72 100644
> --- a/include/linux/pci-bwctrl.h
> +++ b/include/linux/pci-bwctrl.h
> @@ -15,4 +15,19 @@ struct thermal_cooling_device;
>
>  int bwctrl_set_current_speed(struct pcie_device *srv, enum pci_bus_speed speed);
>
> +#ifdef CONFIG_PCIE_THERMAL
> +struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port,
> +                                                           struct pcie_device *pdev);
> +void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev);
> +#else
> +static inline struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port,
> +                                                                         struct pcie_device *pdev)
> +{
> +       return NULL;
> +}
> +static inline void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev)
> +{
> +}
> +#endif
> +
>  #endif
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 00/10] Add PCIe Bandwidth Controller
       [not found]     ` <647e2b5e-6064-dbfa-bb56-f74358efd1fe@quicinc.com>
@ 2023-09-11 15:47       ` Ilpo Järvinen
  2023-09-11 16:14         ` srinivas pandruvada
  0 siblings, 1 reply; 7+ messages in thread
From: Ilpo Järvinen @ 2023-09-11 15:47 UTC (permalink / raw)
  To: Krishna Chaitanya Chundru, Rafael J. Wysocki, Srinivas Pandruvada,
	Daniel Lezcano, Zhang Rui, linux-pm
  Cc: linux-pci, Bjorn Helgaas, Lorenzo Pieralisi, Rob Herring,
	Krzysztof Wilczyński, Lukas Wunner, Alexandru Gagniuc, LKML,
	Alex Deucher

[-- Attachment #1: Type: text/plain, Size: 2777 bytes --]

+ thermal people.

On Mon, 11 Sep 2023, Krishna Chaitanya Chundru wrote:
> On 9/4/2023 4:46 PM, Ilpo Järvinen wrote:
> > On Mon, 4 Sep 2023, Krishna Chaitanya Chundru wrote:
> > > On 8/17/2023 5:46 PM, Ilpo Järvinen wrote:
> > > > 
> > > > This series adds PCIe bandwidth controller (bwctrl) and associated PCIe
> > > > cooling driver to the thermal core side for limiting PCIe link speed
> > > > due to thermal reasons. PCIe bandwidth controller is a PCI express bus
> > > > port service driver. A cooling device is created for each port the
> > > > service driver finds if they support changing speeds.
> > > I see we had support for only link speed changes here but we need to add
> > > support for
> > > 
> > > link width change also as bandwidth notification from PCIe supports both
> > > link
> > > speed and link width.
> > Hi,
> > 
> > Thanks for the comment. In case you mean that the changes in Link Width
> > should be reported correctly, they already are since the sysfs interface
> > reads them directly from LNKSTA register.
> > 
> > Or did you perhaps mean that Bandwidth Controller should support also
> > changing Link Width? If this is the case I don't know how it can be
> > realized so a pointer on how it can be achieved would be appreciated.
> 
> I didn't have any idea on how thermal framework works.
> 
> But as we are adding bandwidth controller support we need to add support for
> width change also, may be we are not using this now, but we may need it in the
> future.
> 
> We had similar use case based on the bandwidth requirement on devices like
> WLAN, the client try to reduce or increase the link speed and link width.
> 
> So in the bandwidth controller driver we can add support for link width also.
> So any client can easily use the driver to change link speed or width or both
> to reduce the power consumption.
> 
> Adding link width support should be similar to how you added the link speed
> supported.
> 
> Please correct me if I misunderstood something here.

Hi,

Okay, thanks for the clarification. So the point is to plan for adding 
support for Link Width later and currently only support throttling Link 
Speed. In any case, the Link Width control seems to be controlled using 
a different approach (Link Width change does not require Link Retraining).

I don't know either how such 2 dimensioned throttling (Link Speed and 
Link Width) is supposed to be realized using the thermal/cooling device 
interface which only provides a single integer as the current state. That 
is, whether to provide a single cooling device (with a single integer 
exposed to userspace) or separate cooling device for each dimension?

Perhaps thermal people could provide some insight on this? Is there some 
precedent I could take look at?

-- 
 i.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 00/10] Add PCIe Bandwidth Controller
  2023-09-11 15:47       ` [PATCH 00/10] Add PCIe Bandwidth Controller Ilpo Järvinen
@ 2023-09-11 16:14         ` srinivas pandruvada
  2023-09-12 12:52           ` Ilpo Järvinen
  0 siblings, 1 reply; 7+ messages in thread
From: srinivas pandruvada @ 2023-09-11 16:14 UTC (permalink / raw)
  To: Ilpo Järvinen, Krishna Chaitanya Chundru, Rafael J. Wysocki,
	Daniel Lezcano, Zhang Rui, linux-pm
  Cc: linux-pci, Bjorn Helgaas, Lorenzo Pieralisi, Rob Herring,
	Krzysztof Wilczyński, Lukas Wunner, Alexandru Gagniuc, LKML,
	Alex Deucher

On Mon, 2023-09-11 at 18:47 +0300, Ilpo Järvinen wrote:
> + thermal people.
> 
> 

...

> Hi,
> 
> Okay, thanks for the clarification. So the point is to plan for
> adding 
> support for Link Width later and currently only support throttling
> Link 
> Speed. In any case, the Link Width control seems to be controlled
> using 
> a different approach (Link Width change does not require Link
> Retraining).
> 
> I don't know either how such 2 dimensioned throttling (Link Speed and
> Link Width) is supposed to be realized using the thermal/cooling
> device 
> interface which only provides a single integer as the current state.
> That 
> is, whether to provide a single cooling device (with a single integer
> exposed to userspace) or separate cooling device for each dimension?
> 
> Perhaps thermal people could provide some insight on this? Is there
> some 
> precedent I could take look at?
Yes. The processor cooling device does similar. 1-3 are reserved for P-
state and and 4-7 for T-states.

But I don't suggest using such method. This causes confusion and
difficult to change. For example if we increase range of P-state
control, then there is no way to know what is the start point of T-
states.

It is best to create to separate cooling devices for BW and link width.

Also there is a requirement that anything you add to thermal sysfs, it
should have some purpose for thermal control. I hope Link width control
is targeted to similar use case BW control.

Thanks,
Srinivas


> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 00/10] Add PCIe Bandwidth Controller
  2023-09-11 16:14         ` srinivas pandruvada
@ 2023-09-12 12:52           ` Ilpo Järvinen
  2023-09-12 17:45             ` srinivas pandruvada
  0 siblings, 1 reply; 7+ messages in thread
From: Ilpo Järvinen @ 2023-09-12 12:52 UTC (permalink / raw)
  To: srinivas pandruvada
  Cc: Krishna Chaitanya Chundru, Rafael J. Wysocki, Daniel Lezcano,
	Zhang Rui, linux-pm, linux-pci, Bjorn Helgaas, Lorenzo Pieralisi,
	Rob Herring, Krzysztof Wilczyński, Lukas Wunner,
	Alexandru Gagniuc, LKML, Alex Deucher

[-- Attachment #1: Type: text/plain, Size: 2201 bytes --]

On Mon, 11 Sep 2023, srinivas pandruvada wrote:
> On Mon, 2023-09-11 at 18:47 +0300, Ilpo Järvinen wrote:
> > 
> > Okay, thanks for the clarification. So the point is to plan for
> > adding 
> > support for Link Width later and currently only support throttling
> > Link 
> > Speed. In any case, the Link Width control seems to be controlled
> > using 
> > a different approach (Link Width change does not require Link
> > Retraining).
> > 
> > I don't know either how such 2 dimensioned throttling (Link Speed and
> > Link Width) is supposed to be realized using the thermal/cooling
> > device 
> > interface which only provides a single integer as the current state.
> > That 
> > is, whether to provide a single cooling device (with a single integer
> > exposed to userspace) or separate cooling device for each dimension?
> > 
> > Perhaps thermal people could provide some insight on this? Is there
> > some 
> > precedent I could take look at?
>
> Yes. The processor cooling device does similar. 1-3 are reserved for P-
> state and and 4-7 for T-states.
> 
> But I don't suggest using such method. This causes confusion and
> difficult to change. For example if we increase range of P-state
> control, then there is no way to know what is the start point of T-
> states.

Yes. I understand it would be confusing.

> It is best to create to separate cooling devices for BW and link width.

Okay. If that's the case, then I see no reason to add the Link Width 
cooling device now as it could do nothing besides reporting the current 
link width.

The only question that then remains is how to take this into account in 
the naming of the cooling devices, currently PCIe_Port_<pci_name()> is 
used but perhaps it would be better to change that to 
PCIe_Port_Link_Speed_... to allow PCI_Port_Link_Width_... to be added 
later beside it?

> Also there is a requirement that anything you add to thermal sysfs, it
> should have some purpose for thermal control. I hope Link width control
> is targeted to similar use case BW control.

Ability to control Link Width seems to be part of PCIe 6.0 L0p. AFAICT, 
the reasons are to lower/control power consumption so it seems to be 
within scope.


-- 
 i.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 00/10] Add PCIe Bandwidth Controller
  2023-09-12 12:52           ` Ilpo Järvinen
@ 2023-09-12 17:45             ` srinivas pandruvada
  2023-09-12 18:08               ` srinivas pandruvada
  0 siblings, 1 reply; 7+ messages in thread
From: srinivas pandruvada @ 2023-09-12 17:45 UTC (permalink / raw)
  To: Ilpo Järvinen
  Cc: Krishna Chaitanya Chundru, Rafael J. Wysocki, Daniel Lezcano,
	Zhang Rui, linux-pm, linux-pci, Bjorn Helgaas, Lorenzo Pieralisi,
	Rob Herring, Krzysztof Wilczyński, Lukas Wunner,
	Alexandru Gagniuc, LKML, Alex Deucher

On Tue, 2023-09-12 at 15:52 +0300, Ilpo Järvinen wrote:
> On Mon, 11 Sep 2023, srinivas pandruvada wrote:
> > On Mon, 2023-09-11 at 18:47 +0300, Ilpo Järvinen wrote:
> > 
> > 

[...]

> > But I don't suggest using such method. This causes confusion and
> > difficult to change. For example if we increase range of P-state
> > control, then there is no way to know what is the start point of T-
> > states.
> 
> Yes. I understand it would be confusing.
> 
> > It is best to create to separate cooling devices for BW and link
> > width.
> 
> Okay. If that's the case, then I see no reason to add the Link Width 
> cooling device now as it could do nothing besides reporting the
> current 
> link width.
> 
> The only question that then remains is how to take this into account
> in 
> the naming of the cooling devices, currently PCIe_Port_<pci_name()>
> is 
> used but perhaps it would be better to change that to 
> PCIe_Port_Link_Speed_... to allow PCI_Port_Link_Width_... to be added
> later beside it?
It is better in that way to add BW controller later.

Also adding separate cooling device will let thermal configuration,
choose different method at different thermal thresholds or all
together.

Thanks,
Srinivas

> 
> > Also there is a requirement that anything you add to thermal sysfs,
> > it
> > should have some purpose for thermal control. I hope Link width
> > control
> > is targeted to similar use case BW control.
> 
> Ability to control Link Width seems to be part of PCIe 6.0 L0p.
> AFAICT, 
> the reasons are to lower/control power consumption so it seems to be 
> within scope.
> 
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 00/10] Add PCIe Bandwidth Controller
  2023-09-12 17:45             ` srinivas pandruvada
@ 2023-09-12 18:08               ` srinivas pandruvada
  0 siblings, 0 replies; 7+ messages in thread
From: srinivas pandruvada @ 2023-09-12 18:08 UTC (permalink / raw)
  To: Ilpo Järvinen
  Cc: Krishna Chaitanya Chundru, Rafael J. Wysocki, Daniel Lezcano,
	Zhang Rui, linux-pm, linux-pci, Bjorn Helgaas, Lorenzo Pieralisi,
	Rob Herring, Krzysztof Wilczyński, Lukas Wunner,
	Alexandru Gagniuc, LKML, Alex Deucher

On Tue, 2023-09-12 at 10:45 -0700, srinivas pandruvada wrote:
> On Tue, 2023-09-12 at 15:52 +0300, Ilpo Järvinen wrote:
> > On Mon, 11 Sep 2023, srinivas pandruvada wrote:
> > > On Mon, 2023-09-11 at 18:47 +0300, Ilpo Järvinen wrote:
> > > 
> > > 
> 
> [...]
> 
> > > But I don't suggest using such method. This causes confusion and
> > > difficult to change. For example if we increase range of P-state
> > > control, then there is no way to know what is the start point of
> > > T-
> > > states.
> > 
> > Yes. I understand it would be confusing.
> > 
> > > It is best to create to separate cooling devices for BW and link
> > > width.
> > 
> > Okay. If that's the case, then I see no reason to add the Link
> > Width 
> > cooling device now as it could do nothing besides reporting the
> > current 
> > link width.
> > 
> > The only question that then remains is how to take this into
> > account
> > in 
> > the naming of the cooling devices, currently PCIe_Port_<pci_name()>
> > is 
> > used but perhaps it would be better to change that to 
> > PCIe_Port_Link_Speed_... to allow PCI_Port_Link_Width_... to be
> > added
> > later beside it?
> It is better in that way to add BW 
sorry, link width controller

> controller later.
> 
> Also adding separate cooling device will let thermal configuration,
> choose different method at different thermal thresholds or all
> together.
> 
> Thanks,
> Srinivas
> 
> > 
> > > Also there is a requirement that anything you add to thermal
> > > sysfs,
> > > it
> > > should have some purpose for thermal control. I hope Link width
> > > control
> > > is targeted to similar use case BW control.
> > 
> > Ability to control Link Width seems to be part of PCIe 6.0 L0p.
> > AFAICT, 
> > the reasons are to lower/control power consumption so it seems to
> > be 
> > within scope.
> > 
> > 
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-09-12 18:08 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20230817121708.53213-1-ilpo.jarvinen@linux.intel.com>
2023-08-17 12:17 ` [PATCH 09/10] thermal: Add PCIe cooling driver Ilpo Järvinen
2023-08-23 19:47   ` Rafael J. Wysocki
     [not found] ` <fa5a20d0-77db-58bd-3956-ac664dffa587@quicinc.com>
     [not found]   ` <21b95d9-86a5-dcb0-9dda-3f1cdd426b9e@linux.intel.com>
     [not found]     ` <647e2b5e-6064-dbfa-bb56-f74358efd1fe@quicinc.com>
2023-09-11 15:47       ` [PATCH 00/10] Add PCIe Bandwidth Controller Ilpo Järvinen
2023-09-11 16:14         ` srinivas pandruvada
2023-09-12 12:52           ` Ilpo Järvinen
2023-09-12 17:45             ` srinivas pandruvada
2023-09-12 18:08               ` srinivas pandruvada

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).