netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>,
	Sandipan Patra <spatra@nvidia.com>, Gal Pressman <gal@nvidia.com>
Subject: [net-next 04/15] net/mlx5: Implement thermal zone
Date: Mon, 13 Mar 2023 22:42:23 -0700	[thread overview]
Message-ID: <20230314054234.267365-5-saeed@kernel.org> (raw)
In-Reply-To: <20230314054234.267365-1-saeed@kernel.org>

From: Sandipan Patra <spatra@nvidia.com>

Implement thermal zone support for mlx5-based HW. The NIC
uses the temperature sensor provided by the ASIC to report the current
temperature to the thermal core.

Signed-off-by: Sandipan Patra <spatra@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   1 +
 .../net/ethernet/mellanox/mlx5/core/main.c    |   6 +
 .../net/ethernet/mellanox/mlx5/core/thermal.c | 108 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/thermal.h |  20 ++++
 include/linux/mlx5/driver.h                   |   3 +
 include/linux/mlx5/mlx5_ifc.h                 |  26 +++++
 6 files changed, 164 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/thermal.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/thermal.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8d4e25cc54ea..6c2f1d4a58ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -77,6 +77,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
 
 mlx5_core-$(CONFIG_MLX5_BRIDGE)    += esw/bridge.o en/rep/bridge.o
 
+mlx5_core-$(CONFIG_THERMAL)        += thermal.o
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0ff0eb660495..644c889f9a32 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -52,6 +52,7 @@
 #include <linux/version.h>
 #include <net/devlink.h>
 #include "mlx5_core.h"
+#include "thermal.h"
 #include "lib/eq.h"
 #include "fs_core.h"
 #include "lib/mpfs.h"
@@ -1768,6 +1769,10 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (err)
 		dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
 
+	err = mlx5_thermal_init(dev);
+	if (err)
+		dev_err(&pdev->dev, "mlx5_thermal_init failed with error code %d\n", err);
+
 	pci_save_state(pdev);
 	devlink_register(devlink);
 	return 0;
@@ -1796,6 +1801,7 @@ static void remove_one(struct pci_dev *pdev)
 	set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
 	devlink_unregister(devlink);
 	mlx5_sriov_disable(pdev);
+	mlx5_thermal_uninit(dev);
 	mlx5_crdump_disable(dev);
 	mlx5_drain_health_wq(dev);
 	mlx5_uninit_one(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c
new file mode 100644
index 000000000000..e47fa6fb836f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/thermal.h>
+#include <linux/err.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "thermal.h"
+
+#define MLX5_THERMAL_POLL_INT_MSEC	1000	/* polling interval handed to the thermal core */
+#define MLX5_THERMAL_NUM_TRIPS		0	/* the zone is registered without trip points */
+#define MLX5_THERMAL_ASIC_SENSOR_INDEX	0	/* MTMP sensor_index used for the zone's reading */
+
+/* Bit string indicating the writability of trip points, if any */
+#define MLX5_THERMAL_TRIP_MASK	(BIT(MLX5_THERMAL_NUM_TRIPS) - 1)	/* 0 while there are no trips */
+
+struct mlx5_thermal {	/* per-device thermal state, stored in mlx5_core_dev->thermal */
+	struct mlx5_core_dev *mdev;		/* device whose sensor is read */
+	struct thermal_zone_device *tzdev;	/* zone returned by thermal_zone_device_register() */
+};
+
+/* Query MTMP for sensor @id; on success store its raw temperature in *p_temp. */
+static int mlx5_thermal_get_mtmp_temp(struct mlx5_core_dev *mdev, u32 id, int *p_temp)
+{
+	u32 query[MLX5_ST_SZ_DW(mtmp_reg)] = {};
+	u32 reply[MLX5_ST_SZ_DW(mtmp_reg)] = {};
+	int ret;
+
+	/* Tell the device which sensor this access refers to. */
+	MLX5_SET(mtmp_reg, query, sensor_index, id);
+
+	ret = mlx5_core_access_reg(mdev, query, sizeof(query),
+				   reply, sizeof(reply),
+				   MLX5_REG_MTMP, 0, 0);
+	/* Leave *p_temp untouched when the register access fails. */
+	if (!ret)
+		*p_temp = MLX5_GET(mtmp_reg, reply, temperature);
+
+	return ret;
+}
+
+/* thermal_zone_device_ops.get_temp: report the ASIC sensor reading via *p_temp. */
+static int mlx5_thermal_get_temp(struct thermal_zone_device *tzdev,
+				 int *p_temp)
+{
+	struct mlx5_thermal *priv = tzdev->devdata;
+	int ret;
+
+	ret = mlx5_thermal_get_mtmp_temp(priv->mdev, MLX5_THERMAL_ASIC_SENSOR_INDEX,
+					 p_temp);
+	if (ret)
+		return ret;
+
+	/* The device reports 0.125 C units while the thermal framework
+	 * wants 0.001 C, hence the factor of 125 (0.125 / 0.001).
+	 */
+	*p_temp = *p_temp * 125;
+
+	return 0;
+}
+
+static struct thermal_zone_device_ops mlx5_thermal_ops = {	/* passed at zone registration */
+	.get_temp = mlx5_thermal_get_temp,	/* the only callback this driver sets */
+};
+
+/* Register the "mlx5" thermal zone for @mdev unless one already exists; 0 or -errno. */
+int mlx5_thermal_init(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_thermal *thermal;
+	const char *data = "mlx5";
+	int err;
+
+	if (!IS_ERR(thermal_zone_get_zone_by_name(data)))
+		return 0;	/* a zone with this name is already present */
+
+	thermal = kzalloc(sizeof(*thermal), GFP_KERNEL);
+	if (!thermal)
+		return -ENOMEM;
+
+	thermal->mdev = mdev;
+	thermal->tzdev = thermal_zone_device_register(data, MLX5_THERMAL_NUM_TRIPS,
+						      MLX5_THERMAL_TRIP_MASK, thermal,
+						      &mlx5_thermal_ops, NULL, 0,
+						      MLX5_THERMAL_POLL_INT_MSEC);
+	if (IS_ERR(thermal->tzdev)) {
+		/* Save the real errno before kfree() and return it, not -EINVAL. */
+		err = PTR_ERR(thermal->tzdev);
+		dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n",
+			data, PTR_ERR(thermal->tzdev));
+		kfree(thermal);
+		return err;
+	}
+
+	mdev->thermal = thermal;
+	return 0;
+}
+
+/* Tear down @mdev's thermal zone, if one was registered; safe to call twice. */
+void mlx5_thermal_uninit(struct mlx5_core_dev *mdev)
+{
+	if (mdev->thermal)
+		thermal_zone_device_unregister(mdev->thermal->tzdev);
+	kfree(mdev->thermal);	/* kfree(NULL) is a no-op */
+	mdev->thermal = NULL;	/* no dangling pointer, so a repeat call is harmless */
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.h b/drivers/net/ethernet/mellanox/mlx5/core/thermal.h
new file mode 100644
index 000000000000..7d752c122192
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.
+ */
+#ifndef __MLX5_THERMAL_DRIVER_H
+#define __MLX5_THERMAL_DRIVER_H
+
+#if IS_ENABLED(CONFIG_THERMAL)
+int mlx5_thermal_init(struct mlx5_core_dev *mdev);	/* 0 or negative errno */
+void mlx5_thermal_uninit(struct mlx5_core_dev *mdev);
+#else /* !CONFIG_THERMAL: no-op stubs so callers need no #ifdef */
+static inline int mlx5_thermal_init(struct mlx5_core_dev *mdev)
+{
+	mdev->thermal = NULL;	/* nothing is registered without thermal support */
+	return 0;
+}
+
+static inline void mlx5_thermal_uninit(struct mlx5_core_dev *mdev) { }
+#endif /* CONFIG_THERMAL */
+
+#endif /* __MLX5_THERMAL_DRIVER_H */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f33389b42209..7a898113b6b7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -134,6 +134,7 @@ enum {
 	MLX5_REG_PCAM		 = 0x507f,
 	MLX5_REG_NODE_DESC	 = 0x6001,
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
+	MLX5_REG_MTMP		 = 0x900A,
 	MLX5_REG_MCIA		 = 0x9014,
 	MLX5_REG_MFRL		 = 0x9028,
 	MLX5_REG_MLCR		 = 0x902b,
@@ -731,6 +732,7 @@ struct mlx5_fw_tracer;
 struct mlx5_vxlan;
 struct mlx5_geneve;
 struct mlx5_hv_vhca;
+struct mlx5_thermal;
 
 #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
 #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
@@ -808,6 +810,7 @@ struct mlx5_core_dev {
 	struct mlx5_rsc_dump    *rsc_dump;
 	u32                      vsc_addr;
 	struct mlx5_hv_vhca	*hv_vhca;
+	struct mlx5_thermal	*thermal;
 };
 
 struct mlx5_db {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 66d76e97a087..d2c164f0778c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10869,6 +10869,31 @@ struct mlx5_ifc_mrtc_reg_bits {
 	u8         time_l[0x20];
 };
 
+struct mlx5_ifc_mtmp_reg_bits {	/* layout used for MLX5_REG_MTMP accesses */
+	u8         reserved_at_0[0x14];	/* NOTE(review): sizes look like bit widths - confirm */
+	u8         sensor_index[0xc];	/* selects the sensor to query */
+
+	u8         reserved_at_20[0x10];
+	u8         temperature[0x10];	/* reading, in 0.125 C units */
+
+	u8         mte[0x1];
+	u8         mtr[0x1];
+	u8         reserved_at_42[0xe];
+	u8         max_temperature[0x10];
+
+	u8         tee[0x2];
+	u8         reserved_at_62[0xe];
+	u8         temp_threshold_hi[0x10];
+
+	u8         reserved_at_80[0x10];
+	u8         temp_threshold_lo[0x10];
+
+	u8         reserved_at_a0[0x20];
+
+	u8         sensor_name_hi[0x20];
+	u8         sensor_name_lo[0x20];
+};
+
 union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_bufferx_reg_bits bufferx_reg;
 	struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout;
@@ -10931,6 +10956,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_mfrl_reg_bits mfrl_reg;
 	struct mlx5_ifc_mtutc_reg_bits mtutc_reg;
 	struct mlx5_ifc_mrtc_reg_bits mrtc_reg;
+	struct mlx5_ifc_mtmp_reg_bits mtmp_reg;
 	u8         reserved_at_0[0x60e0];
 };
 
-- 
2.39.2


  parent reply	other threads:[~2023-03-14  5:43 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-14  5:42 [pull request][net-next 00/15] mlx5 updates 2023-03-13 Saeed Mahameed
2023-03-14  5:42 ` [net-next 01/15] net/mlx5: remove redundant clear_bit Saeed Mahameed
2023-03-16  5:20   ` patchwork-bot+netdevbpf
2023-03-14  5:42 ` [net-next 02/15] net/mlx5: Stop waiting for PCI up if teardown was triggered Saeed Mahameed
2023-03-14  5:42 ` [net-next 03/15] net/mlx5: Add comment to mlx5_devlink_params_register() Saeed Mahameed
2023-03-14  5:42 ` Saeed Mahameed [this message]
2023-03-14  5:42 ` [net-next 05/15] net/mlx5e: Correct SKB room check to use all room in the fifo Saeed Mahameed
2023-03-16  4:56   ` Jakub Kicinski
2023-03-16  4:59     ` Jakub Kicinski
2023-03-16  5:05       ` Saeed Mahameed
2023-03-16  5:13         ` Jakub Kicinski
2023-03-14  5:42 ` [net-next 06/15] net/mlx5e: Rename RQ/SQ adaptive moderation state flag Saeed Mahameed
2023-03-14  5:42 ` [net-next 07/15] net/mlx5e: Stringify RQ SW state in RQ devlink health diagnostics Saeed Mahameed
2023-03-14  5:42 ` [net-next 08/15] net/mlx5e: Expose SQ SW state as part of SQ " Saeed Mahameed
2023-03-14  5:42 ` [net-next 09/15] net/mlx5e: Add XSK RQ state flag for RQ devlink " Saeed Mahameed
2023-03-14  5:42 ` [net-next 10/15] net/mlx5: Move needed PTYS functions to core layer Saeed Mahameed
2023-03-14  5:42 ` [net-next 11/15] net/mlx5e: Add devlink hairpin queues parameters Saeed Mahameed
2023-03-14  5:42 ` [net-next 12/15] net/mlx5e: Add more information to hairpin table dump Saeed Mahameed
2023-03-14  5:42 ` [net-next 13/15] net/mlx5e: TC, Extract indr setup block checks to function Saeed Mahameed
2023-03-14  5:42 ` [net-next 14/15] net/mlx5e: Enable TC offload for ingress MACVLAN over bond Saeed Mahameed
2023-03-14  5:42 ` [net-next 15/15] net/mlx5e: Enable TC offload for egress " Saeed Mahameed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230314054234.267365-5-saeed@kernel.org \
    --to=saeed@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=gal@nvidia.com \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    --cc=spatra@nvidia.com \
    --cc=tariqt@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).