linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] thermal: use a custom lock class for intel x86_pkg_temp
@ 2025-06-24  9:30 Benjamin Berg
  2025-06-24 12:45 ` kernel test robot
  0 siblings, 1 reply; 2+ messages in thread
From: Benjamin Berg @ 2025-06-24  9:30 UTC (permalink / raw)
  To: linux-pm; +Cc: Zhang Rui, Benjamin Berg, Hans de Goede

From: Benjamin Berg <benjamin.berg@intel.com>

The Intel driver has code paths that take tz->lock while the
cpuhp_state-up lock is held. Because the cpuhp_state-up lock is also
used in other code paths, lockdep may report possible deadlocks that
run through unrelated thermal zone devices.

Fix these false positives by using a separate lockdep class for the
x86_pkg_temp thermal device.

Reported-by: Hans de Goede <hansg@kernel.org>
Closes: https://lore.kernel.org/linux-pm/e9d7ef79-6a24-4515-aa35-d1f2357da798@kernel.org/
Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>

---

Hi,

I believe that this should solve the lockdep warning that Hans was
seeing. That said, I have not tested it much.

Benjamin
---
 drivers/thermal/intel/x86_pkg_temp_thermal.c | 2 ++
 drivers/thermal/thermal_core.c               | 7 +++++++
 include/linux/thermal.h                      | 7 +++++++
 3 files changed, 16 insertions(+)

diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
index 3fc679b6f11b..ac219d3f688e 100644
--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -310,6 +310,7 @@ static int pkg_temp_thermal_trips_init(int cpu, int tj_max,
 
 static int pkg_temp_thermal_device_add(unsigned int cpu)
 {
+	struct lock_class_key x86_pkg_temp_class;
 	struct thermal_trip trips[MAX_NUMBER_OF_TRIPS] = { 0 };
 	int id = topology_logical_die_id(cpu);
 	u32 eax, ebx, ecx, edx;
@@ -349,6 +350,7 @@ static int pkg_temp_thermal_device_add(unsigned int cpu)
 		err = PTR_ERR(zonedev->tzone);
 		goto out_kfree_zonedev;
 	}
+	thermal_zone_device_set_lock_class(zonedev->tzone, &x86_pkg_temp_class);
 	err = thermal_zone_device_enable(zonedev->tzone);
 	if (err)
 		goto out_unregister_tz;
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 17ca5c082643..2a34af9cd02a 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1657,6 +1657,13 @@ struct thermal_zone_device *thermal_tripless_zone_device_register(
 }
 EXPORT_SYMBOL_GPL(thermal_tripless_zone_device_register);
 
+void thermal_zone_set_lock_class(struct thermal_zone_device *tz,
+				 struct lock_class_key *lock_class)
+{
+	lockdep_set_class_and_name(&tz->lock, lock_class, tz->type);
+}
+EXPORT_SYMBOL_GPL(thermal_zone_set_lock_class);
+
 void *thermal_zone_device_priv(struct thermal_zone_device *tzd)
 {
 	return tzd->devdata;
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 0b5ed6821080..c39a1fe49c86 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -240,6 +240,9 @@ struct thermal_zone_device *thermal_tripless_zone_device_register(
 					const struct thermal_zone_device_ops *ops,
 					const struct thermal_zone_params *tzp);
 
+void thermal_zone_set_lock_class(struct thermal_zone_device *tz,
+				 struct lock_class_key *lock_class);
+
 void thermal_zone_device_unregister(struct thermal_zone_device *tz);
 
 void *thermal_zone_device_priv(struct thermal_zone_device *tzd);
@@ -290,6 +293,10 @@ static inline struct thermal_zone_device *thermal_tripless_zone_device_register(
 					const struct thermal_zone_params *tzp)
 { return ERR_PTR(-ENODEV); }
 
+static inline void thermal_zone_set_lock_class(struct thermal_zone_device *tz,
+					       struct lock_class_key *lock_class)
+{ }
+
 static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 { }
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] thermal: use a custom lock class for intel x86_pkg_temp
  2025-06-24  9:30 [PATCH] thermal: use a custom lock class for intel x86_pkg_temp Benjamin Berg
@ 2025-06-24 12:45 ` kernel test robot
  0 siblings, 0 replies; 2+ messages in thread
From: kernel test robot @ 2025-06-24 12:45 UTC (permalink / raw)
  To: Benjamin Berg, linux-pm
  Cc: oe-kbuild-all, Zhang Rui, Benjamin Berg, Hans de Goede

Hi Benjamin,

kernel test robot noticed the following build errors:

[auto build test ERROR on rafael-pm/thermal]
[also build test ERROR on amd-pstate/linux-next amd-pstate/bleeding-edge linus/master v6.16-rc3 next-20250623]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Benjamin-Berg/thermal-use-a-custom-lock-class-for-intel-x86_pkg_temp/20250624-173220
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git thermal
patch link:    https://lore.kernel.org/r/20250624093001.1359961-1-benjamin%40sipsolutions.net
patch subject: [PATCH] thermal: use a custom lock class for intel x86_pkg_temp
config: i386-buildonly-randconfig-006-20250624 (https://download.01.org/0day-ci/archive/20250624/202506242034.nhZNCW5t-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250624/202506242034.nhZNCW5t-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202506242034.nhZNCW5t-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/thermal/intel/x86_pkg_temp_thermal.c: In function 'pkg_temp_thermal_device_add':
>> drivers/thermal/intel/x86_pkg_temp_thermal.c:351:9: error: implicit declaration of function 'thermal_zone_device_set_lock_class'; did you mean 'thermal_zone_set_lock_class'? [-Werror=implicit-function-declaration]
     351 |         thermal_zone_device_set_lock_class(zonedev->tzone, &x86_pkg_temp_class);
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |         thermal_zone_set_lock_class
   cc1: some warnings being treated as errors


vim +351 drivers/thermal/intel/x86_pkg_temp_thermal.c

   309	
   310	static int pkg_temp_thermal_device_add(unsigned int cpu)
   311	{
   312		struct lock_class_key x86_pkg_temp_class;
   313		struct thermal_trip trips[MAX_NUMBER_OF_TRIPS] = { 0 };
   314		int id = topology_logical_die_id(cpu);
   315		u32 eax, ebx, ecx, edx;
   316		struct zone_device *zonedev;
   317		int thres_count, err;
   318		int tj_max;
   319	
   320		if (id >= max_id)
   321			return -ENOMEM;
   322	
   323		cpuid(6, &eax, &ebx, &ecx, &edx);
   324		thres_count = ebx & 0x07;
   325		if (!thres_count)
   326			return -ENODEV;
   327	
   328		thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
   329	
   330		tj_max = intel_tcc_get_tjmax(cpu);
   331		if (tj_max < 0)
   332			return tj_max;
   333	
   334		zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
   335		if (!zonedev)
   336			return -ENOMEM;
   337	
   338		err = pkg_temp_thermal_trips_init(cpu, tj_max, trips, thres_count);
   339		if (err)
   340			goto out_kfree_zonedev;
   341	
   342		INIT_DELAYED_WORK(&zonedev->work, pkg_temp_thermal_threshold_work_fn);
   343		zonedev->cpu = cpu;
   344		zonedev->tzone = thermal_zone_device_register_with_trips("x86_pkg_temp",
   345				trips, thres_count,
   346				zonedev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
   347		if (IS_ERR(zonedev->tzone)) {
   348			err = PTR_ERR(zonedev->tzone);
   349			goto out_kfree_zonedev;
   350		}
 > 351		thermal_zone_device_set_lock_class(zonedev->tzone, &x86_pkg_temp_class);
   352		err = thermal_zone_device_enable(zonedev->tzone);
   353		if (err)
   354			goto out_unregister_tz;
   355	
   356		/* Store MSR value for package thermal interrupt, to restore at exit */
   357		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, zonedev->msr_pkg_therm_low,
   358		      zonedev->msr_pkg_therm_high);
   359	
   360		cpumask_set_cpu(cpu, &zonedev->cpumask);
   361		raw_spin_lock_irq(&pkg_temp_lock);
   362		zones[id] = zonedev;
   363		raw_spin_unlock_irq(&pkg_temp_lock);
   364	
   365		return 0;
   366	
   367	out_unregister_tz:
   368		thermal_zone_device_unregister(zonedev->tzone);
   369	out_kfree_zonedev:
   370		kfree(zonedev);
   371		return err;
   372	}
   373	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-06-24 12:46 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-06-24  9:30 [PATCH] thermal: use a custom lock class for intel x86_pkg_temp Benjamin Berg
2025-06-24 12:45 ` kernel test robot

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).