public inbox for ltp@lists.linux.it
 help / color / mirror / Atom feed
* [LTP] [PATCH v13] thermal: add new test group
@ 2026-03-23 14:08 Piotr Kubaj
  2026-03-24 13:05 ` Andrea Cervesato via ltp
  0 siblings, 1 reply; 3+ messages in thread
From: Piotr Kubaj @ 2026-03-23 14:08 UTC (permalink / raw)
  To: ltp; +Cc: helena.anna.dubel, tomasz.ossowski, rafael.j.wysocki,
	daniel.niestepski

This group currently consists of a single test for the CPU package
thermal sensor interface on Intel platforms.
The test first records the initial count of thermal interrupts. Then it
lowers the threshold for sending a thermal interrupt to just above
the current temperature and runs a workload on the CPU. Finally, it
restores the original thermal threshold and checks whether the number
of thermal interrupts increased.

Signed-off-by: Piotr Kubaj <piotr.kubaj@intel.com>
Reviewed-by: Petr Vorel <pvorel@suse.cz>
---
One more calloc() replacement.
---
 runtest/thermal                               |   3 +
 testcases/kernel/Makefile                     |   1 +
 testcases/kernel/thermal/.gitignore           |   1 +
 testcases/kernel/thermal/Makefile             |   9 +
 .../kernel/thermal/thermal_interrupt_events.c | 217 ++++++++++++++++++
 5 files changed, 231 insertions(+)
 create mode 100644 runtest/thermal
 create mode 100644 testcases/kernel/thermal/.gitignore
 create mode 100644 testcases/kernel/thermal/Makefile
 create mode 100644 testcases/kernel/thermal/thermal_interrupt_events.c

diff --git a/runtest/thermal b/runtest/thermal
new file mode 100644
index 000000000..57e3d29f8
--- /dev/null
+++ b/runtest/thermal
@@ -0,0 +1,3 @@
+# Thermal driver API
+# https://docs.kernel.org/driver-api/thermal/
+thermal_interrupt_events thermal_interrupt_events
diff --git a/testcases/kernel/Makefile b/testcases/kernel/Makefile
index 98fd45a9d..ac816e4e8 100644
--- a/testcases/kernel/Makefile
+++ b/testcases/kernel/Makefile
@@ -36,6 +36,7 @@ SUBDIRS			+= connectors \
 			   sched \
 			   security \
 			   sound \
+			   thermal \
 			   tracing \
 			   uevents \
 			   watchqueue \
diff --git a/testcases/kernel/thermal/.gitignore b/testcases/kernel/thermal/.gitignore
new file mode 100644
index 000000000..1090bdad8
--- /dev/null
+++ b/testcases/kernel/thermal/.gitignore
@@ -0,0 +1 @@
+thermal_interrupt_events
diff --git a/testcases/kernel/thermal/Makefile b/testcases/kernel/thermal/Makefile
new file mode 100644
index 000000000..4657c3fb3
--- /dev/null
+++ b/testcases/kernel/thermal/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2025, Intel Corporation. All rights reserved.
+# Author: Piotr Kubaj <piotr.kubaj@intel.com>
+
+top_srcdir             ?= ../../..
+
+include $(top_srcdir)/include/mk/testcases.mk
+
+include $(top_srcdir)/include/mk/generic_leaf_target.mk
diff --git a/testcases/kernel/thermal/thermal_interrupt_events.c b/testcases/kernel/thermal/thermal_interrupt_events.c
new file mode 100644
index 000000000..85b211c3e
--- /dev/null
+++ b/testcases/kernel/thermal/thermal_interrupt_events.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ * Copyright (C) 2026 Intel - http://www.intel.com/
+ */
+
+/*\
+ * Tests the CPU package thermal sensor interface for Intel platforms.
+ *
+ * Works by checking the initial count of thermal interrupts. Then it
+ * decreases the threshold for sending a thermal interrupt to just above
+ * the current temperature and runs a workload on the CPU. Finally, it restores
+ * the original thermal threshold and checks whether the number of thermal
+ * interrupts increased.
+ */
+
+#include <ctype.h>
+#include "tst_safe_stdio.h"
+#include "tst_test.h"
+#include "tst_timer_test.h"
+
+#define	RUNTIME		30
+#define	SLEEP		10
+#define	TEMP_INCREMENT	10
+
+static bool x86_pkg_temp_tz_found, *x86_pkg_temp_tz;
+static char temp_path[PATH_MAX], trip_path[PATH_MAX];
+static int nproc, temp_high, temp, trip, tz_counter;
+static uint64_t *interrupt_init, *interrupt_later;
+
+static void read_interrupts(uint64_t *interrupts, const int nproc)
+{
+	bool interrupts_found = false;
+	char line[8192];
+
+	memset(interrupts, 0, nproc * sizeof(*interrupts));
+	FILE *fp = SAFE_FOPEN("/proc/interrupts", "r");
+
+	while (fgets(line, sizeof(line), fp)) {
+		if (strstr(line, "Thermal event interrupts")) {
+			interrupts_found = true;
+			char *ptr = strchr(line, ':');
+
+			for (int i = 0; i < nproc; i++) {
+				char *endptr;
+
+				while (*ptr && !isdigit(*ptr))
+					ptr++;
+
+				errno = 0;
+
+				interrupts[i] = strtoull(ptr, &endptr, 10);
+
+				if (ptr == endptr)
+					tst_brk(TBROK, "interrupt not found");
+
+				ptr = endptr;
+				tst_res(TDEBUG, "interrupts[%d]: %ld", i, interrupts[i]);
+			}
+			break;
+		}
+	}
+	SAFE_FCLOSE(fp);
+	if (!interrupts_found)
+		tst_brk(TCONF, "No Thermal event interrupts line in /proc/interrupts");
+}
+
+static void setup(void)
+{
+	char line[8192];
+
+	nproc = tst_ncpus();
+	tst_res(TDEBUG, "Number of logical cores: %d", nproc);
+	interrupt_init = SAFE_CALLOC(nproc, sizeof(uint64_t));
+	interrupt_later = SAFE_CALLOC(nproc, sizeof(uint64_t));
+
+	DIR *dir = SAFE_OPENDIR("/sys/class/thermal/");
+	struct dirent *entry;
+
+	while ((entry = SAFE_READDIR(dir))) {
+		if ((!strncmp(entry->d_name, "thermal_zone", sizeof("thermal_zone") - 1)))
+			tz_counter++;
+	}
+	SAFE_CLOSEDIR(dir);
+	tst_res(TDEBUG, "Found %d thermal zone(s)", tz_counter);
+
+	read_interrupts(interrupt_init, nproc);
+
+	x86_pkg_temp_tz = SAFE_CALLOC(tz_counter, sizeof(bool));
+
+	for (int i = 0; i < tz_counter; i++) {
+		char path[PATH_MAX];
+
+		snprintf(path, PATH_MAX, "/sys/class/thermal/thermal_zone%d/type", i);
+		tst_res(TDEBUG, "Checking whether %s is x86_pkg_temp", path);
+
+		SAFE_FILE_SCANF(path, "%s", line);
+		if (strstr(line, "x86_pkg_temp")) {
+			tst_res(TDEBUG, "Thermal zone %d uses x86_pkg_temp", i);
+			x86_pkg_temp_tz[i] = true;
+			x86_pkg_temp_tz_found = true;
+		}
+	}
+
+	if (!x86_pkg_temp_tz_found)
+		tst_brk(TCONF, "No thermal zone uses x86_pkg_temp");
+}
+
+static void *cpu_workload(double run_time)
+{
+	tst_timer_start(CLOCK_MONOTONIC);
+	int num = 2;
+
+	while (!tst_timer_expired_ms(run_time * 1000)) {
+		for (int i = 2; i * i <= num; i++) {
+			if (num % i == 0)
+				break;
+		}
+		num++;
+		SAFE_FILE_SCANF(temp_path, "%d", &temp);
+
+		if (temp > temp_high)
+			break;
+	}
+	return NULL;
+}
+
+static void test_zone(int i)
+{
+	int sleep_time = SLEEP;
+	double run_time = RUNTIME;
+
+	snprintf(temp_path, PATH_MAX, "/sys/class/thermal/thermal_zone%d/temp", i);
+	tst_res(TINFO, "Testing %s", temp_path);
+	SAFE_FILE_SCANF(temp_path, "%d", &temp);
+	if (temp < 0)
+		tst_brk(TBROK, "Unexpected zone temperature value %d", temp);
+
+	tst_res(TDEBUG, "Current temperature for %s: %d", temp_path, temp);
+
+	temp_high = temp + TEMP_INCREMENT;
+
+	snprintf(trip_path, PATH_MAX, "/sys/class/thermal/thermal_zone%d/trip_point_1_temp", i);
+
+	tst_res(TDEBUG, "Setting new trip_point_1_temp value: %d", temp_high);
+	SAFE_FILE_SCANF(trip_path, "%d", &trip);
+	SAFE_FILE_PRINTF(trip_path, "%d", temp_high);
+
+	while (sleep_time > 0) {
+		tst_res(TDEBUG, "Running for %f seconds, then sleeping for %d seconds", run_time, sleep_time);
+
+		for (int j = 0; j < nproc; j++) {
+			if (!SAFE_FORK()) {
+				cpu_workload(run_time);
+				exit(0);
+			}
+		}
+
+		tst_reap_children();
+
+		SAFE_FILE_SCANF(temp_path, "%d", &temp);
+		tst_res(TDEBUG, "Temperature for %s after a test: %d", temp_path, temp);
+
+		if (temp > temp_high)
+			break;
+		sleep(sleep_time--);
+		run_time -= 3;
+	}
+
+}
+
+static void cleanup(void)
+{
+	if (x86_pkg_temp_tz_found)
+		SAFE_FILE_PRINTF(trip_path, "%d", trip);
+
+	free(x86_pkg_temp_tz);
+	free(interrupt_init);
+	free(interrupt_later);
+}
+
+static void run(void)
+{
+	for (int i = 0; i < tz_counter; i++) {
+		if (x86_pkg_temp_tz[i])
+			test_zone(i);
+	}
+	read_interrupts(interrupt_later, nproc);
+
+	for (int i = 0; i < nproc; i++) {
+		if (interrupt_later[i] < interrupt_init[i])
+			tst_res(TFAIL, "CPU %d interrupt counter: %ld (previous: %ld)",
+				i, interrupt_later[i], interrupt_init[i]);
+	}
+
+	if (temp <= temp_high)
+		tst_res(TFAIL, "Zone temperature is not rising as expected");
+	else
+		tst_res(TPASS, "x86 package thermal interrupt triggered");
+}
+
+static struct tst_test test = {
+	.cleanup = cleanup,
+	.forks_child = 1,
+	.needs_kconfigs = (const char *const []) {
+		"CONFIG_X86_THERMAL_VECTOR",
+		NULL
+	},
+	.min_runtime = 5,
+	.needs_root = 1,
+	.setup = setup,
+	.tags = (const struct tst_tag[]) {
+		{"linux-git", "9635c586a559ba0e45b2bfbff79c937ddbaf1a62"},
+		{}
+	},
+	.test_all = run
+};
-- 
2.47.3

---------------------------------------------------------------------
Intel Technology Poland sp. z o.o.
ul. Slowackiego 173 | 80-298 Gdansk | Sad Rejonowy Gdansk Polnoc | VII Wydzial Gospodarczy Krajowego Rejestru Sadowego - KRS 101882 | NIP 957-07-52-316 | Kapital zakladowy 200.000 PLN.
Spolka oswiadcza, ze posiada status duzego przedsiebiorcy w rozumieniu ustawy z dnia 8 marca 2013 r. o przeciwdzialaniu nadmiernym opoznieniom w transakcjach handlowych.

Ta wiadomosc wraz z zalacznikami jest przeznaczona dla okreslonego adresata i moze zawierac informacje poufne. W razie przypadkowego otrzymania tej wiadomosci, prosimy o powiadomienie nadawcy oraz trwale jej usuniecie; jakiekolwiek przegladanie lub rozpowszechnianie jest zabronione.
This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). If you are not the intended recipient, please contact the sender and delete all copies; any review or distribution by others is strictly prohibited.


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [LTP] [PATCH v13] thermal: add new test group
  2026-03-23 14:08 [LTP] [PATCH v13] thermal: add new test group Piotr Kubaj
@ 2026-03-24 13:05 ` Andrea Cervesato via ltp
  2026-03-25 10:55   ` Kubaj, Piotr
  0 siblings, 1 reply; 3+ messages in thread
From: Andrea Cervesato via ltp @ 2026-03-24 13:05 UTC (permalink / raw)
  To: Piotr Kubaj
  Cc: daniel.niestepski, tomasz.ossowski, helena.anna.dubel,
	rafael.j.wysocki, ltp

Hi Piotr,

> +				ptr = endptr;
> +				tst_res(TDEBUG, "interrupts[%d]: %ld", i, interrupts[i]);

`interrupts` is uint64_t*, so we should use %ul instead.

> +static void run(void)
> +{
> +	for (int i = 0; i < tz_counter; i++) {
> +		if (x86_pkg_temp_tz[i])
> +			test_zone(i);
> +	}
> +	read_interrupts(interrupt_later, nproc);
> +
> +	for (int i = 0; i < nproc; i++) {
> +		if (interrupt_later[i] < interrupt_init[i])
> +			tst_res(TFAIL, "CPU %d interrupt counter: %ld (previous: %ld)",
> +				i, interrupt_later[i], interrupt_init[i]);

We always report TFAIL when the counter decreases, but we never check
whether it increases. Is there a reason for that?

> +	}
> +
> +	if (temp <= temp_high)
> +		tst_res(TFAIL, "Zone temperature is not rising as expected");
> +	else
> +		tst_res(TPASS, "x86 package thermal interrupt triggered");
> +}

I also have other questions here. Why are we considering only the
last zone's temperature? Is there a reason for it, or should we save
the temperatures of all zones and eventually verify that the temperature
increased for each one of them specifically?

Because in a single-socket system (I guess) we have a single
temperature for all the zones, but on e.g. a dual-socket server, this
test would verify only that the last zone's temperature has risen
above the higher level. And this is wrong, according to the goal of
this test. We want to verify that the kernel is working correctly on
all systems, correctly increasing the thermal counter for each thermal
zone. If this is correct, `temp` and `temp_high` should be arrays,
where each item is associated with a zone, and they should be processed
only at the end for TPASS/TFAIL.


Kind Regards,
--
Andrea Cervesato
SUSE QE Automation Engineer Linux
andrea.cervesato@suse.com

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [LTP] [PATCH v13] thermal: add new test group
  2026-03-24 13:05 ` Andrea Cervesato via ltp
@ 2026-03-25 10:55   ` Kubaj, Piotr
  0 siblings, 0 replies; 3+ messages in thread
From: Kubaj, Piotr @ 2026-03-25 10:55 UTC (permalink / raw)
  To: andrea.cervesato@suse.com
  Cc: Wysocki, Rafael J, Ossowski, Tomasz, Dubel, Helena Anna,
	Niestepski, Daniel, ltp@lists.linux.it

2026-03-24 (火) の 13:05 +0000 に Andrea Cervesato さんは書きました:
> Hi Piotr,
> 
> > +				ptr = endptr;
> > +				tst_res(TDEBUG, "interrupts[%d]:
> > %ld", i, interrupts[i]);
> 
> `interrupts` is uint64_t*, so we should use %ul instead.
That causes:
In file included from thermal_interrupt_events.c:19:
thermal_interrupt_events.c: In function ‘read_interrupts’:
../../../include/tst_test.h:74:55: warning: format ‘%u’ expects
argument of type ‘unsigned int’, but argument 6 has type ‘uint64_t’
{aka ‘long unsigned int’} [-Wformat=]
   74 |                 tst_res_(__FILE__, __LINE__, (ttype),
(arg_fmt), ##__VA_ARGS__);\
      |                                                       ^~~~~~~~~
thermal_interrupt_events.c:58:33: note: in expansion of macro ‘tst_res’
   58 |                                 tst_res(TDEBUG,
"interrupts[%d]: %ul", i, interrupts[i]);
      |                                 ^~~~~~~

> 
> > +static void run(void)
> > +{
> > +	for (int i = 0; i < tz_counter; i++) {
> > +		if (x86_pkg_temp_tz[i])
> > +			test_zone(i);
> > +	}
> > +	read_interrupts(interrupt_later, nproc);
> > +
> > +	for (int i = 0; i < nproc; i++) {
> > +		if (interrupt_later[i] < interrupt_init[i])
> > +			tst_res(TFAIL, "CPU %d interrupt counter:
> > %ld (previous: %ld)",
> > +				i, interrupt_later[i],
> > interrupt_init[i]);
> 
> We always consider TFAIL when counter decreases, but we never
> consider when
> it increases. Is there a reason for that?
Yes, increasing is expected.

> 
> > +	}
> > +
> > +	if (temp <= temp_high)
> > +		tst_res(TFAIL, "Zone temperature is not rising as
> > expected");
> > +	else
> > +		tst_res(TPASS, "x86 package thermal interrupt
> > triggered");
> > +}
> 
> I also have other questions in here. Why are we considering only the
> last zone temperature? Is there a reason for it, or we should save
> all
> temperature for all zones and eventually verify temperature increased
> specifically for each one of them?
> 
> Because in a single socket system (I guess) we have one single
> temperature for all the zones, but on i.e. dual socket server, this
> test would verify that only the last zone has increased temperature
> above the higher level. And this is wrong, according to the goal of
> this test. We want to verify that kernel is correctly working for all
> systems, correctly increasing the thermal counter for each thermal
> zone. If this is correct, `temp` and `temp_high` should be an array,
> where each item is associated to a zone, and it should be processed
> only at the end for TPASS/TFAIL.
You're right, all the zones should be checked. I opted for a simpler
solution: after testing each zone, check whether its temperature rose,
since it's possible that only one of the zones has an issue. Because
interrupts also need to be checked after each zone test, I moved
reading the initial interrupts array into the run() function as well.
cleanup() also needed a fix to restore the previous values for all the
tested zones.
> 
> 
> Kind Regards,
> --
> Andrea Cervesato
> SUSE QE Automation Engineer Linux
> andrea.cervesato@suse.com
---------------------------------------------------------------------
Intel Technology Poland sp. z o.o.
ul. Slowackiego 173 | 80-298 Gdansk | Sad Rejonowy Gdansk Polnoc | VII Wydzial Gospodarczy Krajowego Rejestru Sadowego - KRS 101882 | NIP 957-07-52-316 | Kapital zakladowy 200.000 PLN.
Spolka oswiadcza, ze posiada status duzego przedsiebiorcy w rozumieniu ustawy z dnia 8 marca 2013 r. o przeciwdzialaniu nadmiernym opoznieniom w transakcjach handlowych.

Ta wiadomosc wraz z zalacznikami jest przeznaczona dla okreslonego adresata i moze zawierac informacje poufne. W razie przypadkowego otrzymania tej wiadomosci, prosimy o powiadomienie nadawcy oraz trwale jej usuniecie; jakiekolwiek przegladanie lub rozpowszechnianie jest zabronione.
This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). If you are not the intended recipient, please contact the sender and delete all copies; any review or distribution by others is strictly prohibited.

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-03-25 10:55 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-23 14:08 [LTP] [PATCH v13] thermal: add new test group Piotr Kubaj
2026-03-24 13:05 ` Andrea Cervesato via ltp
2026-03-25 10:55   ` Kubaj, Piotr

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox