igt-dev.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH i-g-t 0/1] Enable/Disable DRM debug logging between tests in shard mode
@ 2025-12-15 10:33 Sobin Thomas
  2025-12-15 10:33 ` [PATCH i-g-t 1/1] feat: " Sobin Thomas
  0 siblings, 1 reply; 4+ messages in thread
From: Sobin Thomas @ 2025-12-15 10:33 UTC (permalink / raw)
  To: igt-dev, zbigniew.kempczynski; +Cc: Sobin Thomas

In shard mode, when disk usage limit is exceeded during a test, DRM
debug logging is disabled by writing "0" to /sys/module/drm/parameters/debug
to prevent premature exit. However, this disabled state persisted across
subsequent tests in the same shard, causing loss of debug information for
tests that did not exceed the disk limit.

Add a global flag to track when DRM debug was disabled in a previous test.
At the start of each new test in monitor_output(), check this flag and
restore DRM debug logging to the original value (or default "14" if the
original value could not be saved). Verify the write succeeded by reading
back the value, retrying up to 10 times with 100ms delays if needed.

The original DRM debug value is saved the first time debug logging is
disabled, in disable_drm_debug(), so that it can be restored to the correct
state later. The per-test logging_disabled flag remains local to
monitor_output() to track state within the current test execution.

This ensures each test starts with a clean state and proper debug logging,
while still allowing dynamic disabling when disk limits are exceeded.

Sobin Thomas (1):
  feat: Enable/Disable DRM debug logging between tests in shard mode

 runner/executor.c | 148 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 147 insertions(+), 1 deletion(-)

-- 
2.51.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH i-g-t 1/1] feat: Enable/Disable DRM debug logging between tests in shard mode
  2025-12-15 10:33 [PATCH i-g-t 0/1] Enable/Disable DRM debug logging between tests in shard mode Sobin Thomas
@ 2025-12-15 10:33 ` Sobin Thomas
  2025-12-15 13:02   ` Jani Nikula
  2025-12-16  9:15   ` Zbigniew Kempczyński
  0 siblings, 2 replies; 4+ messages in thread
From: Sobin Thomas @ 2025-12-15 10:33 UTC (permalink / raw)
  To: igt-dev, zbigniew.kempczynski; +Cc: Sobin Thomas

In shard mode, when disk usage limit is exceeded during a test, DRM
debug logging is disabled by writing "0" to /sys/module/drm/parameters/debug
to prevent premature exit. However, this disabled state persisted across
subsequent tests in the same shard, causing loss of debug information for
tests that did not exceed the disk limit.

Add a global flag to track when DRM debug was disabled in a previous test.
At the start of each new test in monitor_output(), check this flag and
restore DRM debug logging to the original value (or default "14" if the
original value could not be saved). Verify the write succeeded by reading
back the value, retrying up to 10 times with 100ms delays if needed.

The original DRM debug value is saved the first time debug logging is
disabled, in disable_drm_debug(), so that it can be restored to the correct
state later. The per-test logging_disabled flag remains local to
monitor_output() to track state within the current test execution.

This ensures each test starts with a clean state and proper debug logging,
while still allowing dynamic disabling when disk limits are exceeded.

Signed-off-by: Sobin Thomas <sobin.thomas@intel.com>
---
 runner/executor.c | 148 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 147 insertions(+), 1 deletion(-)

diff --git a/runner/executor.c b/runner/executor.c
index 847abe481..8b1bb4d33 100644
--- a/runner/executor.c
+++ b/runner/executor.c
@@ -45,12 +45,15 @@
 #define KMSG_HEADER "[IGT] "
 #define KMSG_WARN 4
 #define GRACEFUL_EXITCODE -SIGHUP
-
+#define DRM_DEBUG_DEFAULT "14"
+#define DRM_DEBUG_DISABLE "0x0"
 static struct {
 	int *fds;
 	size_t num_dogs;
 } watchdogs;
 
+static bool global_logging_disabled;
+
 static void runner_gettime(struct timespec *tv)
 {
 	if (clock_gettime(CLOCK_BOOTTIME, tv))
@@ -948,6 +951,111 @@ static size_t calc_last_dmesg_chunk(size_t limit, size_t disk_usage)
 	return dt != 0 ? dt : -1;
 }
 
+static char original_drm_debug[16] = {0};
+static bool drm_debug_saved;
+
+static void restore_drm_debug(void)
+{
+	int fd;
+	static const char default_debug[] = "14";
+	const char *value_to_write;
+	char readback[16];
+	ssize_t cnt;
+	int retries = 10;
+	bool success = false;
+
+	// Use saved value if available, otherwise use default "14"
+	value_to_write = drm_debug_saved ? original_drm_debug : DRM_DEBUG_DEFAULT;
+	outf("Restoring DRM Debug to %s", value_to_write);
+	while (retries-- > 0 && !success) {
+		outf("Restoring DRM Debug");
+		/* Write the value */
+		fd = open("/sys/module/drm/parameters/debug", O_WRONLY);
+		if (fd >= 0) {
+			write(fd, default_debug, sizeof(default_debug) - 1);
+			close(fd);
+		} else {
+			errf("Failed to open /sys/module/drm/parameters/debug"
+			     "for writing: %m\n");
+			break;
+		}
+
+	/* Read it back to verify */
+	fd = open("/sys/module/drm/parameters/debug", O_RDONLY);
+	if (fd >= 0) {
+		cnt = read(fd, readback, sizeof(readback) - 1);
+		if (cnt > 0) {
+			readback[cnt] = '\0';
+			/* Remove trailing newline if present */
+			if (cnt > 0 && readback[cnt - 1] == '\n')
+				readback[cnt - 1] = '\0';
+
+			/* Check if it matches what we wrote */
+			if (strcmp(readback, DRM_DEBUG_DEFAULT) == 0) {
+				outf("Write Success");
+				success = true;
+				if (settings->log_level >= LOG_LEVEL_NORMAL) {
+					errf("[RUNNER] Successfully restored "
+					     "DRM debug to %s\n", readback);
+				}
+			} else {
+				errf("[RUNNER] DRM debug readback mismatch:"
+				     "wrote 14, read %s (retry %d)\n",
+					readback, 10 - retries);
+				usleep(100000); /* Wait 100ms before retry */
+			}
+		}
+		close(fd);
+	} else {
+		errf("Failed to open /sys/module/drm/parameters/debug for reading: %m\n");
+		break;
+	}
+	}
+
+	if (!success)
+		errf("[RUNNER] Failed to restore DRM debug to default "
+		     "after 10 retries\n");
+}
+
+static void disable_drm_debug(void)
+{
+	int fd;
+	static const char zero[] = "0x0";
+	const char *msg = "[RUNNER] logging disabled due to disk usage limit\n";
+	int cnt = 0;
+	// Save the current value before disabling
+	if (!drm_debug_saved) {
+		fd = open("/sys/module/drm/parameters/debug", O_RDONLY);
+	if (fd >= 0) {
+		cnt = read(fd, original_drm_debug, sizeof(original_drm_debug) - 1);
+		if (cnt > 0) {
+			original_drm_debug[cnt] = '\0';
+			// Remove trailing newline if present
+			if (cnt > 0 && original_drm_debug[cnt - 1] == '\n')
+				original_drm_debug[cnt - 1] = '\0';
+			drm_debug_saved = true;
+		}
+		close(fd);
+		}
+	}
+
+	fd = open("/sys/module/drm/parameters/debug", O_WRONLY);
+	if (fd >= 0) {
+		write(fd, zero, sizeof(zero) - 1);
+		close(fd);
+		fprintf(stdout, "%s", msg);
+		fprintf(stderr, "%s", msg);
+
+		fflush(stdout);
+		fflush(stderr);
+
+		/* Log to kernel that we disabled debug */
+		kmsg_log(4, "igt_runner: Disabled DRM debug due to disk usage limit\n");
+	}
+	global_logging_disabled = true;
+}
+
+
 /*
  * Returns:
  *  =0 - Success
@@ -984,10 +1092,20 @@ static int monitor_output(pid_t child,
 	long dmesgwritten;
 	bool socket_comms_used = false; /* whether the test actually uses comms */
 	bool results_received = false; /* whether we already have test results that might need overriding if we detect an abort condition */
+	bool logging_disabled = false;
 
 	runner_gettime(&time_beg);
 	time_last_activity = time_last_subtest = time_killed = time_beg;
 
+	/* Restore DRM debug at the start of each test if it was disabled earlier */
+	if (global_logging_disabled) {
+		restore_drm_debug();
+		global_logging_disabled = false;
+
+		if (settings->log_level >= LOG_LEVEL_NORMAL)
+			outf("[RUNNER] Restored DRM debug logging for new test\n");
+	}
+
 	if (errfd > nfds)
 		nfds = errfd;
 	if (socketfd > nfds)
@@ -1321,6 +1439,34 @@ static int monitor_output(pid_t child,
 			}
 		}
 
+		/* Check if we need to disable logging due to disk usage limit */
+
+		if (!logging_disabled && disk_usage_limit_exceeded(settings, disk_usage)) {
+			char msg[256];
+
+			logging_disabled = true;
+
+			/* Disable DRM debug to reduce dmesg spam */
+			disable_drm_debug();
+
+			/* Write warning message to output files */
+			snprintf(msg, sizeof(msg),
+				"\n[RUNNER] Disk usage limit reached (%zu/%zu bytes). "
+				"Disabling further logging and DRM debug. Test continues.\n ",
+				disk_usage, settings->disk_usage_limit);
+
+			write(outputs[_F_OUT], msg, strlen(msg));
+			write(outputs[_F_ERR], msg, strlen(msg));
+
+			if (settings->log_level >= LOG_LEVEL_NORMAL)
+				fprintf(stderr, "%s", msg);
+
+			if (settings->sync) {
+				fdatasync(outputs[_F_OUT]);
+				fdatasync(outputs[_F_ERR]);
+			}
+		}
+
 		if (sigfd >= 0 && FD_ISSET(sigfd, &set)) {
 			double time;
 
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH i-g-t 1/1] feat: Enable/Disable DRM debug logging between tests in shard mode
  2025-12-15 10:33 ` [PATCH i-g-t 1/1] feat: " Sobin Thomas
@ 2025-12-15 13:02   ` Jani Nikula
  2025-12-16  9:15   ` Zbigniew Kempczyński
  1 sibling, 0 replies; 4+ messages in thread
From: Jani Nikula @ 2025-12-15 13:02 UTC (permalink / raw)
  To: Sobin Thomas, igt-dev, zbigniew.kempczynski; +Cc: Sobin Thomas

On Mon, 15 Dec 2025, Sobin Thomas <sobin.thomas@intel.com> wrote:
> In shard mode, when disk usage limit is exceeded during a test, DRM
> debug logging is disabled by writing "0" to /sys/module/drm/parameters/debug
> to prevent premature exit. However, this disabled state persisted across
> subsequent tests in the same shard, causing loss of debug information for
> tests that did not exceed the disk limit.
>
> Add a global flag to track when DRM debug was disabled in a previous test.
> At the start of each new test in monitor_output(), check this flag and
> restore DRM debug logging to the original value (or default "14" if the
> original value could not be saved). Verify the write succeeded by reading
> back the value, retrying up to 10 times with 100ms delays if needed.

Why the retries? Under what circumstances is retrying like this
warranted?

> The original DRM debug value is saved once at startup in execute() before
> any tests run, ensuring we can restore to the correct state. The per-test
> logging_disabled flag remains local to monitor_output() to track state
> within the current test execution.
>
> This ensures each test starts with a clean state and proper debug logging,
> while still allowing dynamic disabling when disk limits are exceeded.

There are already functions igt_drm_debug_mask_reset() and
igt_drm_debug_mask_update() that abstract modifying the parameter.

Please do not add four (!) new open-coded
open("/sys/module/drm/parameters/debug", ...) calls. Amend the existing
functions if needed.

Also, I think reducing debug logging in dmesg should be logged in dmesg
with igt_kmsg(). If I look at a dmesg, I need to know then and there
that the debug output was reduced, not out-of-band in some IGT log.

Some random comments inline.

> Signed-off-by: Sobin Thomas <sobin.thomas@intel.com>
> ---
>  runner/executor.c | 148 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 147 insertions(+), 1 deletion(-)
>
> diff --git a/runner/executor.c b/runner/executor.c
> index 847abe481..8b1bb4d33 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -45,12 +45,15 @@
>  #define KMSG_HEADER "[IGT] "
>  #define KMSG_WARN 4
>  #define GRACEFUL_EXITCODE -SIGHUP
> -
> +#define DRM_DEBUG_DEFAULT "14"
> +#define DRM_DEBUG_DISABLE "0x0"

It's a mask. Seems silly to define the default in decimal and 0 in hex.

>  static struct {
>  	int *fds;
>  	size_t num_dogs;
>  } watchdogs;
>  
> +static bool global_logging_disabled;
> +
>  static void runner_gettime(struct timespec *tv)
>  {
>  	if (clock_gettime(CLOCK_BOOTTIME, tv))
> @@ -948,6 +951,111 @@ static size_t calc_last_dmesg_chunk(size_t limit, size_t disk_usage)
>  	return dt != 0 ? dt : -1;
>  }
>  
> +static char original_drm_debug[16] = {0};
> +static bool drm_debug_saved;
> +
> +static void restore_drm_debug(void)
> +{
> +	int fd;
> +	static const char default_debug[] = "14";
> +	const char *value_to_write;
> +	char readback[16];
> +	ssize_t cnt;
> +	int retries = 10;
> +	bool success = false;
> +
> +	// Use saved value if available, otherwise use default "14"
> +	value_to_write = drm_debug_saved ? original_drm_debug : DRM_DEBUG_DEFAULT;
> +	outf("Restoring DRM Debug to %s", value_to_write);

So this is using the macro for default just to debug log...?

> +	while (retries-- > 0 && !success) {
> +		outf("Restoring DRM Debug");
> +		/* Write the value */
> +		fd = open("/sys/module/drm/parameters/debug", O_WRONLY);
> +		if (fd >= 0) {
> +			write(fd, default_debug, sizeof(default_debug) - 1);

...but the actual value used is a magic value in a static const string?

> +			close(fd);
> +		} else {
> +			errf("Failed to open /sys/module/drm/parameters/debug"
> +			     "for writing: %m\n");
> +			break;
> +		}
> +
> +	/* Read it back to verify */
> +	fd = open("/sys/module/drm/parameters/debug", O_RDONLY);
> +	if (fd >= 0) {
> +		cnt = read(fd, readback, sizeof(readback) - 1);
> +		if (cnt > 0) {
> +			readback[cnt] = '\0';
> +			/* Remove trailing newline if present */
> +			if (cnt > 0 && readback[cnt - 1] == '\n')
> +				readback[cnt - 1] = '\0';
> +
> +			/* Check if it matches what we wrote */
> +			if (strcmp(readback, DRM_DEBUG_DEFAULT) == 0) {
> +				outf("Write Success");
> +				success = true;
> +				if (settings->log_level >= LOG_LEVEL_NORMAL) {
> +					errf("[RUNNER] Successfully restored "
> +					     "DRM debug to %s\n", readback);
> +				}
> +			} else {
> +				errf("[RUNNER] DRM debug readback mismatch:"
> +				     "wrote 14, read %s (retry %d)\n",
> +					readback, 10 - retries);
> +				usleep(100000); /* Wait 100ms before retry */
> +			}
> +		}
> +		close(fd);
> +	} else {
> +		errf("Failed to open /sys/module/drm/parameters/debug for reading: %m\n");
> +		break;
> +	}
> +	}

Something went wrong with the indentation there.

> +
> +	if (!success)
> +		errf("[RUNNER] Failed to restore DRM debug to default "
> +		     "after 10 retries\n");
> +}
> +
> +static void disable_drm_debug(void)
> +{
> +	int fd;
> +	static const char zero[] = "0x0";
> +	const char *msg = "[RUNNER] logging disabled due to disk usage limit\n";
> +	int cnt = 0;
> +	// Save the current value before disabling
> +	if (!drm_debug_saved) {
> +		fd = open("/sys/module/drm/parameters/debug", O_RDONLY);
> +	if (fd >= 0) {
> +		cnt = read(fd, original_drm_debug, sizeof(original_drm_debug) - 1);
> +		if (cnt > 0) {
> +			original_drm_debug[cnt] = '\0';
> +			// Remove trailing newline if present
> +			if (cnt > 0 && original_drm_debug[cnt - 1] == '\n')
> +				original_drm_debug[cnt - 1] = '\0';
> +			drm_debug_saved = true;
> +		}
> +		close(fd);
> +		}
> +	}
> +
> +	fd = open("/sys/module/drm/parameters/debug", O_WRONLY);
> +	if (fd >= 0) {
> +		write(fd, zero, sizeof(zero) - 1);

So this is not using the macro either.

> +		close(fd);
> +		fprintf(stdout, "%s", msg);
> +		fprintf(stderr, "%s", msg);
> +
> +		fflush(stdout);
> +		fflush(stderr);
> +
> +		/* Log to kernel that we disabled debug */
> +		kmsg_log(4, "igt_runner: Disabled DRM debug due to disk usage limit\n");
> +	}
> +	global_logging_disabled = true;
> +}
> +
> +
>  /*
>   * Returns:
>   *  =0 - Success
> @@ -984,10 +1092,20 @@ static int monitor_output(pid_t child,
>  	long dmesgwritten;
>  	bool socket_comms_used = false; /* whether the test actually uses comms */
>  	bool results_received = false; /* whether we already have test results that might need overriding if we detect an abort condition */
> +	bool logging_disabled = false;
>  
>  	runner_gettime(&time_beg);
>  	time_last_activity = time_last_subtest = time_killed = time_beg;
>  
> +	/* Restore DRM debug at the start of each test if it was disabled earlier */
> +	if (global_logging_disabled) {
> +		restore_drm_debug();
> +		global_logging_disabled = false;
> +
> +		if (settings->log_level >= LOG_LEVEL_NORMAL)
> +			outf("[RUNNER] Restored DRM debug logging for new test\n");
> +	}
> +
>  	if (errfd > nfds)
>  		nfds = errfd;
>  	if (socketfd > nfds)
> @@ -1321,6 +1439,34 @@ static int monitor_output(pid_t child,
>  			}
>  		}
>  
> +		/* Check if we need to disable logging due to disk usage limit */
> +
> +		if (!logging_disabled && disk_usage_limit_exceeded(settings, disk_usage)) {
> +			char msg[256];
> +
> +			logging_disabled = true;
> +
> +			/* Disable DRM debug to reduce dmesg spam */
> +			disable_drm_debug();
> +
> +			/* Write warning message to output files */
> +			snprintf(msg, sizeof(msg),
> +				"\n[RUNNER] Disk usage limit reached (%zu/%zu bytes). "
> +				"Disabling further logging and DRM debug. Test continues.\n ",
> +				disk_usage, settings->disk_usage_limit);
> +
> +			write(outputs[_F_OUT], msg, strlen(msg));
> +			write(outputs[_F_ERR], msg, strlen(msg));
> +
> +			if (settings->log_level >= LOG_LEVEL_NORMAL)
> +				fprintf(stderr, "%s", msg);
> +
> +			if (settings->sync) {
> +				fdatasync(outputs[_F_OUT]);
> +				fdatasync(outputs[_F_ERR]);
> +			}
> +		}
> +
>  		if (sigfd >= 0 && FD_ISSET(sigfd, &set)) {
>  			double time;

-- 
Jani Nikula, Intel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH i-g-t 1/1] feat: Enable/Disable DRM debug logging between tests in shard mode
  2025-12-15 10:33 ` [PATCH i-g-t 1/1] feat: " Sobin Thomas
  2025-12-15 13:02   ` Jani Nikula
@ 2025-12-16  9:15   ` Zbigniew Kempczyński
  1 sibling, 0 replies; 4+ messages in thread
From: Zbigniew Kempczyński @ 2025-12-16  9:15 UTC (permalink / raw)
  To: Sobin Thomas; +Cc: igt-dev

On Mon, Dec 15, 2025 at 10:33:05AM +0000, Sobin Thomas wrote:
> In shard mode, when disk usage limit is exceeded during a test, DRM
> debug logging is disabled by writing "0" to /sys/module/drm/parameters/debug
> to prevent premature exit. However, this disabled state persisted across
> subsequent tests in the same shard, causing loss of debug information for
> tests that did not exceed the disk limit.

Why touch the debug parameter at all?

IMO the executor shouldn't write any more logs to disk once the limit is
exceeded (with the exception of a warning such as "disk usage limit exceeded,
suppressing dmesg output").

If I'm not wrong, need_to_timeout(..., disk_usage) returns !NULL when
disk_usage exceeds the limit, and then the child is killed. No longer passing
disk_usage there, and bypassing the part where dump_dmesg() is called once
the disk limit has been exceeded, should be enough. It might be good to add a
boolean flag and report to disk, a single time, that kernel logging is
suppressed from this point on. Draining the kmsg log may still be necessary
in dump_dmesg(), but adding a 'suppress' parameter or similar should likely work.

--
Zbigniew

> 
> Add a global flag to track when DRM debug was disabled in a previous test.
> At the start of each new test in monitor_output(), check this flag and
> restore DRM debug logging to the original value (or default "14" if the
> original value could not be saved). Verify the write succeeded by reading
> back the value, retrying up to 10 times with 100ms delays if needed.
> 
> The original DRM debug value is saved once at startup in execute() before
> any tests run, ensuring we can restore to the correct state. The per-test
> logging_disabled flag remains local to monitor_output() to track state
> within the current test execution.
> 
> This ensures each test starts with a clean state and proper debug logging,
> while still allowing dynamic disabling when disk limits are exceeded.
> 
> Signed-off-by: Sobin Thomas <sobin.thomas@intel.com>
> ---
>  runner/executor.c | 148 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 147 insertions(+), 1 deletion(-)
> 
> diff --git a/runner/executor.c b/runner/executor.c
> index 847abe481..8b1bb4d33 100644
> --- a/runner/executor.c
> +++ b/runner/executor.c
> @@ -45,12 +45,15 @@
>  #define KMSG_HEADER "[IGT] "
>  #define KMSG_WARN 4
>  #define GRACEFUL_EXITCODE -SIGHUP
> -
> +#define DRM_DEBUG_DEFAULT "14"
> +#define DRM_DEBUG_DISABLE "0x0"
>  static struct {
>  	int *fds;
>  	size_t num_dogs;
>  } watchdogs;
>  
> +static bool global_logging_disabled;
> +
>  static void runner_gettime(struct timespec *tv)
>  {
>  	if (clock_gettime(CLOCK_BOOTTIME, tv))
> @@ -948,6 +951,111 @@ static size_t calc_last_dmesg_chunk(size_t limit, size_t disk_usage)
>  	return dt != 0 ? dt : -1;
>  }
>  
> +static char original_drm_debug[16] = {0};
> +static bool drm_debug_saved;
> +
> +static void restore_drm_debug(void)
> +{
> +	int fd;
> +	static const char default_debug[] = "14";
> +	const char *value_to_write;
> +	char readback[16];
> +	ssize_t cnt;
> +	int retries = 10;
> +	bool success = false;
> +
> +	// Use saved value if available, otherwise use default "14"
> +	value_to_write = drm_debug_saved ? original_drm_debug : DRM_DEBUG_DEFAULT;
> +	outf("Restoring DRM Debug to %s", value_to_write);
> +	while (retries-- > 0 && !success) {
> +		outf("Restoring DRM Debug");
> +		/* Write the value */
> +		fd = open("/sys/module/drm/parameters/debug", O_WRONLY);
> +		if (fd >= 0) {
> +			write(fd, default_debug, sizeof(default_debug) - 1);
> +			close(fd);
> +		} else {
> +			errf("Failed to open /sys/module/drm/parameters/debug"
> +			     "for writing: %m\n");
> +			break;
> +		}
> +
> +	/* Read it back to verify */
> +	fd = open("/sys/module/drm/parameters/debug", O_RDONLY);
> +	if (fd >= 0) {
> +		cnt = read(fd, readback, sizeof(readback) - 1);
> +		if (cnt > 0) {
> +			readback[cnt] = '\0';
> +			/* Remove trailing newline if present */
> +			if (cnt > 0 && readback[cnt - 1] == '\n')
> +				readback[cnt - 1] = '\0';
> +
> +			/* Check if it matches what we wrote */
> +			if (strcmp(readback, DRM_DEBUG_DEFAULT) == 0) {
> +				outf("Write Success");
> +				success = true;
> +				if (settings->log_level >= LOG_LEVEL_NORMAL) {
> +					errf("[RUNNER] Successfully restored "
> +					     "DRM debug to %s\n", readback);
> +				}
> +			} else {
> +				errf("[RUNNER] DRM debug readback mismatch:"
> +				     "wrote 14, read %s (retry %d)\n",
> +					readback, 10 - retries);
> +				usleep(100000); /* Wait 100ms before retry */
> +			}
> +		}
> +		close(fd);
> +	} else {
> +		errf("Failed to open /sys/module/drm/parameters/debug for reading: %m\n");
> +		break;
> +	}
> +	}
> +
> +	if (!success)
> +		errf("[RUNNER] Failed to restore DRM debug to default "
> +		     "after 10 retries\n");
> +}
> +
> +static void disable_drm_debug(void)
> +{
> +	int fd;
> +	static const char zero[] = "0x0";
> +	const char *msg = "[RUNNER] logging disabled due to disk usage limit\n";
> +	int cnt = 0;
> +	// Save the current value before disabling
> +	if (!drm_debug_saved) {
> +		fd = open("/sys/module/drm/parameters/debug", O_RDONLY);
> +	if (fd >= 0) {
> +		cnt = read(fd, original_drm_debug, sizeof(original_drm_debug) - 1);
> +		if (cnt > 0) {
> +			original_drm_debug[cnt] = '\0';
> +			// Remove trailing newline if present
> +			if (cnt > 0 && original_drm_debug[cnt - 1] == '\n')
> +				original_drm_debug[cnt - 1] = '\0';
> +			drm_debug_saved = true;
> +		}
> +		close(fd);
> +		}
> +	}
> +
> +	fd = open("/sys/module/drm/parameters/debug", O_WRONLY);
> +	if (fd >= 0) {
> +		write(fd, zero, sizeof(zero) - 1);
> +		close(fd);
> +		fprintf(stdout, "%s", msg);
> +		fprintf(stderr, "%s", msg);
> +
> +		fflush(stdout);
> +		fflush(stderr);
> +
> +		/* Log to kernel that we disabled debug */
> +		kmsg_log(4, "igt_runner: Disabled DRM debug due to disk usage limit\n");
> +	}
> +	global_logging_disabled = true;
> +}
> +
> +
>  /*
>   * Returns:
>   *  =0 - Success
> @@ -984,10 +1092,20 @@ static int monitor_output(pid_t child,
>  	long dmesgwritten;
>  	bool socket_comms_used = false; /* whether the test actually uses comms */
>  	bool results_received = false; /* whether we already have test results that might need overriding if we detect an abort condition */
> +	bool logging_disabled = false;
>  
>  	runner_gettime(&time_beg);
>  	time_last_activity = time_last_subtest = time_killed = time_beg;
>  
> +	/* Restore DRM debug at the start of each test if it was disabled earlier */
> +	if (global_logging_disabled) {
> +		restore_drm_debug();
> +		global_logging_disabled = false;
> +
> +		if (settings->log_level >= LOG_LEVEL_NORMAL)
> +			outf("[RUNNER] Restored DRM debug logging for new test\n");
> +	}
> +
>  	if (errfd > nfds)
>  		nfds = errfd;
>  	if (socketfd > nfds)
> @@ -1321,6 +1439,34 @@ static int monitor_output(pid_t child,
>  			}
>  		}
>  
> +		/* Check if we need to disable logging due to disk usage limit */
> +
> +		if (!logging_disabled && disk_usage_limit_exceeded(settings, disk_usage)) {
> +			char msg[256];
> +
> +			logging_disabled = true;
> +
> +			/* Disable DRM debug to reduce dmesg spam */
> +			disable_drm_debug();
> +
> +			/* Write warning message to output files */
> +			snprintf(msg, sizeof(msg),
> +				"\n[RUNNER] Disk usage limit reached (%zu/%zu bytes). "
> +				"Disabling further logging and DRM debug. Test continues.\n ",
> +				disk_usage, settings->disk_usage_limit);
> +
> +			write(outputs[_F_OUT], msg, strlen(msg));
> +			write(outputs[_F_ERR], msg, strlen(msg));
> +
> +			if (settings->log_level >= LOG_LEVEL_NORMAL)
> +				fprintf(stderr, "%s", msg);
> +
> +			if (settings->sync) {
> +				fdatasync(outputs[_F_OUT]);
> +				fdatasync(outputs[_F_ERR]);
> +			}
> +		}
> +
>  		if (sigfd >= 0 && FD_ISSET(sigfd, &set)) {
>  			double time;
>  
> -- 
> 2.51.0
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-12-16  9:15 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-12-15 10:33 [PATCH i-g-t 0/1] Enable/Disable DRM debug logging between tests in shard mode Sobin Thomas
2025-12-15 10:33 ` [PATCH i-g-t 1/1] feat: " Sobin Thomas
2025-12-15 13:02   ` Jani Nikula
2025-12-16  9:15   ` Zbigniew Kempczyński

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).