From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by gabe.freedesktop.org (Postfix) with ESMTPS id B959010E2DA for ; Thu, 20 Apr 2023 13:03:36 +0000 (UTC) Message-ID: <5a2ff1e5-026c-c8e7-aa77-e003143c3ecd@intel.com> Date: Thu, 20 Apr 2023 15:03:32 +0200 MIME-Version: 1.0 Content-Language: en-US To: Kamil Konieczny , igt-dev@lists.freedesktop.org References: <20230227142748.28811-1-kamil.konieczny@linux.intel.com> From: Andrzej Hajda In-Reply-To: <20230227142748.28811-1-kamil.konieczny@linux.intel.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit Subject: Re: [igt-dev] [PATCH v2 i-g-t] runner: check disk limit at dumping kmsg List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Arkadiusz Hiler , Karol Krol Errors-To: igt-dev-bounces@lists.freedesktop.org Sender: "igt-dev" List-ID: Hi, On 27.02.2023 15:27, Kamil Konieczny wrote: > It was reported that kernel dumps can grow beyond disk limit size > so add checks for it and report error if that happen. > > v2: return number of written bytes (Petri) > > Reported-by: Karol Krol > Ref: https://gitlab.freedesktop.org/drm/igt-gpu-tools/-/issues/129 > Cc: Petri Latvala > Cc: Arkadiusz Hiler > Cc: Juha-Pekka Heikkila > Signed-off-by: Kamil Konieczny > Reviewed-by: Petri Latvala It looks correct, however the main issue is that logs cut are quite often unusable, the vital info is usually at the end. From my observation the long dmesg is usually a result of trace dump in dmesg. Limiting it in .config should help, currently it is: CONFIG_GLOBAL_TRACE_BUF_SIZE=1441792 It seems huge since it is very compressed comparing to dmesg. Anyway, going back to the subject: Reviewed-by: Andrzej Hajda Regards Andrzej > --- > runner/executor.c | 24 +++++++++++++++++++----- > 1 file changed, 19 insertions(+), 5 deletions(-) > > diff --git a/runner/executor.c b/runner/executor.c > index 597cd7f5..ae6cf1bb 100644 > --- a/runner/executor.c > +++ b/runner/executor.c > @@ -584,7 +584,7 @@ void close_outputs(int *fds) > } > > /* Returns the number of bytes written to disk, or a negative number on error */ > -static long dump_dmesg(int kmsgfd, int outfd) > +static long dump_dmesg(int kmsgfd, int outfd, size_t disk_limit) > { > /* > * Write kernel messages to the log file until we reach > @@ -599,12 +599,18 @@ static long dump_dmesg(int kmsgfd, int outfd) > bool underflow_once = false; > char cont; > char buf[2048]; > - ssize_t r; > + ssize_t r, disk_written; > long written = 0; > > if (kmsgfd < 0) > return 0; > > + disk_written = lseek(outfd, 0, SEEK_SET); > + if (disk_written > disk_limit) { > + errf("Error dumping kmsg: disk limit already exceeded\n"); > + return 0; /* number of written bytes */ > + } > + > comparefd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK); > if (comparefd < 0) { > errf("Error opening another fd for /dev/kmsg\n"); > @@ -655,6 +661,13 @@ static long dump_dmesg(int kmsgfd, int outfd) > > write(outfd, buf, r); > written += r; > + disk_written += r; > + > + if (disk_written > disk_limit) { > + close(comparefd); > + errf("Error dumping kmsg: disk limit exceeded\n"); > + return written; > + } > > if (comparefd < 0 && sscanf(buf, "%u,%llu,%llu,%c;", > &flags, &seq, &usec, &cont) == 4) { > @@ -890,6 +903,7 @@ static int monitor_output(pid_t child, > unsigned long taints = 0; > bool aborting = false; > size_t disk_usage = 0; > + const size_t mon_disk_limit = settings->disk_usage_limit ? : (~(size_t)0); > bool socket_comms_used = false; /* whether the test actually uses comms */ > bool results_received = false; /* whether we already have test results that might need overriding if we detect an abort condition */ > > @@ -1219,7 +1233,7 @@ static int monitor_output(pid_t child, > > time_last_activity = time_now; > > - dmesgwritten = dump_dmesg(kmsgfd, outputs[_F_DMESG]); > + dmesgwritten = dump_dmesg(kmsgfd, outputs[_F_DMESG], mon_disk_limit); > if (settings->sync) > fdatasync(outputs[_F_DMESG]); > > @@ -1457,7 +1471,7 @@ static int monitor_output(pid_t child, > asprintf(abortreason, "Child refuses to die, tainted 0x%lx.", taints); > } > > - dump_dmesg(kmsgfd, outputs[_F_DMESG]); > + dump_dmesg(kmsgfd, outputs[_F_DMESG], mon_disk_limit); > if (settings->sync) > fdatasync(outputs[_F_DMESG]); > > @@ -1483,7 +1497,7 @@ static int monitor_output(pid_t child, > } > } > > - dump_dmesg(kmsgfd, outputs[_F_DMESG]); > + dump_dmesg(kmsgfd, outputs[_F_DMESG], mon_disk_limit); > if (settings->sync) > fdatasync(outputs[_F_DMESG]); >