All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>,
	 intel-xe@lists.freedesktop.org
Cc: John Harrison <John.C.Harrison@Intel.com>,
	Matthew Brost <matthew.brost@intel.com>,
	Rodrigo Vivi <rodrigo.vivi@intel.com>
Subject: Re: [PATCH 3/3] Revert "drm/xe: Add a reason string to the devcoredump"
Date: Wed, 27 Nov 2024 09:13:02 +0100	[thread overview]
Message-ID: <b91fdc7fa3550ed802f5ccdb759c2fb84406897e.camel@linux.intel.com> (raw)
In-Reply-To: <20241127083042.358726-4-himal.prasad.ghimiray@intel.com>

On Wed, 2024-11-27 at 14:00 +0530, Himal Prasad Ghimiray wrote:
> This reverts commit 06e2cb496396613e583a72a85e0822fb43b2ead5. The
> commit
> was accidentally and unintentionally pushed.
> 
> Cc: John Harrison <John.C.Harrison@Intel.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Himal Prasad Ghimiray
> <himal.prasad.ghimiray@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>

> ---
>  drivers/gpu/drm/xe/xe_devcoredump.c       | 21 +++------------------
>  drivers/gpu/drm/xe/xe_devcoredump.h       |  5 ++---
>  drivers/gpu/drm/xe/xe_devcoredump_types.h |  4 ----
>  drivers/gpu/drm/xe/xe_guc_submit.c        | 14 ++++----------
>  4 files changed, 9 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c
> b/drivers/gpu/drm/xe/xe_devcoredump.c
> index f4c77f525819..0e5edf14a241 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> @@ -99,7 +99,6 @@ static ssize_t __xe_devcoredump_read(char *buffer,
> size_t count,
>  	p = drm_coredump_printer(&iter);
>  
>  	drm_puts(&p, "**** Xe Device Coredump ****\n");
> -	drm_printf(&p, "Reason: %s\n", ss->reason);
>  	drm_puts(&p, "kernel: " UTS_RELEASE "\n");
>  	drm_puts(&p, "module: " KBUILD_MODNAME "\n");
>  
> @@ -107,7 +106,7 @@ static ssize_t __xe_devcoredump_read(char
> *buffer, size_t count,
>  	drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec,
> ts.tv_nsec);
>  	ts = ktime_to_timespec64(ss->boot_time);
>  	drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec,
> ts.tv_nsec);
> -	drm_printf(&p, "Process: %s [%d]\n", ss->process_name, ss-
> >pid);
> +	drm_printf(&p, "Process: %s\n", ss->process_name);
>  	xe_device_snapshot_print(xe, &p);
>  
>  	drm_printf(&p, "\n**** GT #%d ****\n", ss->gt->info.id);
> @@ -139,9 +138,6 @@ static void xe_devcoredump_snapshot_free(struct
> xe_devcoredump_snapshot *ss)
>  {
>  	int i;
>  
> -	kfree(ss->reason);
> -	ss->reason = NULL;
> -
>  	xe_guc_log_snapshot_free(ss->guc.log);
>  	ss->guc.log = NULL;
>  
> @@ -257,11 +253,8 @@ static void devcoredump_snapshot(struct
> xe_devcoredump *coredump,
>  	ss->snapshot_time = ktime_get_real();
>  	ss->boot_time = ktime_get_boottime();
>  
> -	if (q->vm && q->vm->xef) {
> +	if (q->vm && q->vm->xef)
>  		process_name = q->vm->xef->process_name;
> -		ss->pid = q->vm->xef->pid;
> -	}
> -
>  	strscpy(ss->process_name, process_name);
>  
>  	ss->gt = q->gt;
> @@ -299,18 +292,15 @@ static void devcoredump_snapshot(struct
> xe_devcoredump *coredump,
>   * xe_devcoredump - Take the required snapshots and initialize
> coredump device.
>   * @q: The faulty xe_exec_queue, where the issue was detected.
>   * @job: The faulty xe_sched_job, where the issue was detected.
> - * @fmt: Printf format + args to describe the reason for the core
> dump
>   *
>   * This function should be called at the crash time within the
> serialized
>   * gt_reset. It is skipped if we still have the core dump device
> available
>   * with the information of the 'first' snapshot.
>   */
> -__printf(3, 4)
> -void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job
> *job, const char *fmt, ...)
> +void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job
> *job)
>  {
>  	struct xe_device *xe = gt_to_xe(q->gt);
>  	struct xe_devcoredump *coredump = &xe->devcoredump;
> -	va_list varg;
>  
>  	if (coredump->captured) {
>  		drm_dbg(&xe->drm, "Multiple hangs are occurring, but
> only the first snapshot was taken\n");
> @@ -318,11 +308,6 @@ void xe_devcoredump(struct xe_exec_queue *q,
> struct xe_sched_job *job, const cha
>  	}
>  
>  	coredump->captured = true;
> -
> -	va_start(varg, fmt);
> -	coredump->snapshot.reason = kvasprintf(GFP_ATOMIC, fmt,
> varg);
> -	va_end(varg);
> -
>  	devcoredump_snapshot(coredump, q, job);
>  
>  	drm_info(&xe->drm, "Xe device coredump has been created\n");
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h
> b/drivers/gpu/drm/xe/xe_devcoredump.h
> index 6a17e6d60102..c04a534e3384 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.h
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.h
> @@ -14,12 +14,11 @@ struct xe_exec_queue;
>  struct xe_sched_job;
>  
>  #ifdef CONFIG_DEV_COREDUMP
> -void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job
> *job, const char *fmt, ...);
> +void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job
> *job);
>  int xe_devcoredump_init(struct xe_device *xe);
>  #else
>  static inline void xe_devcoredump(struct xe_exec_queue *q,
> -				  struct xe_sched_job *job,
> -				  const char *fmt, ...)
> +				  struct xe_sched_job *job)
>  {
>  }
>  
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h
> b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> index e6234e887102..be4d59ea9ac8 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
> +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
> @@ -28,10 +28,6 @@ struct xe_devcoredump_snapshot {
>  	ktime_t boot_time;
>  	/** @process_name: Name of process that triggered this gpu
> hang */
>  	char process_name[TASK_COMM_LEN];
> -	/** @pid: Process id of process that triggered this gpu hang
> */
> -	pid_t pid;
> -	/** @reason: The reason the coredump was triggered */
> -	char *reason;
>  
>  	/** @gt: Affected GT, used by forcewake for delayed capture
> */
>  	struct xe_gt *gt;
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c
> b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 9c36329fe857..d00af8d8acbe 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -901,8 +901,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct
> work_struct *w)
>  		if (!ret) {
>  			xe_gt_warn(q->gt, "Schedule disable failed
> to respond, guc_id=%d\n",
>  				   q->guc->id);
> -			xe_devcoredump(q, NULL, "Schedule disable
> failed to respond, guc_id=%d\n",
> -				       q->guc->id);
> +			xe_devcoredump(q, NULL);
>  			xe_sched_submission_start(sched);
>  			xe_gt_reset_async(q->gt);
>  			return;
> @@ -910,7 +909,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct
> work_struct *w)
>  	}
>  
>  	if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q-
> >lrc[0]))
> -		xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d",
> q->guc->id);
> +		xe_devcoredump(q, NULL);
>  
>  	xe_sched_submission_start(sched);
>  }
> @@ -1137,9 +1136,7 @@ guc_exec_queue_timedout_job(struct
> drm_sched_job *drm_job)
>  				xe_gt_warn(guc_to_gt(guc),
>  					   "Schedule disable failed
> to respond, guc_id=%d",
>  					   q->guc->id);
> -			xe_devcoredump(q, job,
> -				       "Schedule disable failed to
> respond, guc_id=%d, ret=%d, guc_read=%d",
> -				       q->guc->id, ret,
> xe_guc_read_stopped(guc));
> +			xe_devcoredump(q, job);
>  			set_exec_queue_extra_ref(q);
>  			xe_exec_queue_get(q);	/* GT reset owns
> this */
>  			set_exec_queue_banned(q);
> @@ -1169,10 +1166,7 @@ guc_exec_queue_timedout_job(struct
> drm_sched_job *drm_job)
>  	trace_xe_sched_job_timedout(job);
>  
>  	if (!exec_queue_killed(q))
> -		xe_devcoredump(q, job,
> -			       "Timedout job - seqno=%u,
> lrc_seqno=%u, guc_id=%d, flags=0x%lx",
> -			       xe_sched_job_seqno(job),
> xe_sched_job_lrc_seqno(job),
> -			       q->guc->id, q->flags);
> +		xe_devcoredump(q, job);
>  
>  	/*
>  	 * Kernel jobs should never fail, nor should VM jobs if they
> do


  reply	other threads:[~2024-11-27  8:13 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-27  8:30 [PATCH 0/3] Revert accidentally and unintentionally pushed Himal Prasad Ghimiray
2024-11-27  8:30 ` [PATCH 1/3] Revert "drm/xe: Add mutex locking to devcoredump" Himal Prasad Ghimiray
2024-11-27  8:11   ` Thomas Hellström
2024-11-27  8:30 ` [PATCH 2/3] Revert "drm/xe: Move the coredump registration to the worker thread" Himal Prasad Ghimiray
2024-11-27  8:12   ` Thomas Hellström
2024-11-27  8:30 ` [PATCH 3/3] Revert "drm/xe: Add a reason string to the devcoredump" Himal Prasad Ghimiray
2024-11-27  8:13   ` Thomas Hellström [this message]
2024-11-27  8:47 ` ✓ CI.Patch_applied: success for Revert accidentally and unintentionally pushed Patchwork
2024-11-27  8:48 ` ✓ CI.checkpatch: " Patchwork
2024-11-27  8:49 ` ✓ CI.KUnit: " Patchwork
2024-11-27  9:07 ` ✓ CI.Build: " Patchwork
2024-11-27  9:09 ` ✓ CI.Hooks: " Patchwork
2024-11-27  9:11 ` ✓ CI.checksparse: " Patchwork
2024-11-27  9:36 ` ✓ Xe.CI.BAT: " Patchwork
2024-11-27 11:32 ` ✗ Xe.CI.Full: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b91fdc7fa3550ed802f5ccdb759c2fb84406897e.camel@linux.intel.com \
    --to=thomas.hellstrom@linux.intel.com \
    --cc=John.C.Harrison@Intel.com \
    --cc=himal.prasad.ghimiray@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    --cc=rodrigo.vivi@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.