All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: intel-xe@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Subject: Re: [Intel-xe] [PATCH 05/14] drm/xe: Convert GuC CT print to snapshot capture and print.
Date: Tue, 2 May 2023 05:27:08 +0000	[thread overview]
Message-ID: <ZFCfLB9bPSLACRsO@DUT025-TGLU.fm.intel.com> (raw)
In-Reply-To: <20230426205713.512695-6-rodrigo.vivi@intel.com>

On Wed, Apr 26, 2023 at 04:57:04PM -0400, Rodrigo Vivi wrote:
> The goal is to allow for a snapshot capture to be taken at the time
> of the crash, while the print out can happen at a later time through
> the exposed devcoredump virtual device.
> 
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_guc_ct.c       | 132 +++++++++++++++++++++++----
>  drivers/gpu/drm/xe/xe_guc_ct.h       |   7 +-
>  drivers/gpu/drm/xe/xe_guc_ct_types.h |  26 ++++++
>  3 files changed, 145 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index e16e5fe37ed4..0b7b95dbd9be 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -1095,31 +1095,26 @@ static void g2h_worker_func(struct work_struct *w)
>  	xe_device_mem_access_put(ct_to_xe(ct));
>  }
>  
> -static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
> -			     struct drm_printer *p)
> +static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
> +				     struct guc_ctb_snapshot *snapshot)
>  {
>  	u32 head, tail;
>  
> -	drm_printf(p, "\tsize: %d\n", ctb->info.size);
> -	drm_printf(p, "\tresv_space: %d\n", ctb->info.resv_space);
> -	drm_printf(p, "\thead: %d\n", ctb->info.head);
> -	drm_printf(p, "\ttail: %d\n", ctb->info.tail);
> -	drm_printf(p, "\tspace: %d\n", ctb->info.space);
> -	drm_printf(p, "\tbroken: %d\n", ctb->info.broken);
> +	snapshot->cmds = kmalloc_array(ctb->info.size, sizeof(u32), GFP_ATOMIC);

So since this GFP_ATOMIC I assume this so we can call this code from the
TDR or a CTB handler (dma fence signaling paths)? Also I don't see
where you check for this allocation failing.

>  
> -	head = desc_read(xe, ctb, head);
> -	tail = desc_read(xe, ctb, tail);
> -	drm_printf(p, "\thead (memory): %d\n", head);
> -	drm_printf(p, "\ttail (memory): %d\n", tail);
> -	drm_printf(p, "\tstatus (memory): 0x%x\n", desc_read(xe, ctb, status));
> +	xe_map_memcpy_from(xe, &snapshot->desc, &ctb->desc, 0,
> +			   sizeof(struct guc_ct_buffer_desc));
> +	memcpy(&snapshot->info, &ctb->info, sizeof(struct guc_ctb_info));
> +
> +	head = snapshot->desc.head;
> +	tail = snapshot->desc.tail;
>  
>  	if (head != tail) {
>  		struct iosys_map map =
>  			IOSYS_MAP_INIT_OFFSET(&ctb->cmds, head * sizeof(u32));
>  
>  		while (head != tail) {
> -			drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
> -				   xe_map_rd(xe, &map, 0, u32));
> +			snapshot->cmds[head] = xe_map_rd(xe, &map, 0, u32);
>  			++head;
>  			if (head == ctb->info.size) {
>  				head = 0;
> @@ -1131,20 +1126,119 @@ static void guc_ct_ctb_print(struct xe_device *xe, struct guc_ctb *ctb,
>  	}
>  }
>  
> -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
> +static void guc_ctb_snapshot_print(struct guc_ctb_snapshot *snapshot,
> +				   struct drm_printer *p)
> +{
> +	u32 head, tail;
> +
> +	drm_printf(p, "\tsize: %d\n", snapshot->info.size);
> +	drm_printf(p, "\tresv_space: %d\n", snapshot->info.space);
> +	drm_printf(p, "\thead: %d\n", snapshot->info.head);
> +	drm_printf(p, "\ttail: %d\n", snapshot->info.tail);
> +	drm_printf(p, "\tspace: %d\n", snapshot->info.space);
> +	drm_printf(p, "\tbroken: %d\n", snapshot->info.broken);
> +	drm_printf(p, "\thead (memory): %d\n", snapshot->desc.head);
> +	drm_printf(p, "\ttail (memory): %d\n", snapshot->desc.tail);
> +	drm_printf(p, "\tstatus (memory): 0x%x\n", snapshot->desc.status);
> +
> +	head = snapshot->desc.head;
> +	tail = snapshot->desc.tail;
> +
> +	while (head != tail) {
> +		drm_printf(p, "\tcmd[%d]: 0x%08x\n", head,
> +			   snapshot->cmds[head]);
> +		++head;
> +		if (head == snapshot->info.size)
> +			head = 0;
> +	}
> +}
> +
> +static void guc_ctb_snapshot_free(struct guc_ctb_snapshot *snapshot)
> +{
> +	kfree(snapshot->cmds);
> +}
> +
> +/**
> + * xe_guc_ct_snapshot_capture - Take a quick snapshot of the CT state.
> + * @ct: GuC CT object.
> + *
> + * This can be printed out in a later stage like during dev_coredump
> + * analysis.
> + *
> + * Returns: a GuC CT snapshot object that must be freed by the caller
> + * 	    by using `xe_guc_ct_snapshot_free`.
> + */
> +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct)
>  {
> +	struct xe_guc_ct_snapshot *snapshot;
> +
> +	snapshot = kzalloc(sizeof(struct xe_guc_ct_snapshot), GFP_ATOMIC);
> +

Same here, need to check for an alloc failure.

Also maybe we should a flag to switch between GFP_ATOMIC (signaling
path) and GFP_KERNEL (debugfs). In the case above, CMDs might be huge as
deadlock workaround (like 16 MBs or something) so atomic seems risky.

Aside from these comments, I do rather like what you have done here.

Matt

>  	if (ct->enabled) {
> +		snapshot->ct_enabled = true;
> +		guc_ctb_snapshot_capture(ct_to_xe(ct), &ct->ctbs.h2g,
> +					 &snapshot->h2g);
> +		guc_ctb_snapshot_capture(ct_to_xe(ct), &ct->ctbs.g2h,
> +					 &snapshot->g2h);
> +	}
> +
> +	return snapshot;
> +}
> +
> +/**
> + * xe_guc_ct_snapshot_print - Print out a given GuC CT snapshot.
> + * @snapshot: GuC CT snapshot object.
> + * @p: drm_printer where it will be printed out.
> + *
> + * This function prints out a given GuC CT snapshot object.
> + */
> +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
> +			      struct drm_printer *p)
> +{
> +	if (snapshot->ct_enabled) {
>  		drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
> -		guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.h2g, p);
> +		guc_ctb_snapshot_print(&snapshot->h2g, p);
>  
>  		drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
> -		guc_ct_ctb_print(ct_to_xe(ct), &ct->ctbs.g2h, p);
> -		drm_printf(p, "\tg2h outstanding: %d\n", ct->g2h_outstanding);
> +		guc_ctb_snapshot_print(&snapshot->g2h, p);
> +
> +		drm_printf(p, "\tg2h outstanding: %d\n",
> +			   snapshot->g2h_outstanding);
>  	} else {
>  		drm_puts(p, "\nCT disabled\n");
>  	}
>  }
>  
> +/**
> + * xe_guc_ct_snapshot_free - Free all allocated objects for a given snapshot.
> + * @snapshot: GuC CT snapshot object.
> + *
> + * This function free all the memory that needed to be allocated at capture
> + * time.
> + */
> +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot)
> +{
> +	guc_ctb_snapshot_free(&snapshot->h2g);
> +	guc_ctb_snapshot_free(&snapshot->g2h);
> +	kfree(snapshot);
> +}
> +
> +/**
> + * xe_guc_ct_print - GuC CT Print.
> + * @ct: GuC CT.
> + * @p: drm_printer where it will be printed out.
> + *
> + * This function quickly capture a snapshot and immediately print it out.
> + */
> +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p)
> +{
> +	struct xe_guc_ct_snapshot *snapshot;
> +
> +	snapshot = xe_guc_ct_snapshot_capture(ct);
> +	xe_guc_ct_snapshot_print(snapshot, p);
> +	xe_guc_ct_snapshot_free(snapshot);
> +}
> +
>  #ifdef XE_GUC_CT_SELFTEST
>  /*
>   * Disable G2H processing in IRQ handler to force xe_guc_ct_send to enter flow
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
> index 49fb74f91e4d..29e0dff7ad9b 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.h
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.h
> @@ -13,9 +13,14 @@ struct drm_printer;
>  int xe_guc_ct_init(struct xe_guc_ct *ct);
>  int xe_guc_ct_enable(struct xe_guc_ct *ct);
>  void xe_guc_ct_disable(struct xe_guc_ct *ct);
> -void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
>  void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
>  
> +struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct);
> +void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
> +			      struct drm_printer *p);
> +void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
> +void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p);
> +
>  static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
>  {
>  	wake_up_all(&ct->wq);
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
> index 64e3dd14d4b2..93046d95b009 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
> @@ -48,6 +48,32 @@ struct guc_ctb {
>  	struct guc_ctb_info info;
>  };
>  
> +/**
> + * struct guc_ctb_snapshot - GuC command transport buffer (CTB) snapshot
> + */
> +struct guc_ctb_snapshot {
> +	/** @desc: snapshot of the CTB descriptor */
> +	struct guc_ct_buffer_desc desc;
> +	/** @cmds: snapshot of the CTB commands */
> +	u32 *cmds;
> +	/** @info: snapshot of the CTB info */
> +	struct guc_ctb_info info;
> +};
> +
> +/**
> + * struct xe_guc_ct_snapshot - GuC command transport (CT) snapshot
> + */
> +struct xe_guc_ct_snapshot {
> +	/** @ct_enabled: CT enabled info at capture time. */
> +	bool ct_enabled;
> +	/** @g2h_outstanding: G2H outstanding info at the capture time */
> +	u32 g2h_outstanding;
> +	/** @g2h: G2H CTB snapshot */
> +	struct guc_ctb_snapshot g2h;
> +	/** @h2g: H2G CTB snapshot */
> +	struct guc_ctb_snapshot h2g;
> +};
> +
>  /**
>   * struct xe_guc_ct - GuC command transport (CT) layer
>   *
> -- 
> 2.39.2
> 

  reply	other threads:[~2023-05-02  5:27 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-26 20:56 [Intel-xe] [PATCH 00/14] Introduce xe_devcoredump Rodrigo Vivi
2023-04-26 20:56 ` Rodrigo Vivi
2023-04-26 20:57 ` [Intel-xe] [PATCH 01/14] drm/xe: Fix print of RING_EXECLIST_SQ_CONTENTS_HI Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-04-26 21:40   ` [Intel-xe] " Lucas De Marchi
2023-04-26 21:59     ` Rodrigo Vivi
2023-04-26 20:57 ` [Intel-xe] [PATCH 02/14] drm/xe: Introduce the dev_coredump infrastructure Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-04-27  8:28   ` [Intel-xe] " Thomas Hellström
2023-05-02  7:57     ` Matthew Brost
2023-05-02 18:06       ` Rodrigo Vivi
2023-05-02 18:06         ` Rodrigo Vivi
2023-05-02 20:29         ` Matthew Brost
2023-05-02 20:29           ` Matthew Brost
2023-05-02  7:55   ` Jani Nikula
2023-05-02 17:25     ` Rodrigo Vivi
2023-04-26 20:57 ` [Intel-xe] [PATCH 03/14] drm/xe: Do not take any action if our device was removed Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02 15:40   ` [Intel-xe] " Matthew Brost
2023-05-02 17:21     ` Rodrigo Vivi
2023-05-02 23:06       ` Matthew Brost
2023-04-26 20:57 ` [Intel-xe] [PATCH 04/14] drm/xe: Extract non mapped regions out of GuC CTB into its own struct Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02  5:12   ` [Intel-xe] " Matthew Brost
2023-04-26 20:57 ` [Intel-xe] [PATCH 05/14] drm/xe: Convert GuC CT print to snapshot capture and print Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02  5:27   ` Matthew Brost [this message]
2023-04-26 20:57 ` [Intel-xe] [PATCH 06/14] drm/xe: Add GuC CT snapshot to xe_devcoredump Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02 14:55   ` [Intel-xe] " Matthew Brost
2023-05-02 14:55     ` Matthew Brost
2023-04-26 20:57 ` [Intel-xe] [PATCH 07/14] drm/xe: Introduce guc_submit_types.h with relevant structs Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02  7:44   ` [Intel-xe] " Matthew Brost
2023-05-02  7:44     ` Matthew Brost
2023-04-26 20:57 ` [Intel-xe] [PATCH 08/14] drm/xe: Convert GuC Engine print to snapshot capture and print Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02 15:01   ` [Intel-xe] " Matthew Brost
2023-04-26 20:57 ` [Intel-xe] [PATCH 09/14] drm/xe: Add GuC Submit Engine snapshot to xe_devcoredump Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02 15:03   ` [Intel-xe] " Matthew Brost
2023-05-02 15:03     ` Matthew Brost
2023-04-26 20:57 ` [Intel-xe] [PATCH 10/14] drm/xe: Convert Xe HW Engine print to snapshot capture and print Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02 15:20   ` [Intel-xe] " Matthew Brost
2023-04-26 20:57 ` [Intel-xe] [PATCH 11/14] drm/xe: Add HW Engine snapshot to xe_devcoredump Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02 15:30   ` [Intel-xe] " Matthew Brost
2023-04-26 20:57 ` [Intel-xe] [PATCH 12/14] drm/xe: Limit CONFIG_DRM_XE_SIMPLE_ERROR_CAPTURE to itself Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02 15:35   ` [Intel-xe] " Matthew Brost
2023-05-02 15:35     ` Matthew Brost
2023-04-26 20:57 ` [Intel-xe] [PATCH 13/14] drm/xe: Convert VM print to snapshot capture and print Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02  7:50   ` [Intel-xe] " Matthew Brost
2023-05-02  8:07   ` Matthew Brost
2023-04-26 20:57 ` [Intel-xe] [PATCH 14/14] drm/xe: Add VM snapshot to xe_devcoredump Rodrigo Vivi
2023-04-26 20:57   ` Rodrigo Vivi
2023-05-02 15:38   ` [Intel-xe] " Matthew Brost
2023-04-26 21:01 ` [Intel-xe] ✓ CI.Patch_applied: success for Introduce xe_devcoredump Patchwork
2023-04-26 21:02 ` [Intel-xe] ✓ CI.KUnit: " Patchwork
2023-04-26 21:06 ` [Intel-xe] ✓ CI.Build: " Patchwork
2023-04-26 21:29 ` [Intel-xe] ○ CI.BAT: info " Patchwork
2023-05-02  8:11 ` [Intel-xe] [PATCH 00/14] " Matthew Brost

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZFCfLB9bPSLACRsO@DUT025-TGLU.fm.intel.com \
    --to=matthew.brost@intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=rodrigo.vivi@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.