AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "Christian König" <christian.koenig@amd.com>
To: "Jesse.zhang@amd.com" <jesse.zhang@amd.com>,
	amd-gfx@lists.freedesktop.org
Cc: Alexander.Deucher@amd.com
Subject: Re: [PATCH 1/5] drm/amdgpu: Add sysfs interface for gc reset mask
Date: Tue, 22 Oct 2024 11:52:28 +0200	[thread overview]
Message-ID: <fa32e594-cfed-4854-b271-60db35c47bb9@amd.com> (raw)
In-Reply-To: <20241022075909.2530386-1-jesse.zhang@amd.com>

Am 22.10.24 um 09:59 schrieb Jesse.zhang@amd.com:
> Add two sysfs interfaces for gfx and compute:
> gfx_reset_mask
> compute_reset_mask
>
> These interfaces are read-only and show the resets supported by the IP.
> For example, full adapter reset (mode1/mode2/BACO/etc),
> soft reset, queue reset, and pipe reset.
>
> Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
> Suggested-by:Alex Deucher <alexander.deucher@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   6 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 122 ++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |   2 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  |   6 ++
>   drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  |   5 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  |   5 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |   5 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c |   5 +
>   8 files changed, 156 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 48c9b9b06905..0dd475c30267 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -300,6 +300,12 @@ extern int amdgpu_wbrf;
>   #define AMDGPU_RESET_VCE			(1 << 13)
>   #define AMDGPU_RESET_VCE1			(1 << 14)
>   
> +/* reset mask */
> +#define AMDGPU_RESET_TYPE_FULL (1 << 0) /* full adapter reset, mode1/mode2/BACO/etc. */
> +#define AMDGPU_RESET_TYPE_SOFT_RESET (1 << 1) /* IP level soft reset */
> +#define AMDGPU_RESET_TYPE_PER_QUEUE (1 << 2) /* per queue */
> +#define AMDGPU_RESET_TYPE_PER_PIPE (1 << 3) /* per pipe */

The general approach looks good to me, but I would really prefer that 
the sysfs node returns a text string instead of some flags. E.g. 
"soft,queue,pipe,full" etc...

That's easier to extend should we at some point get something more 
complex and we don't need any specialized knowledge to decode it.

Regards,
Christian.

> +
>   /* max cursor sizes (in pixels) */
>   #define CIK_CURSOR_WIDTH 128
>   #define CIK_CURSOR_HEIGHT 128
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index e96984c53e72..b4706355ece8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -1588,6 +1588,94 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
>   	return count;
>   }
>   
> +static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
> +						struct device_attribute *attr,
> +						char *buf)
> +{
> +	struct drm_device *ddev = dev_get_drvdata(dev);
> +	struct amdgpu_device *adev = drm_to_adev(ddev);
> +	ssize_t size = 0;
> +	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
> +
> +	if (!adev || !ring)
> +		return -ENODEV;
> +
> +	if (amdgpu_device_should_recover_gpu(adev))
> +		size |= AMDGPU_RESET_TYPE_FULL;
> +
> +	if (amdgpu_gpu_recovery && unlikely(!adev->debug_disable_soft_recovery)
> +			&& !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery)
> +		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
> +
> +	if (amdgpu_gpu_recovery && ring->funcs->reset) {
> +                switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
> +                case IP_VERSION(9, 2, 2): //reven2
> +                case IP_VERSION(9, 3, 0): //renior
> +                case IP_VERSION(9, 4, 0): //vega20
> +                case IP_VERSION(10, 1, 0): //navi10
> +                case IP_VERSION(10, 1, 1): //navi12
> +                case IP_VERSION(10, 1, 2): //navi13
> +                        /* Skip flag setting because some cases
> +                         * are not supported by current firmware.
> +                         */
> +                        break;
> +
> +                default:
> +                        size |= AMDGPU_RESET_TYPE_PER_QUEUE;
> +                        break;
> +		}
> +        }
> +
> +	size = sysfs_emit_at(buf, 0, "%lu\n", size);
> +	return size;
> +}
> +
> +static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
> +						struct device_attribute *attr,
> +						char *buf)
> +{
> +	struct drm_device *ddev = dev_get_drvdata(dev);
> +	struct amdgpu_device *adev = drm_to_adev(ddev);
> +	ssize_t size = 0;
> +	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
> +
> +	if (!adev || !ring)
> +		return -ENODEV;
> +
> +	if (amdgpu_device_should_recover_gpu(adev))
> +		size |= AMDGPU_RESET_TYPE_FULL;
> +
> +	if (amdgpu_gpu_recovery && unlikely(!adev->debug_disable_soft_recovery)
> +			&& !amdgpu_sriov_vf(adev) && ring->funcs->soft_recovery)
> +		size |= AMDGPU_RESET_TYPE_SOFT_RESET;
> +
> +	if (amdgpu_gpu_recovery && ring->funcs->reset) {
> +                switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
> +                case IP_VERSION(9, 2, 2): //reven2
> +                case IP_VERSION(9, 3, 0): //renior
> +                case IP_VERSION(9, 4, 0): //vega20
> +                case IP_VERSION(10, 1, 0): //navi10
> +                case IP_VERSION(10, 1, 1): //navi12
> +                case IP_VERSION(10, 1, 2): //navi13
> +                        /* Skip flag setting because some test cases
> +                         * are not supported by current firmware.
> +                         */
> +                        break;
> +
> +                default:
> +                        size |= AMDGPU_RESET_TYPE_PER_QUEUE;
> +                        break;
> +		}
> +        }
> +
> +	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
> +			adev->gfx.mec_fw_version >= 0x0000009b)
> +		size |= AMDGPU_RESET_TYPE_PER_PIPE;
> +
> +	size = sysfs_emit_at(buf, 0, "%lu\n", size);
> +	return size;
> +}
> +
>   static DEVICE_ATTR(run_cleaner_shader, 0200,
>   		   NULL, amdgpu_gfx_set_run_cleaner_shader);
>   
> @@ -1602,6 +1690,12 @@ static DEVICE_ATTR(current_compute_partition, 0644,
>   static DEVICE_ATTR(available_compute_partition, 0444,
>   		   amdgpu_gfx_get_available_compute_partition, NULL);
>   
> +static DEVICE_ATTR(gfx_reset_mask, 0444,
> +		   amdgpu_gfx_get_gfx_reset_mask, NULL);
> +
> +static DEVICE_ATTR(compute_reset_mask, 0444,
> +		   amdgpu_gfx_get_compute_reset_mask, NULL);
> +
>   int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
>   {
>   	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
> @@ -1702,6 +1796,34 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
>   			    cleaner_shader_size);
>   }
>   
> +int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
> +{
> +	int r = 0;
> +
> +	if (adev->gfx.num_gfx_rings) {
> +		r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
> +		if (r)
> +			return r;
> +	}
> +
> +	if (adev->gfx.num_compute_rings) {
> +		r = device_create_file(adev->dev, &dev_attr_compute_reset_mask);
> +		if (r)
> +			return r;
> +	}
> +
> +	return r;
> +}
> +
> +void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
> +{
> +	if (adev->gfx.num_gfx_rings)
> +		device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);
> +
> +	if (adev->gfx.num_compute_rings)
> +		device_remove_file(adev->dev, &dev_attr_compute_reset_mask);
> +}
> +
>   /**
>    * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
>    * @adev: amdgpu_device pointer
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index f710178a21bc..0cf2151b3cf4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -582,6 +582,8 @@ void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev);
>   void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work);
>   void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring);
>   void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring);
> +int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev);
> +void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev);
>   
>   static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
>   {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 9da95b25e158..2baa76095232 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4856,6 +4856,11 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
>   	r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
>   	if (r)
>   		return r;
> +
> +	r = amdgpu_gfx_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return 0;
>   }
>   
> @@ -4908,6 +4913,7 @@ static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
>   
>   	gfx_v10_0_free_microcode(adev);
>   	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
> +	amdgpu_gfx_sysfs_reset_mask_fini(adev);
>   
>   	kfree(adev->gfx.ip_dump_core);
>   	kfree(adev->gfx.ip_dump_compute_queues);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 5aff8f72de9c..32d14b9cc6e4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -1721,6 +1721,10 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
>   	if (r)
>   		return r;
>   
> +	r = amdgpu_gfx_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return 0;
>   }
>   
> @@ -1783,6 +1787,7 @@ static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
>   	gfx_v11_0_free_microcode(adev);
>   
>   	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
> +	amdgpu_gfx_sysfs_reset_mask_fini(adev);
>   
>   	kfree(adev->gfx.ip_dump_core);
>   	kfree(adev->gfx.ip_dump_compute_queues);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index 9fec28d8a5fc..925b7ca49b2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -1470,6 +1470,10 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
>   	if (r)
>   		return r;
>   
> +	r = amdgpu_gfx_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return 0;
>   }
>   
> @@ -1530,6 +1534,7 @@ static int gfx_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
>   	gfx_v12_0_free_microcode(adev);
>   
>   	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
> +	amdgpu_gfx_sysfs_reset_mask_fini(adev);
>   
>   	kfree(adev->gfx.ip_dump_core);
>   	kfree(adev->gfx.ip_dump_compute_queues);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index b4c4b9916289..0de199c1cfdd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -2394,6 +2394,10 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
>   	if (r)
>   		return r;
>   
> +	r = amdgpu_gfx_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return 0;
>   }
>   
> @@ -2432,6 +2436,7 @@ static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
>   	gfx_v9_0_free_microcode(adev);
>   
>   	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
> +	amdgpu_gfx_sysfs_reset_mask_fini(adev);
>   
>   	kfree(adev->gfx.ip_dump_core);
>   	kfree(adev->gfx.ip_dump_compute_queues);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index 016290f00592..87cfd77e2fb4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -1175,6 +1175,10 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
>   	if (r)
>   		return r;
>   
> +	r = amdgpu_gfx_sysfs_reset_mask_init(adev);
> +	if (r)
> +		return r;
> +
>   	return 0;
>   }
>   
> @@ -1200,6 +1204,7 @@ static int gfx_v9_4_3_sw_fini(struct amdgpu_ip_block *ip_block)
>   	gfx_v9_4_3_free_microcode(adev);
>   	amdgpu_gfx_sysfs_fini(adev);
>   	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
> +	amdgpu_gfx_sysfs_reset_mask_fini(adev);
>   
>   	kfree(adev->gfx.ip_dump_core);
>   	kfree(adev->gfx.ip_dump_compute_queues);


      parent reply	other threads:[~2024-10-22  9:52 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-22  7:59 [PATCH 1/5] drm/amdgpu: Add sysfs interface for gc reset mask Jesse.zhang@amd.com
2024-10-22  7:59 ` [PATCH 2/5] drm/amdgpu: Add sysfs interface for sdma " Jesse.zhang@amd.com
2024-10-22  7:59 ` [PATCH 3/5] drm/amdgpu: Add sysfs interface for vcn " Jesse.zhang@amd.com
2024-10-22  7:59 ` [PATCH 4/5] drm/amdgpu: Add sysfs interface for vpe " Jesse.zhang@amd.com
2024-10-22  7:59 ` [PATCH 5/5] drm/amdgpu: Add sysfs interface for jpeg " Jesse.zhang@amd.com
2024-10-22  9:52 ` Christian König [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fa32e594-cfed-4854-b271-60db35c47bb9@amd.com \
    --to=christian.koenig@amd.com \
    --cc=Alexander.Deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=jesse.zhang@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox