From: "Bhardwaj, Rajneesh" <rajneesh.bhardwaj@amd.com>
To: Lijo Lazar <lijo.lazar@amd.com>, amd-gfx@lists.freedesktop.org
Cc: Hawking.Zhang@amd.com, Alexander.Deucher@amd.com,
Christian.Koenig@amd.com, feifxu@amd.com
Subject: Re: [PATCH v2 01/10] drm/amdgpu: Add init levels
Date: Wed, 11 Sep 2024 22:09:30 -0400 [thread overview]
Message-ID: <ad24e421-c24c-409d-9823-e128e2fde7ad@amd.com> (raw)
In-Reply-To: <20240911065858.2224424-2-lijo.lazar@amd.com>
The series is
Acked-and-tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
On 9/11/2024 2:58 AM, Lijo Lazar wrote:
> Add init levels to define the level to which device needs to be
> initialized.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
> ---
>
> v2:
> Add comments describing init levels
> Drop unnecessary assignment
> Rename AMDGPU_INIT_LEVEL_MINIMAL to AMDGPU_INIT_LEVEL_MINIMAL_XGMI
>
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 22 ++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 66 ++++++++++++++++++++++
> 2 files changed, 88 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 6e6580ab7e04..d8299383af11 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -820,6 +820,24 @@ struct amdgpu_mqd {
> struct amdgpu_mqd_prop *p);
> };
>
> +/*
> + * Custom Init levels could be defined for different situations where a full
> + * initialization of all hardware blocks is not expected. Sample cases are
> + * custom init sequences after resume after S0i3/S3, reset on initialization,
> + * partial reset of blocks etc. Presently, this defines only two levels. Levels
> + * are described in corresponding struct definitions - amdgpu_init_default,
> + * amdgpu_init_minimal_xgmi.
> + */
> +enum amdgpu_init_lvl_id {
> + AMDGPU_INIT_LEVEL_DEFAULT,
> + AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
> +};
> +
> +struct amdgpu_init_level {
> + enum amdgpu_init_lvl_id level;
> + uint32_t hwini_ip_block_mask;
> +};
> +
> #define AMDGPU_RESET_MAGIC_NUM 64
> #define AMDGPU_MAX_DF_PERFMONS 4
> struct amdgpu_reset_domain;
> @@ -1169,6 +1187,8 @@ struct amdgpu_device {
> bool enforce_isolation[MAX_XCP];
> /* Added this mutex for cleaner shader isolation between GFX and compute processes */
> struct mutex enforce_isolation_mutex;
> +
> + struct amdgpu_init_level *init_lvl;
> };
>
> static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
> @@ -1623,4 +1643,6 @@ extern const struct attribute_group amdgpu_vram_mgr_attr_group;
> extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
> extern const struct attribute_group amdgpu_flash_attr_group;
>
> +void amdgpu_set_init_level(struct amdgpu_device *adev,
> + enum amdgpu_init_lvl_id lvl);
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 61a189e30bcd..2ecc70f220d2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -144,6 +144,50 @@ const char *amdgpu_asic_name[] = {
> "LAST",
> };
>
> +#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM - 1, 0)
> +/*
> + * Default init level where all blocks are expected to be initialized. This is
> + * the level of initialization expected by default and also after a full reset
> + * of the device.
> + */
> +struct amdgpu_init_level amdgpu_init_default = {
> + .level = AMDGPU_INIT_LEVEL_DEFAULT,
> + .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
> +};
> +
> +/*
> + * Minimal blocks needed to be initialized before an XGMI hive can be reset. This
> + * is used for cases like reset on initialization where the entire hive needs to
> + * be reset before first use.
> + */
> +struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
> + .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
> + .hwini_ip_block_mask =
> + BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
> + BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH)
> +};
> +
> +static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
> + enum amd_ip_block_type block)
> +{
> + return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
> +}
> +
> +void amdgpu_set_init_level(struct amdgpu_device *adev,
> + enum amdgpu_init_lvl_id lvl)
> +{
> + switch (lvl) {
> + case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
> + adev->init_lvl = &amdgpu_init_minimal_xgmi;
> + break;
> + case AMDGPU_INIT_LEVEL_DEFAULT:
> + fallthrough;
> + default:
> + adev->init_lvl = &amdgpu_init_default;
> + break;
> + }
> +}
> +
> static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
>
> /**
> @@ -2633,6 +2677,9 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
> continue;
> if (adev->ip_blocks[i].status.hw)
> continue;
> + if (!amdgpu_ip_member_of_hwini(
> + adev, adev->ip_blocks[i].version->type))
> + continue;
> if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
> (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
> adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
> @@ -2658,6 +2705,9 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
> continue;
> if (adev->ip_blocks[i].status.hw)
> continue;
> + if (!amdgpu_ip_member_of_hwini(
> + adev, adev->ip_blocks[i].version->type))
> + continue;
> r = adev->ip_blocks[i].version->funcs->hw_init(adev);
> if (r) {
> DRM_ERROR("hw_init of IP block <%s> failed %d\n",
> @@ -2681,6 +2731,10 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
> if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
> continue;
>
> + if (!amdgpu_ip_member_of_hwini(adev,
> + AMD_IP_BLOCK_TYPE_PSP))
> + break;
> +
> if (!adev->ip_blocks[i].status.sw)
> continue;
>
> @@ -2803,6 +2857,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
> }
> adev->ip_blocks[i].status.sw = true;
>
> + if (!amdgpu_ip_member_of_hwini(
> + adev, adev->ip_blocks[i].version->type))
> + continue;
> +
> if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
> /* need to do common hw init early so everything is set up for gmc */
> r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
> @@ -4196,6 +4254,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>
> amdgpu_device_set_mcbp(adev);
>
> + /*
> + * By default, use default mode where all blocks are expected to be
> + * initialized. At present a 'swinit' of blocks is required to be
> + * completed before the need for a different level is detected.
> + */
> + amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
> /* early init functions */
> r = amdgpu_device_ip_early_init(adev);
> if (r)
> @@ -5473,6 +5537,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
> }
>
> list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
> + /* After reset, it's default init level */
> + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
> if (need_full_reset) {
> /* post card */
> amdgpu_ras_set_fed(tmp_adev, false);
next prev parent reply other threads:[~2024-09-12 2:09 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-11 6:58 [PATCH v2 00/10] Support XGMI reset on init Lijo Lazar
2024-09-11 6:58 ` [PATCH v2 01/10] drm/amdgpu: Add init levels Lijo Lazar
2024-09-12 2:09 ` Bhardwaj, Rajneesh [this message]
2024-09-11 6:58 ` [PATCH v2 02/10] drm/amdgpu: Use init level for pending_reset flag Lijo Lazar
2024-09-11 6:58 ` [PATCH v2 03/10] drm/amdgpu: Separate reinitialization after reset Lijo Lazar
2024-09-11 6:58 ` [PATCH v2 04/10] drm/amdgpu: Add reset on init handler for XGMI Lijo Lazar
2024-09-11 6:58 ` [PATCH v2 05/10] drm/amdgpu: Add helper to initialize badpage info Lijo Lazar
2024-09-11 6:58 ` [PATCH v2 06/10] drm/amdgpu: Refactor XGMI reset on init handling Lijo Lazar
2024-09-11 6:58 ` [PATCH v2 07/10] drm/amdgpu: Drop delayed reset work handler Lijo Lazar
2024-09-11 6:58 ` [PATCH v2 08/10] drm/amdgpu: Support reset-on-init on select SOCs Lijo Lazar
2024-09-11 6:58 ` [PATCH v2 09/10] drm/amdgpu: Add interface for TOS reload cases Lijo Lazar
2024-09-11 6:58 ` [PATCH v2 10/10] drm/amdgpu: Add PSP reload case to reset-on-init Lijo Lazar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ad24e421-c24c-409d-9823-e128e2fde7ad@amd.com \
--to=rajneesh.bhardwaj@amd.com \
--cc=Alexander.Deucher@amd.com \
--cc=Christian.Koenig@amd.com \
--cc=Hawking.Zhang@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=feifxu@amd.com \
--cc=lijo.lazar@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.