AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Felix Kuehling <felix.kuehling@amd.com>
To: Jonathan Kim <jonathan.kim@amd.com>,
	amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: Jinhuieric.Huang@amd.com
Subject: Re: [PATCH 13/33] drm/amdkfd: prepare map process for single process debug devices
Date: Tue, 30 May 2023 15:36:49 -0400	[thread overview]
Message-ID: <2105ee65-003b-73db-9f7b-f13059e2236e@amd.com> (raw)
In-Reply-To: <20230525172745.702700-13-jonathan.kim@amd.com>

Am 2023-05-25 um 13:27 schrieb Jonathan Kim:
> Older HW only supports debugging on a single process because the
> SPI debug mode setting registers are device global.
>
> The HWS has supplied a single pinned VMID (0xf) for MAP_PROCESS
> for debug purposes. To pin the VMID, the KFD will remove the VMID from
> the HWS dynamic VMID allocation via SET_RESOURCES so that a debugged
> process will never migrate away from its pinned VMID.
>
> The KFD is responsible for reserving and releasing this pinned VMID
> accordingly whenever the debugger attaches and detaches respectively.
>
> v2: spot fix ups using new kfd_node references
>
> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 93 +++++++++++++++++++
>   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  5 +
>   .../drm/amd/amdkfd/kfd_packet_manager_v9.c    |  9 ++
>   .../gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h   |  5 +-
>   4 files changed, 111 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index d1f44feb7084..c8519adc89ac 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -1524,6 +1524,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
>   	dqm->gws_queue_count = 0;
>   	dqm->active_runlist = false;
>   	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
> +	dqm->trap_debug_vmid = 0;
>   
>   	init_sdma_bitmaps(dqm);
>   
> @@ -2500,6 +2501,98 @@ static void kfd_process_hw_exception(struct work_struct *work)
>   	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
>   }
>   
> +int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
> +				struct qcm_process_device *qpd)
> +{
> +	int r;
> +	int updated_vmid_mask;
> +
> +	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> +		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
> +		return -EINVAL;
> +	}
> +
> +	dqm_lock(dqm);
> +
> +	if (dqm->trap_debug_vmid != 0) {
> +		pr_err("Trap debug id already reserved\n");
> +		r = -EBUSY;
> +		goto out_unlock;
> +	}
> +
> +	r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
> +			USE_DEFAULT_GRACE_PERIOD, false);
> +	if (r)
> +		goto out_unlock;
> +
> +	updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
> +	updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd);
> +
> +	dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
> +	dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd;
> +	r = set_sched_resources(dqm);
> +	if (r)
> +		goto out_unlock;
> +
> +	r = map_queues_cpsch(dqm);
> +	if (r)
> +		goto out_unlock;
> +
> +	pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid);
> +
> +out_unlock:
> +	dqm_unlock(dqm);
> +	return r;
> +}
> +
> +/*
> + * Releases vmid for the trap debugger
> + */
> +int release_debug_trap_vmid(struct device_queue_manager *dqm,
> +			struct qcm_process_device *qpd)
> +{
> +	int r;
> +	int updated_vmid_mask;
> +	uint32_t trap_debug_vmid;
> +
> +	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> +		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
> +		return -EINVAL;
> +	}
> +
> +	dqm_lock(dqm);
> +	trap_debug_vmid = dqm->trap_debug_vmid;
> +	if (dqm->trap_debug_vmid == 0) {
> +		pr_err("Trap debug id is not reserved\n");
> +		r = -EINVAL;
> +		goto out_unlock;
> +	}
> +
> +	r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
> +			USE_DEFAULT_GRACE_PERIOD, false);
> +	if (r)
> +		goto out_unlock;
> +
> +	updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
> +	updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd);
> +
> +	dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
> +	dqm->trap_debug_vmid = 0;
> +	r = set_sched_resources(dqm);
> +	if (r)
> +		goto out_unlock;
> +
> +	r = map_queues_cpsch(dqm);
> +	if (r)
> +		goto out_unlock;
> +
> +	pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid);
> +
> +out_unlock:
> +	dqm_unlock(dqm);
> +	return r;
> +}
> +
>   #if defined(CONFIG_DEBUG_FS)
>   
>   static void seq_reg_dump(struct seq_file *m,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index d4dd3b4acbf0..bf7aa3f84182 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -250,6 +250,7 @@ struct device_queue_manager {
>   	struct kfd_mem_obj	*fence_mem;
>   	bool			active_runlist;
>   	int			sched_policy;
> +	uint32_t		trap_debug_vmid;
>   
>   	/* hw exception  */
>   	bool			is_hws_hang;
> @@ -285,6 +286,10 @@ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
>   unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
>   unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
>   unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
> +int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
> +			struct qcm_process_device *qpd);
> +int release_debug_trap_vmid(struct device_queue_manager *dqm,
> +			struct qcm_process_device *qpd);
>   
>   static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
>   {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> index 1fda6dcf84b1..0fe73dbd28af 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> @@ -34,6 +34,9 @@ static int pm_map_process_v9(struct packet_manager *pm,
>   {
>   	struct pm4_mes_map_process *packet;
>   	uint64_t vm_page_table_base_addr = qpd->page_table_base;
> +	struct kfd_node *kfd = pm->dqm->dev;
> +	struct kfd_process_device *pdd =
> +			container_of(qpd, struct kfd_process_device, qpd);
>   
>   	packet = (struct pm4_mes_map_process *)buffer;
>   	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
> @@ -49,6 +52,12 @@ static int pm_map_process_v9(struct packet_manager *pm,
>   	packet->bitfields14.sdma_enable = 1;
>   	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
>   
> +	if (kfd->dqm->trap_debug_vmid && pdd->process->debug_trap_enabled &&
> +			pdd->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) {
> +		packet->bitfields2.debug_vmid = kfd->dqm->trap_debug_vmid;
> +		packet->bitfields2.new_debug = 1;
> +	}
> +
>   	packet->sh_mem_config = qpd->sh_mem_config;
>   	packet->sh_mem_bases = qpd->sh_mem_bases;
>   	if (qpd->tba_addr) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
> index 206f1960857f..8b6b2bd5c148 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
> @@ -146,7 +146,10 @@ struct pm4_mes_map_process {
>   	union {
>   		struct {
>   			uint32_t pasid:16;
> -			uint32_t reserved1:8;
> +			uint32_t reserved1:2;
> +			uint32_t debug_vmid:4;
> +			uint32_t new_debug:1;
> +			uint32_t reserved2:1;
>   			uint32_t diq_enable:1;
>   			uint32_t process_quantum:7;
>   		} bitfields2;

  reply	other threads:[~2023-05-30 19:36 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-25 17:27 [PATCH 01/33] drm/amdkfd: add debug and runtime enable interface Jonathan Kim
2023-05-25 17:27 ` [PATCH 02/33] drm/amdkfd: display debug capabilities Jonathan Kim
2023-05-30 19:20   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 03/33] drm/amdkfd: prepare per-process debug enable and disable Jonathan Kim
2023-05-25 17:27 ` [PATCH 04/33] drm/amdgpu: add kgd hw debug mode setting interface Jonathan Kim
2023-05-25 17:27 ` [PATCH 05/33] drm/amdgpu: setup hw debug registers on driver initialization Jonathan Kim
2023-05-30 19:23   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 06/33] drm/amdgpu: add gfx9 hw debug mode enable and disable calls Jonathan Kim
2023-05-25 17:27 ` [PATCH 07/33] drm/amdgpu: add gfx9.4.1 " Jonathan Kim
2023-05-25 17:27 ` [PATCH 08/33] drm/amdkfd: fix kfd_suspend_all_processes Jonathan Kim
2023-05-25 17:27 ` [PATCH 09/33] drm/amdgpu: add gfx10 hw debug mode enable and disable calls Jonathan Kim
2023-05-25 17:27 ` [PATCH 10/33] drm/amdgpu: add gfx9.4.2 " Jonathan Kim
2023-05-25 17:27 ` [PATCH 11/33] drm/amdgpu: add gfx11 " Jonathan Kim
2023-05-25 17:27 ` [PATCH 12/33] drm/amdgpu: add configurable grace period for unmap queues Jonathan Kim
2023-05-30 19:28   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 13/33] drm/amdkfd: prepare map process for single process debug devices Jonathan Kim
2023-05-30 19:36   ` Felix Kuehling [this message]
2023-05-25 17:27 ` [PATCH 14/33] drm/amdgpu: prepare map process for multi-process " Jonathan Kim
2023-05-30 19:55   ` Felix Kuehling
2023-05-30 19:58     ` Kim, Jonathan
2023-05-25 17:27 ` [PATCH 15/33] drm/amdgpu: expose debug api for mes Jonathan Kim
2023-05-25 17:27 ` [PATCH 16/33] drm/amdkfd: add per process hw trap enable and disable functions Jonathan Kim
2023-05-30 20:04   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 17/33] drm/amdkfd: apply trap workaround for gfx11 Jonathan Kim
2023-05-25 17:27 ` [PATCH 18/33] drm/amdkfd: add raise exception event function Jonathan Kim
2023-05-30 20:07   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 19/33] drm/amdkfd: add send exception operation Jonathan Kim
2023-05-25 17:27 ` [PATCH 20/33] drm/amdkfd: add runtime enable operation Jonathan Kim
2023-05-30 20:11   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 21/33] drm/amdkfd: add debug trap enabled flag to tma Jonathan Kim
2023-05-25 17:27 ` [PATCH 22/33] drm/amdkfd: update process interrupt handling for debug events Jonathan Kim
2023-05-30 20:16   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 23/33] drm/amdkfd: add debug set exceptions enabled operation Jonathan Kim
2023-05-25 17:27 ` [PATCH 24/33] drm/amdkfd: add debug wave launch override operation Jonathan Kim
2023-05-30 20:21   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 25/33] drm/amdkfd: add debug wave launch mode operation Jonathan Kim
2023-05-30 20:22   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 26/33] drm/amdkfd: add debug suspend and resume process queues operation Jonathan Kim
2023-05-30 20:24   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 27/33] drm/amdkfd: add debug set and clear address watch points operation Jonathan Kim
2023-05-30 20:26   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 28/33] drm/amdkfd: add debug set flags operation Jonathan Kim
2023-05-30 20:30   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 29/33] drm/amdkfd: add debug query event operation Jonathan Kim
2023-05-25 17:27 ` [PATCH 30/33] drm/amdkfd: add debug query exception info operation Jonathan Kim
2023-05-25 17:27 ` [PATCH 31/33] drm/amdkfd: add debug queue snapshot operation Jonathan Kim
2023-05-25 17:27 ` [PATCH 32/33] drm/amdkfd: add debug device " Jonathan Kim
2023-05-30 20:31   ` Felix Kuehling
2023-05-25 17:27 ` [PATCH 33/33] drm/amdkfd: bump kfd ioctl minor version for debug api availability Jonathan Kim
2023-05-30 19:17 ` [PATCH 01/33] drm/amdkfd: add debug and runtime enable interface Felix Kuehling

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2105ee65-003b-73db-9f7b-f13059e2236e@amd.com \
    --to=felix.kuehling@amd.com \
    --cc=Jinhuieric.Huang@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=jonathan.kim@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox