Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: vitaly prosyak <vprosyak@amd.com>
To: vitaly.prosyak@amd.com, igt-dev@lists.freedesktop.org
Cc: "Jesse.zhang@amd.com" <Jesse.zhang@amd.com>,
	Christian Koenig <christian.koenig@amd.com>,
	Alexander Deucher <alexander.deucher@amd.com>
Subject: Re: [PATCH 3/3] lib/amdgpu: fix the hard-coded path when scheduling rings.
Date: Sun, 5 Jan 2025 02:20:17 -0500	[thread overview]
Message-ID: <056e6ee5-3d1b-4097-b4e1-b2f7d815a712@amd.com> (raw)
In-Reply-To: <20250105071551.113830-3-vitaly.prosyak@amd.com>

I applied some code formatting, and the series of three changes now looks good to me.

Reviewed-by: Vitaly Prosyak <vitaly.prosyak@amd.com>

On 2025-01-05 02:15, vitaly.prosyak@amd.com wrote:
> From: "Jesse.zhang@amd.com" <Jesse.zhang@amd.com>
>
> Select the scheduling-ring debugfs path dynamically from the device's
> PCI address instead of hard-coding /sys/kernel/debug/dri/0.
>
> Cc: Vitaly Prosyak <vitaly.prosyak@amd.com>
> Cc: Christian Koenig <christian.koenig@amd.com>
> Cc: Alexander Deucher <alexander.deucher@amd.com>
>
> v2: fix formatting(Vitaly)
>
> Suggested-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
> Signed-off-by: Jesse Zhang  <jesse.zhang@amd.com>
> Reviewed-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
> ---
>  lib/amdgpu/amd_deadlock_helpers.c | 43 ++++++++++++++++++-------------
>  lib/amdgpu/amd_deadlock_helpers.h |  8 +++---
>  tests/amdgpu/amd_deadlock.c       | 28 +++++++++++---------
>  3 files changed, 46 insertions(+), 33 deletions(-)
>
> diff --git a/lib/amdgpu/amd_deadlock_helpers.c b/lib/amdgpu/amd_deadlock_helpers.c
> index dabd7ae76..8ac6abf8f 100644
> --- a/lib/amdgpu/amd_deadlock_helpers.c
> +++ b/lib/amdgpu/amd_deadlock_helpers.c
> @@ -65,7 +65,7 @@ amdgpu_wait_memory(amdgpu_device_handle device_handle, unsigned int ip_type, uin
>  	int job_count = 0;
>  	struct amdgpu_cmd_base *base_cmd = get_cmd_base();
>  
> -	if( priority == AMDGPU_CTX_PRIORITY_HIGH)
> +	if (priority == AMDGPU_CTX_PRIORITY_HIGH)
>  		r = amdgpu_cs_ctx_create2(device_handle, AMDGPU_CTX_PRIORITY_HIGH, &context_handle);
>  	else
>  		r = amdgpu_cs_ctx_create(device_handle, &context_handle);
> @@ -173,7 +173,7 @@ amdgpu_wait_memory(amdgpu_device_handle device_handle, unsigned int ip_type, uin
>  	free_cmd_base(base_cmd);
>  }
>  
> -void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type)
> +void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type, struct pci_addr *pci)
>  {
>  	int r;
>  	FILE *fp;
> @@ -190,18 +190,21 @@ void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int
>  		igt_info("SKIP ... as there's no ring for ip %d\n", ip_type);
>  
>  	if (ip_type == AMD_IP_GFX)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_gfx_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  	else if (ip_type == AMD_IP_COMPUTE)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_compute_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_compute_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  	else if (ip_type == AMD_IP_DMA)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  
>  	snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
>  	r = access(sysfs, R_OK);
>  	if (!r) {
>  		fp = popen(cmd, "r");
>  		if (fp == NULL)
> -			igt_skip("read the sysfs failed: %s \n",sysfs);
> +			igt_skip("read the sysfs failed: %s\n", sysfs);
>  
>  		if (fgets(buffer, 128, fp) != NULL)
>  			sched_mask = strtol(buffer, NULL, 16);
> @@ -247,7 +250,7 @@ void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int
>  
>  	/* recover the sched mask */
>  	if (sched_mask > 1) {
> -		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s",sched_mask, sysfs);
> +		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s", sched_mask, sysfs);
>  		r = system(cmd);
>  		igt_assert_eq(r, 0);
>  	}
> @@ -269,7 +272,7 @@ bad_access_helper(amdgpu_device_handle device_handle, unsigned int cmd_error,
>  	ring_context = calloc(1, sizeof(*ring_context));
>  	igt_assert(ring_context);
>  
> -	if( priority == AMDGPU_CTX_PRIORITY_HIGH)
> +	if (priority == AMDGPU_CTX_PRIORITY_HIGH)
>  		r = amdgpu_cs_ctx_create2(device_handle, AMDGPU_CTX_PRIORITY_HIGH, &ring_context->context_handle);
>  	else
>  		r = amdgpu_cs_ctx_create(device_handle, &ring_context->context_handle);
> @@ -401,7 +404,7 @@ amdgpu_hang_sdma_helper(amdgpu_device_handle device_handle, uint8_t hang_type)
>  	free_cmd_base(base_cmd);
>  }
>  
> -void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type)
> +void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type, struct pci_addr *pci)
>  {
>  	int r;
>  	FILE *fp;
> @@ -418,18 +421,21 @@ void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd
>  		igt_info("SKIP ... as there's no ring for ip %d\n", ip_type);
>  
>  	if (ip_type == AMD_IP_GFX)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_gfx_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  	else if (ip_type == AMD_IP_COMPUTE)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_compute_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_compute_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  	else if (ip_type == AMD_IP_DMA)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  
>  	snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
>  	r = access(sysfs, R_OK);
>  	if (!r) {
>  		fp = popen(cmd, "r");
>  		if (fp == NULL)
> -			igt_skip("read the sysfs failed: %s \n",sysfs);
> +			igt_skip("read the sysfs failed: %s\n", sysfs);
>  
>  		if (fgets(buffer, 128, fp) != NULL)
>  			sched_mask = strtol(buffer, NULL, 16);
> @@ -475,14 +481,14 @@ void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd
>  
>  	/* recover the sched mask */
>  	if (sched_mask > 1) {
> -		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s",sched_mask, sysfs);
> +		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s", sched_mask, sysfs);
>  		r = system(cmd);
>  		igt_assert_eq(r, 0);
>  	}
>  
>  }
>  
> -void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type)
> +void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type, struct pci_addr *pci)
>  {
>  	int r;
>  	FILE *fp;
> @@ -498,13 +504,14 @@ void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t ha
>  	if (!info.available_rings)
>  		igt_info("SKIP ... as there's no ring for the sdma\n");
>  
> -	snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
> +	snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  	snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
>  	r = access(sysfs, R_OK);
>  	if (!r) {
>  		fp = popen(cmd, "r");
>  		if (fp == NULL)
> -			igt_skip("read the sysfs failed: %s \n",sysfs);
> +			igt_skip("read the sysfs failed: %s\n", sysfs);
>  
>  		if (fgets(buffer, 128, fp) != NULL)
>  			sched_mask = strtol(buffer, NULL, 16);
> @@ -530,7 +537,7 @@ void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t ha
>  
>  	/* recover the sched mask */
>  	if (sched_mask > 1) {
> -		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s",sched_mask, sysfs);
> +		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s", sched_mask, sysfs);
>  		r = system(cmd);
>  		igt_assert_eq(r, 0);
>  	}
> diff --git a/lib/amdgpu/amd_deadlock_helpers.h b/lib/amdgpu/amd_deadlock_helpers.h
> index 7f8419280..1d654c490 100644
> --- a/lib/amdgpu/amd_deadlock_helpers.h
> +++ b/lib/amdgpu/amd_deadlock_helpers.h
> @@ -24,12 +24,14 @@
>  #ifndef __AMD_DEADLOCK_HELPERS_H__
>  #define __AMD_DEADLOCK_HELPERS_H__
>  
> +#include "amd_ip_blocks.h"
> +
>  void
> -amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type);
> +amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type, struct pci_addr *pci);
>  void
> -bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type);
> +bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type, struct pci_addr *pci);
>  
>  void
> -amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type);
> +amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type, struct pci_addr *pci);
>  #endif
>  
> diff --git a/tests/amdgpu/amd_deadlock.c b/tests/amdgpu/amd_deadlock.c
> index b8bb053ca..29b7ae509 100644
> --- a/tests/amdgpu/amd_deadlock.c
> +++ b/tests/amdgpu/amd_deadlock.c
> @@ -40,6 +40,7 @@ igt_main
>  	int fd = -1;
>  	int r;
>  	bool arr_cap[AMD_IP_MAX] = {0};
> +	struct pci_addr pci;
>  
>  	igt_fixture {
>  		uint32_t major, minor;
> @@ -60,12 +61,15 @@ igt_main
>  		asic_rings_readness(device, 1, arr_cap);
>  		igt_skip_on(!is_deadlock_tests_enable(&gpu_info));
>  
> +		igt_skip_on(get_pci_addr_from_fd(fd, &pci));
> +		igt_info("PCI Address: domain %04x, bus %02x, device %02x, function %02x\n",
> +				pci.domain, pci.bus, pci.device, pci.function);
>  	}
>  	igt_describe("Test-GPU-reset-by-flooding-sdma-ring-with-jobs");
>  	igt_subtest_with_dynamic("amdgpu-deadlock-sdma") {
>  		if (arr_cap[AMD_IP_DMA]) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma")
> -			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_DMA);
> +			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_DMA, &pci);
>  		}
>  	}
>  
> @@ -75,7 +79,7 @@ igt_main
>  			is_reset_enable(AMD_IP_GFX, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-illegal-reg-access")
>  			bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_REG_ADDRESS,
> -					AMDGPU_HW_IP_GFX);
> +					AMDGPU_HW_IP_GFX, &pci);
>  		}
>  	}
>  
> @@ -85,7 +89,7 @@ igt_main
>  			is_reset_enable(AMD_IP_GFX, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-illegal-mem-access")
>  			bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
> -					AMDGPU_HW_IP_GFX);
> +					AMDGPU_HW_IP_GFX, &pci);
>  		}
>  	}
>  
> @@ -94,7 +98,7 @@ igt_main
>  	igt_subtest_with_dynamic("amdgpu-deadlock-gfx") {
>  		if (arr_cap[AMD_IP_GFX]) {
>  			igt_dynamic_f("amdgpu-deadlock-gfx")
> -			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_GFX);
> +			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_GFX, &pci);
>  		}
>  	}
>  
> @@ -103,7 +107,7 @@ igt_main
>  		if (arr_cap[AMD_IP_COMPUTE] &&
>  			 is_reset_enable(AMD_IP_COMPUTE, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  		bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
> -				AMDGPU_HW_IP_COMPUTE);
> +				AMDGPU_HW_IP_COMPUTE, &pci);
>  		}
>  	}
>  
> @@ -111,7 +115,7 @@ igt_main
>  	igt_subtest_with_dynamic("amdgpu-deadlock-compute") {
>  		if (arr_cap[AMD_IP_COMPUTE]) {
>  			igt_dynamic_f("amdgpu-deadlock-compute")
> -			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_COMPUTE);
> +			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_COMPUTE, &pci);
>  		}
>  	}
>  
> @@ -120,7 +124,7 @@ igt_main
>  		if (arr_cap[AMD_IP_DMA] &&
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-corrupted-header-test")
> -			amdgpu_hang_sdma_ring_helper(device, DMA_CORRUPTED_HEADER_HANG);
> +			amdgpu_hang_sdma_ring_helper(device, DMA_CORRUPTED_HEADER_HANG, &pci);
>  		}
>  	}
>  
> @@ -129,7 +133,7 @@ igt_main
>  		if (arr_cap[AMD_IP_DMA] &&
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-slow-linear-copy")
> -			amdgpu_hang_sdma_ring_helper(device, DMA_SLOW_LINEARCOPY_HANG);
> +			amdgpu_hang_sdma_ring_helper(device, DMA_SLOW_LINEARCOPY_HANG, &pci);
>  		}
>  	}
>  
> @@ -139,7 +143,7 @@ igt_main
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-badop-test")
>  			bad_access_ring_helper(device, CMD_STREAM_EXEC_INVALID_OPCODE,
> -					AMDGPU_HW_IP_DMA);
> +					AMDGPU_HW_IP_DMA, &pci);
>  		}
>  	}
>  
> @@ -149,7 +153,7 @@ igt_main
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-bad-mem-test")
>  			bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
> -					AMDGPU_HW_IP_DMA);
> +					AMDGPU_HW_IP_DMA, &pci);
>  		}
>  	}
>  
> @@ -159,7 +163,7 @@ igt_main
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-bad-reg-test")
>  			bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_REG_ADDRESS,
> -					AMDGPU_HW_IP_DMA);
> +					AMDGPU_HW_IP_DMA, &pci);
>  		}
>  	}
>  
> @@ -169,7 +173,7 @@ igt_main
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-bad-length-test")
>  			bad_access_ring_helper(device, CMD_STREAM_EXEC_INVALID_PACKET_LENGTH,
> -					AMDGPU_HW_IP_DMA);
> +					AMDGPU_HW_IP_DMA, &pci);
>  		}
>  	}
>  

  reply	other threads:[~2025-01-05  7:20 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-05  7:15 [PATCH 1/3] lib/amdgpu: Extract PCI device address from file descriptor vitaly.prosyak
2025-01-05  7:15 ` [PATCH 2/3] lib/amdgpu: enhance wait memory helper vitaly.prosyak
2025-01-05  7:15 ` [PATCH 3/3] lib/amdgpu: fix the hard-coded path when scheduling rings vitaly.prosyak
2025-01-05  7:20   ` vitaly prosyak [this message]
2025-01-05  8:26 ` ✓ Xe.CI.BAT: success for series starting with [1/3] lib/amdgpu: Extract PCI device address from file descriptor Patchwork
2025-01-05  8:30 ` ✓ i915.CI.BAT: " Patchwork
2025-01-05  9:52 ` ✗ Xe.CI.Full: failure " Patchwork
2025-01-05 10:12 ` ✗ i915.CI.Full: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=056e6ee5-3d1b-4097-b4e1-b2f7d815a712@amd.com \
    --to=vprosyak@amd.com \
    --cc=Jesse.zhang@amd.com \
    --cc=alexander.deucher@amd.com \
    --cc=christian.koenig@amd.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=vitaly.prosyak@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox