From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Mukul Joshi <mukul.joshi@amd.com>,
Felix Kuehling <Felix.Kuehling@amd.com>,
Alex Deucher <alexander.deucher@amd.com>,
Sasha Levin <sashal@kernel.org>,
christian.koenig@amd.com, Xinhui.Pan@amd.com, airlied@gmail.com,
daniel@ffwll.ch, amd-gfx@lists.freedesktop.org,
dri-devel@lists.freedesktop.org
Subject: [PATCH AUTOSEL 6.5 25/41] drm/amdkfd: Update cache info reporting for GFX v9.4.3
Date: Sun, 24 Sep 2023 09:15:13 -0400 [thread overview]
Message-ID: <20230924131529.1275335-25-sashal@kernel.org> (raw)
In-Reply-To: <20230924131529.1275335-1-sashal@kernel.org>
From: Mukul Joshi <mukul.joshi@amd.com>
[ Upstream commit 0752e66e91fa86fa5481b04b22053363833ffb85 ]
Update cache info reporting in sysfs to report the correct
number of CUs and associated cache information based on
different spatial partitioning modes.
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 4 ++
drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 82 +++++++++++++----------
drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 2 +-
3 files changed, 51 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index fc719389b5d65..4684711aa695a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -79,6 +79,10 @@ struct crat_header {
#define CRAT_SUBTYPE_IOLINK_AFFINITY 5
#define CRAT_SUBTYPE_MAX 6
+/*
+ * Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
+ * as it breaks the ABI.
+ */
#define CRAT_SIBLINGMAP_SIZE 32
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index ea67a353beb00..5582191022106 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1650,14 +1650,17 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
struct kfd_gpu_cache_info *pcache_info,
struct kfd_cu_info *cu_info,
- int cache_type, unsigned int cu_processor_id)
+ int cache_type, unsigned int cu_processor_id,
+ struct kfd_node *knode)
{
unsigned int cu_sibling_map_mask;
int first_active_cu;
- int i, j, k;
+ int i, j, k, xcc, start, end;
struct kfd_cache_properties *pcache = NULL;
- cu_sibling_map_mask = cu_info->cu_bitmap[0][0][0];
+ start = ffs(knode->xcc_mask) - 1;
+ end = start + NUM_XCC(knode->xcc_mask);
+ cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
cu_sibling_map_mask &=
((1 << pcache_info[cache_type].num_cu_shared) - 1);
first_active_cu = ffs(cu_sibling_map_mask);
@@ -1692,16 +1695,18 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
k = 0;
- for (i = 0; i < cu_info->num_shader_engines; i++) {
- for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
- pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
- pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
- pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
- pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
- k += 4;
-
- cu_sibling_map_mask = cu_info->cu_bitmap[0][i % 4][j + i / 4];
- cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+ for (xcc = start; xcc < end; xcc++) {
+ for (i = 0; i < cu_info->num_shader_engines; i++) {
+ for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+ pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+ pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+ pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+ pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+ k += 4;
+
+ cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
+ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+ }
}
}
pcache->sibling_map_size = k;
@@ -1719,7 +1724,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
{
struct kfd_gpu_cache_info *pcache_info = NULL;
- int i, j, k;
+ int i, j, k, xcc, start, end;
int ct = 0;
unsigned int cu_processor_id;
int ret;
@@ -1753,37 +1758,42 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
* then it will consider only one CU from
* the shared unit
*/
+ start = ffs(kdev->xcc_mask) - 1;
+ end = start + NUM_XCC(kdev->xcc_mask);
+
for (ct = 0; ct < num_of_cache_types; ct++) {
cu_processor_id = gpu_processor_id;
if (pcache_info[ct].cache_level == 1) {
- for (i = 0; i < pcu_info->num_shader_engines; i++) {
- for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
- for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
-
- ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
- pcu_info->cu_bitmap[0][i % 4][j + i / 4], ct,
- cu_processor_id, k);
-
- if (ret < 0)
- break;
-
- if (!ret) {
- num_of_entries++;
- list_add_tail(&props_ext->list, &dev->cache_props);
+ for (xcc = start; xcc < end; xcc++) {
+ for (i = 0; i < pcu_info->num_shader_engines; i++) {
+ for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
+ for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
+
+ ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
+ pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
+ cu_processor_id, k);
+
+ if (ret < 0)
+ break;
+
+ if (!ret) {
+ num_of_entries++;
+ list_add_tail(&props_ext->list, &dev->cache_props);
+ }
+
+ /* Move to next CU block */
+ num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+ pcu_info->num_cu_per_sh) ?
+ pcache_info[ct].num_cu_shared :
+ (pcu_info->num_cu_per_sh - k);
+ cu_processor_id += num_cu_shared;
}
-
- /* Move to next CU block */
- num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
- pcu_info->num_cu_per_sh) ?
- pcache_info[ct].num_cu_shared :
- (pcu_info->num_cu_per_sh - k);
- cu_processor_id += num_cu_shared;
}
}
}
} else {
ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
- pcu_info, ct, cu_processor_id);
+ pcu_info, ct, cu_processor_id, kdev);
if (ret < 0)
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index cba2cd5ed9d19..46927263e014d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -86,7 +86,7 @@ struct kfd_mem_properties {
struct attribute attr;
};
-#define CACHE_SIBLINGMAP_SIZE 64
+#define CACHE_SIBLINGMAP_SIZE 128
struct kfd_cache_properties {
struct list_head list;
--
2.40.1
next prev parent reply other threads:[~2023-09-24 13:17 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-24 13:14 [PATCH AUTOSEL 6.5 01/41] nvme-fc: Prevent null pointer dereference in nvme_fc_io_getuuid() Sasha Levin
2023-09-24 13:14 ` [PATCH AUTOSEL 6.5 02/41] parisc: sba: Fix compile warning wrt list of SBA devices Sasha Levin
2023-09-24 13:14 ` [PATCH AUTOSEL 6.5 03/41] parisc: sba-iommu: Fix sparse warnigs Sasha Levin
2023-09-24 13:14 ` [PATCH AUTOSEL 6.5 04/41] parisc: ccio-dma: Fix sparse warnings Sasha Levin
2023-09-24 13:14 ` [PATCH AUTOSEL 6.5 05/41] parisc: iosapic.c: " Sasha Levin
2023-09-24 13:14 ` [PATCH AUTOSEL 6.5 06/41] parisc: drivers: Fix sparse warning Sasha Levin
2023-09-24 13:14 ` [PATCH AUTOSEL 6.5 07/41] parisc: irq: Make irq_stack_union static to avoid " Sasha Levin
2023-09-24 13:14 ` [PATCH AUTOSEL 6.5 08/41] scsi: qedf: Add synchronization between I/O completions and abort Sasha Levin
2023-09-24 13:14 ` [PATCH AUTOSEL 6.5 09/41] scsi: ufs: core: Move __ufshcd_send_uic_cmd() outside host_lock Sasha Levin
2023-09-24 13:14 ` [PATCH AUTOSEL 6.5 10/41] scsi: ufs: core: Poll HCS.UCRDY before issuing a UIC command Sasha Levin
2023-09-24 13:14 ` [PATCH AUTOSEL 6.5 11/41] selftests/ftrace: Correctly enable event in instance-event.tc Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 12/41] ring-buffer: Avoid softlockup in ring_buffer_resize() Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 13/41] btrfs: do not block starts waiting on previous transaction commit Sasha Levin
2023-09-25 13:01 ` David Sterba
2023-09-25 17:47 ` Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 14/41] btrfs: improve error message after failure to add delayed dir index item Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 15/41] btrfs: assert delayed node locked when removing delayed item Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 16/41] selftests: fix dependency checker script Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 17/41] ring-buffer: Do not attempt to read past "commit" Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 18/41] net/smc: bugfix for smcr v2 server connect success statistic Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 19/41] ata: sata_mv: Fix incorrect string length computation in mv_dump_mem() Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 20/41] efi/x86: Ensure that EFI_RUNTIME_MAP is enabled for kexec Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 21/41] platform/mellanox: mlxbf-bootctl: add NET dependency into Kconfig Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 22/41] platform/x86: asus-wmi: Support 2023 ROG X16 tablet mode Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 23/41] thermal/of: add missing of_node_put() Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 24/41] drm/amdgpu: Store CU info from all XCCs for GFX v9.4.3 Sasha Levin
2023-09-24 13:15 ` Sasha Levin [this message]
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 26/41] drm/amdkfd: Update CU masking for GFX 9.4.3 Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 27/41] drm/amd/display: Add dirty rect support for Replay Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 28/41] drm/amd/display: Don't check registers, if using AUX BL control Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 29/41] drm/amdgpu/soc21: don't remap HDP registers for SR-IOV Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 30/41] drm/amdgpu/nbio4.3: set proper rmmio_remap.reg_offset " Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 31/41] drm/amdgpu: fallback to old RAS error message for aqua_vanjaram Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 32/41] drm/amdkfd: Checkpoint and restore queues on GFX11 Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 33/41] drm/amdgpu: Handle null atom context in VBIOS info ioctl Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 34/41] objtool: Fix _THIS_IP_ detection for cold functions Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 35/41] nvme-pci: do not set the NUMA node of device if it has none Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 36/41] riscv: errata: fix T-Head dcache.cva encoding Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 37/41] scsi: pm80xx: Use phy-specific SAS address when sending PHY_START command Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 38/41] scsi: pm80xx: Avoid leaking tags when processing OPC_INB_SET_CONTROLLER_CONFIG command Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 39/41] smb3: correct places where ENOTSUPP is used instead of preferred EOPNOTSUPP Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 40/41] ata: libata-eh: do not clear ATA_PFLAG_EH_PENDING in ata_eh_reset() Sasha Levin
2023-09-24 13:15 ` [PATCH AUTOSEL 6.5 41/41] ata: libata-eh: do not thaw the port twice " Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230924131529.1275335-25-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=Felix.Kuehling@amd.com \
--cc=Xinhui.Pan@amd.com \
--cc=airlied@gmail.com \
--cc=alexander.deucher@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=christian.koenig@amd.com \
--cc=daniel@ffwll.ch \
--cc=dri-devel@lists.freedesktop.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mukul.joshi@amd.com \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox