All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	patches@lists.linux.dev, Philip Yang <Philip.Yang@amd.com>,
	Felix Kuehling <felix.kuehling@amd.com>,
	Alex Deucher <alexander.deucher@amd.com>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 6.11 27/63] drm/amdkfd: Accounting pdd vram_usage for svm
Date: Fri, 15 Nov 2024 07:37:50 +0100	[thread overview]
Message-ID: <20241115063726.900156661@linuxfoundation.org> (raw)
In-Reply-To: <20241115063725.892410236@linuxfoundation.org>

6.11-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Philip Yang <Philip.Yang@amd.com>

[ Upstream commit 68d26c10ef503175df3142db6fcd75dd94860592 ]

Process device data pdd->vram_usage is read by rocm-smi via sysfs, but it
currently does not account for svm_bo usage, so the per-process VRAM usage
reported by "rocm-smi --showpids" is incorrect.

Add pdd->vram_usage accounting on svm_bo allocation and release, and
change vram_usage to atomic64_t because it is now updated outside the
process mutex.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 98c0b0efcc11f2a5ddf3ce33af1e48eedf808b04)
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 26 ++++++++++++++++++++++++
 4 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 546b02f2241a6..5953bc5f31192 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1170,7 +1170,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 
 		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
 			size >>= 1;
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+		atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
 	}
 
 	mutex_unlock(&p->mutex);
@@ -1241,7 +1241,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
 		kfd_process_device_remove_obj_handle(
 			pdd, GET_IDR_HANDLE(args->handle));
 
-	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+	atomic64_sub(size, &pdd->vram_usage);
 
 err_unlock:
 err_pdd:
@@ -2346,7 +2346,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
 	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		bo_bucket->restored_offset = offset;
 		/* Update the VRAM usage count */
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+		atomic64_add(bo_bucket->size, &pdd->vram_usage);
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 2b3ec92981e8f..f35741fade911 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -766,7 +766,7 @@ struct kfd_process_device {
 	enum kfd_pdd_bound bound;
 
 	/* VRAM usage */
-	uint64_t vram_usage;
+	atomic64_t vram_usage;
 	struct attribute attr_vram;
 	char vram_filename[MAX_SYSFS_FILENAME_LEN];
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index e44892109f71b..8343b3e4de7b5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -306,7 +306,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
 	} else if (strncmp(attr->name, "vram_", 5) == 0) {
 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
 							      attr_vram);
-		return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+		return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
 	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
 							      attr_sdma);
@@ -1599,7 +1599,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
 	pdd->bound = PDD_UNBOUND;
 	pdd->already_dequeued = false;
 	pdd->runtime_inuse = false;
-	pdd->vram_usage = 0;
+	atomic64_set(&pdd->vram_usage, 0);
 	pdd->sdma_past_activity_counter = 0;
 	pdd->user_gpu_id = dev->id;
 	atomic64_set(&pdd->evict_duration_counter, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index bd9c2921e0dcc..7d00d89586a10 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -404,6 +404,27 @@ static void svm_range_bo_release(struct kref *kref)
 		spin_lock(&svm_bo->list_lock);
 	}
 	spin_unlock(&svm_bo->list_lock);
+
+	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+		struct kfd_process_device *pdd;
+		struct kfd_process *p;
+		struct mm_struct *mm;
+
+		mm = svm_bo->eviction_fence->mm;
+		/*
+		 * The forked child process takes svm_bo device pages ref, svm_bo could be
+		 * released after parent process is gone.
+		 */
+		p = kfd_lookup_process_by_mm(mm);
+		if (p) {
+			pdd = kfd_get_process_device_data(svm_bo->node, p);
+			if (pdd)
+				atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+			kfd_unref_process(p);
+		}
+		mmput(mm);
+	}
+
 	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
 		/* We're not in the eviction worker. Signal the fence. */
 		dma_fence_signal(&svm_bo->eviction_fence->base);
@@ -531,6 +552,7 @@ int
 svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 			bool clear)
 {
+	struct kfd_process_device *pdd;
 	struct amdgpu_bo_param bp;
 	struct svm_range_bo *svm_bo;
 	struct amdgpu_bo_user *ubo;
@@ -622,6 +644,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 	list_add(&prange->svm_bo_list, &svm_bo->range_list);
 	spin_unlock(&svm_bo->list_lock);
 
+	pdd = svm_range_get_pdd_by_node(prange, node);
+	if (pdd)
+		atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
+
 	return 0;
 
 reserve_bo_failed:
-- 
2.43.0




  parent reply	other threads:[~2024-11-15  6:48 UTC|newest]

Thread overview: 78+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-15  6:37 [PATCH 6.11 00/63] 6.11.9-rc1 review Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 01/63] smb: client: Fix use-after-free of network namespace Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 02/63] nvme/host: Fix RCU list traversal to use SRCU primitive Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 03/63] 9p: v9fs_fid_find: also lookup by inode if not found dentry Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 04/63] 9p: Avoid creating multiple slab caches with the same name Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 05/63] selftests/bpf: Verify that sync_linked_regs preserves subreg_def Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 06/63] nvmet-passthru: clear EUID/NGUID/UUID while using loop target Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 07/63] irqchip/ocelot: Fix trigger register address Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 08/63] pinctrl: aw9523: add missing mutex_destroy Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 09/63] pinctrl: intel: platform: Add Panther Lake to the list of supported Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 10/63] nvme: tcp: avoid race between queue_lock lock and destroy Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 11/63] block: Fix elevator_get_default() checking for NULL q->tag_set Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 12/63] HID: multitouch: Add support for B2402FVA track point Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 13/63] HID: multitouch: Add quirk for HONOR MagicBook Art 14 touchpad Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 14/63] iommu/arm-smmu: Clarify MMU-500 CPRE workaround Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 15/63] nvme: disable CC.CRIME (NVME_CC_CRIME) Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 16/63] bpf: use kvzmalloc to allocate BPF verifier environment Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 17/63] crypto: api - Fix liveliness check in crypto_alg_tested Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 18/63] crypto: marvell/cesa - Disable hash algorithms Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 19/63] s390/ap: Fix CCA crypto card behavior within protected execution environment Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 20/63] sound: Make CONFIG_SND depend on INDIRECT_IOMEM instead of UML Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 21/63] drm/vmwgfx: Limit display layout ioctl array size to VMWGFX_NUM_DISPLAY_UNITS Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 22/63] selftests/bpf: Assert link info uprobe_multi count & path_size if unset Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 23/63] RDMA/siw: Add sendpage_ok() check to disable MSG_SPLICE_PAGES Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 24/63] ALSA: hda/tas2781: Add new quirk for Lenovo, ASUS, Dell projects Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 25/63] kasan: Disable Software Tag-Based KASAN with GCC Greg Kroah-Hartman
2024-11-15  6:55   ` Jiri Slaby
2024-11-15  9:27     ` Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 26/63] nvme-multipath: defer partition scanning Greg Kroah-Hartman
2024-11-15  6:37 ` Greg Kroah-Hartman [this message]
2024-11-15  6:37 ` [PATCH 6.11 28/63] powerpc/powernv: Free name on error in opal_event_init() Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 29/63] net: phy: mdio-bcm-unimac: Add BCM6846 support Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 30/63] drm/xe/query: Increase timestamp width Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 31/63] nvme-loop: flush off pending I/O while shutting down loop controller Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 32/63] nvme: make keep-alive synchronous operation Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 33/63] samples/landlock: Fix port parsing in sandboxer Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 34/63] vDPA/ifcvf: Fix pci_read_config_byte() return code handling Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 35/63] virtio_pci: Fix admin vq cleanup by using correct info pointer Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.11 36/63] bpf: Add sk_is_inet and IS_ICSK check in tls_sw_has_ctx_tx/rx Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 37/63] bpf: Fix mismatched RCU unlock flavour in bpf_out_neigh_v6 Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 38/63] ASoC: Intel: avs: Update stream status in a separate thread Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 39/63] ASoC: codecs: Fix error handling in aw_dev_get_dsp_status function Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 40/63] ASoC: amd: yc: Add quirk for ASUS Vivobook S15 M3502RA Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 41/63] ASoC: amd: yc: Fix non-functional mic on ASUS E1404FA Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 42/63] ASoC: Intel: soc-acpi: lnl: Add match entry for TM2 laptops Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 43/63] netfs: Downgrade i_rwsem for a buffered write Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 44/63] fs: Fix uninitialized value issue in from_kuid and from_kgid Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 45/63] afs: Fix lock recursion Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 46/63] HID: i2c-hid: Delayed i2c resume wakeup for 0x0d42 Goodix touchpad Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 47/63] HID: multitouch: Add quirk for Logitech Bolt receiver w/ Casa touchpad Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 48/63] HID: lenovo: Add support for Thinkpad X1 Tablet Gen 3 keyboard Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 49/63] ASoC: codecs: lpass-rx-macro: fix RXn(rx,n) macro for DSM_CTL and SEC7 regs Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 50/63] RISCV: KVM: use raw_spinlock for critical section in imsic Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 51/63] ASoC: rt722-sdca: increase clk_stop_timeout to fix clock stop issue Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 52/63] LoongArch: Use "Exception return address" to comment ERA Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 53/63] ASoC: fsl_micfil: Add sample rate constraint Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 54/63] LoongArch: KVM: Mark hrtimer to expire in hard interrupt context Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 55/63] net: usb: qmi_wwan: add Fibocom FG132 0x0112 composition Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 56/63] bpf: Check validity of link->type in bpf_link_show_fdinfo() Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 57/63] drm/xe: Enlarge the invalidation timeout from 150 to 500 Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 58/63] drm/xe/guc/ct: Flush g2h worker in case of g2h response timeout Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 59/63] drm/xe: Handle unreliable MMIO reads during forcewake Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 60/63] drm/xe/ufence: Prefetch ufence addr to catch bogus address Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 61/63] drm/xe: Dont restart parallel queues multiple times on GT reset Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 62/63] mm: krealloc: Fix MTE false alarm in __do_krealloc Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.11 63/63] 9p: fix slab cache name creation for real Greg Kroah-Hartman
2024-11-15  8:16 ` [PATCH 6.11 00/63] 6.11.9-rc1 review Luna Jernberg
2024-11-15 17:40 ` Peter Schneider
2024-11-15 18:11 ` Jon Hunter
2024-11-15 19:48 ` Florian Fainelli
2024-11-15 21:18 ` Mark Brown
2024-11-15 23:50 ` Ron Economos
2024-11-16  8:07 ` Naresh Kamboju
2024-11-16 17:17 ` [PATCH 6.11] " Hardik Garg
2024-11-16 20:52 ` [PATCH 6.11 00/63] " Markus Reichelt
2024-11-16 21:04 ` Shuah Khan
2024-11-17 13:24 ` Muhammad Usama Anjum
2024-11-17 13:30 ` Pavel Machek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241115063726.900156661@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=Philip.Yang@amd.com \
    --cc=alexander.deucher@amd.com \
    --cc=felix.kuehling@amd.com \
    --cc=patches@lists.linux.dev \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.