public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	patches@lists.linux.dev, Philip Yang <Philip.Yang@amd.com>,
	Felix Kuehling <felix.kuehling@amd.com>,
	Alex Deucher <alexander.deucher@amd.com>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 6.6 19/48] drm/amdkfd: Accounting pdd vram_usage for svm
Date: Fri, 15 Nov 2024 07:38:08 +0100	[thread overview]
Message-ID: <20241115063723.658323107@linuxfoundation.org> (raw)
In-Reply-To: <20241115063722.962047137@linuxfoundation.org>

6.6-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Philip Yang <Philip.Yang@amd.com>

[ Upstream commit 68d26c10ef503175df3142db6fcd75dd94860592 ]

Process device data pdd->vram_usage is read by rocm-smi via sysfs, this
is currently missing the svm_bo usage accounting, so "rocm-smi
--showpids" per process VRAM usage report is incorrect.

Add pdd->vram_usage accounting when svm_bo allocation and release,
change to atomic64_t type because it is updated outside process mutex
now.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 98c0b0efcc11f2a5ddf3ce33af1e48eedf808b04)
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 26 ++++++++++++++++++++++++
 4 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 19d46be639429..8669677662d0c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1164,7 +1164,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 
 		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
 			size >>= 1;
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+		atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
 	}
 
 	mutex_unlock(&p->mutex);
@@ -1235,7 +1235,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
 		kfd_process_device_remove_obj_handle(
 			pdd, GET_IDR_HANDLE(args->handle));
 
-	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+	atomic64_sub(size, &pdd->vram_usage);
 
 err_unlock:
 err_pdd:
@@ -2352,7 +2352,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
 	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		bo_bucket->restored_offset = offset;
 		/* Update the VRAM usage count */
-		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+		atomic64_add(bo_bucket->size, &pdd->vram_usage);
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 67204c3dfbb8f..27c9d5c43765a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -765,7 +765,7 @@ struct kfd_process_device {
 	enum kfd_pdd_bound bound;
 
 	/* VRAM usage */
-	uint64_t vram_usage;
+	atomic64_t vram_usage;
 	struct attribute attr_vram;
 	char vram_filename[MAX_SYSFS_FILENAME_LEN];
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 43f520b379670..6c90231e0aec2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -306,7 +306,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
 	} else if (strncmp(attr->name, "vram_", 5) == 0) {
 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
 							      attr_vram);
-		return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+		return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
 	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
 		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
 							      attr_sdma);
@@ -1589,7 +1589,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
 	pdd->bound = PDD_UNBOUND;
 	pdd->already_dequeued = false;
 	pdd->runtime_inuse = false;
-	pdd->vram_usage = 0;
+	atomic64_set(&pdd->vram_usage, 0);
 	pdd->sdma_past_activity_counter = 0;
 	pdd->user_gpu_id = dev->id;
 	atomic64_set(&pdd->evict_duration_counter, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index ce76d45549984..6b7c6f45a80a8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -391,6 +391,27 @@ static void svm_range_bo_release(struct kref *kref)
 		spin_lock(&svm_bo->list_lock);
 	}
 	spin_unlock(&svm_bo->list_lock);
+
+	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+		struct kfd_process_device *pdd;
+		struct kfd_process *p;
+		struct mm_struct *mm;
+
+		mm = svm_bo->eviction_fence->mm;
+		/*
+		 * The forked child process takes svm_bo device pages ref, svm_bo could be
+		 * released after parent process is gone.
+		 */
+		p = kfd_lookup_process_by_mm(mm);
+		if (p) {
+			pdd = kfd_get_process_device_data(svm_bo->node, p);
+			if (pdd)
+				atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+			kfd_unref_process(p);
+		}
+		mmput(mm);
+	}
+
 	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
 		/* We're not in the eviction worker. Signal the fence. */
 		dma_fence_signal(&svm_bo->eviction_fence->base);
@@ -518,6 +539,7 @@ int
 svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 			bool clear)
 {
+	struct kfd_process_device *pdd;
 	struct amdgpu_bo_param bp;
 	struct svm_range_bo *svm_bo;
 	struct amdgpu_bo_user *ubo;
@@ -609,6 +631,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
 	list_add(&prange->svm_bo_list, &svm_bo->range_list);
 	spin_unlock(&svm_bo->list_lock);
 
+	pdd = svm_range_get_pdd_by_node(prange, node);
+	if (pdd)
+		atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
+
 	return 0;
 
 reserve_bo_failed:
-- 
2.43.0




  parent reply	other threads:[~2024-11-15  6:50 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-15  6:37 [PATCH 6.6 00/48] 6.6.62-rc1 review Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.6 01/48] 9p: v9fs_fid_find: also lookup by inode if not found dentry Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.6 02/48] 9p: Avoid creating multiple slab caches with the same name Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.6 03/48] selftests/bpf: Verify that sync_linked_regs preserves subreg_def Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.6 04/48] irqchip/ocelot: Fix trigger register address Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.6 05/48] nvme: tcp: avoid race between queue_lock lock and destroy Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.6 06/48] block: Fix elevator_get_default() checking for NULL q->tag_set Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.6 07/48] HID: multitouch: Add support for B2402FVA track point Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.6 08/48] HID: multitouch: Add quirk for HONOR MagicBook Art 14 touchpad Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.6 09/48] iommu/arm-smmu: Clarify MMU-500 CPRE workaround Greg Kroah-Hartman
2024-11-15  6:37 ` [PATCH 6.6 10/48] nvme: disable CC.CRIME (NVME_CC_CRIME) Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 11/48] bpf: use kvzmalloc to allocate BPF verifier environment Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 12/48] crypto: api - Fix liveliness check in crypto_alg_tested Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 13/48] crypto: marvell/cesa - Disable hash algorithms Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 14/48] sound: Make CONFIG_SND depend on INDIRECT_IOMEM instead of UML Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 15/48] drm/vmwgfx: Limit display layout ioctl array size to VMWGFX_NUM_DISPLAY_UNITS Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 16/48] RDMA/siw: Add sendpage_ok() check to disable MSG_SPLICE_PAGES Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 17/48] kasan: Disable Software Tag-Based KASAN with GCC Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 18/48] nvme-multipath: defer partition scanning Greg Kroah-Hartman
2024-11-15  6:38 ` Greg Kroah-Hartman [this message]
2024-11-15  6:38 ` [PATCH 6.6 20/48] powerpc/powernv: Free name on error in opal_event_init() Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 21/48] net: phy: mdio-bcm-unimac: Add BCM6846 support Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 22/48] nvme-loop: flush off pending I/O while shutting down loop controller Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 23/48] nvme: make keep-alive synchronous operation Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 24/48] smb: client: Fix use-after-free of network namespace Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 25/48] nvme/host: Fix RCU list traversal to use SRCU primitive Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 26/48] vDPA/ifcvf: Fix pci_read_config_byte() return code handling Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 27/48] bpf: Add sk_is_inet and IS_ICSK check in tls_sw_has_ctx_tx/rx Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 28/48] bpf: Fix mismatched RCU unlock flavour in bpf_out_neigh_v6 Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 29/48] ASoC: amd: yc: Add quirk for ASUS Vivobook S15 M3502RA Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 30/48] ASoC: amd: yc: Fix non-functional mic on ASUS E1404FA Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 31/48] fs: Fix uninitialized value issue in from_kuid and from_kgid Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 32/48] HID: multitouch: Add quirk for Logitech Bolt receiver w/ Casa touchpad Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 33/48] HID: lenovo: Add support for Thinkpad X1 Tablet Gen 3 keyboard Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 34/48] RISCV: KVM: use raw_spinlock for critical section in imsic Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 35/48] ASoC: rt722-sdca: increase clk_stop_timeout to fix clock stop issue Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 36/48] LoongArch: Use "Exception return address" to comment ERA Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 37/48] ASoC: fsl_micfil: Add sample rate constraint Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 38/48] net: usb: qmi_wwan: add Fibocom FG132 0x0112 composition Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 39/48] bpf: Check validity of link->type in bpf_link_show_fdinfo() Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 40/48] io_uring: fix possible deadlock in io_register_iowq_max_workers() Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 41/48] mm: krealloc: Fix MTE false alarm in __do_krealloc Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 42/48] mm: add page_rmappable_folio() wrapper Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 43/48] mm/readahead: do not allow order-1 folio Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 44/48] mm: support order-1 folios in the page cache Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 45/48] mm: always initialise folio->_deferred_list Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 46/48] mm: refactor folio_undo_large_rmappable() Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 47/48] mm/thp: fix deferred split unqueue naming and locking Greg Kroah-Hartman
2024-11-15  6:38 ` [PATCH 6.6 48/48] 9p: fix slab cache name creation for real Greg Kroah-Hartman
2024-11-15  9:07 ` [PATCH 6.6 00/48] 6.6.62-rc1 review Takeshi Ogasawara
2024-11-15 13:36 ` Peter Schneider
2024-11-15 15:59 ` Harshit Mogalapalli
2024-11-15 18:11 ` Jon Hunter
2024-11-15 18:26 ` SeongJae Park
2024-11-15 19:27 ` Florian Fainelli
2024-11-15 21:20 ` Mark Brown
2024-11-15 23:57 ` Ron Economos
2024-11-16  8:23 ` Naresh Kamboju
2024-11-16 17:15 ` [PATCH 6.6] " Hardik Garg
2024-11-16 21:06 ` [PATCH 6.6 00/48] " Shuah Khan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241115063723.658323107@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=Philip.Yang@amd.com \
    --cc=alexander.deucher@amd.com \
    --cc=felix.kuehling@amd.com \
    --cc=patches@lists.linux.dev \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox