From: Oded Gabbay <ogabbay@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: Dani Liberman <dliberman@habana.ai>
Subject: [PATCH 10/13] habanalabs: handle HBM MMU when capturing page fault data
Date: Thu, 6 Oct 2022 11:23:05 +0300 [thread overview]
Message-ID: <20221006082308.1266716-10-ogabbay@kernel.org> (raw)
In-Reply-To: <20221006082308.1266716-1-ogabbay@kernel.org>
From: Dani Liberman <dliberman@habana.ai>
In case of an HBM MMU page fault, capture its relevant mappings: the physical page-pack (VM_TYPE_PHYS_PACK) mappings rather than the userptr ones.
Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/common/device.c | 29 ++++++++++++++++++-------
1 file changed, 21 insertions(+), 8 deletions(-)
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index e9b373a8cdad..b8b32285720d 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -2273,15 +2273,20 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_
num_of_engines * sizeof(u16));
hdev->captured_err_info.razwi.flags = flags;
}
-static void hl_capture_user_mappings(struct hl_device *hdev)
+static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu)
{
struct page_fault_info *pgf_info = &hdev->captured_err_info.pgf_info;
+ struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
struct hl_vm_hash_node *hnode;
struct hl_userptr *userptr;
+ enum vm_type *vm_type;
struct hl_ctx *ctx;
u32 map_idx = 0;
int i;
+ /* Reset previous session count*/
+ pgf_info->num_of_user_mappings = 0;
+
ctx = hl_get_compute_ctx(hdev);
if (!ctx) {
dev_err(hdev->dev, "Can't get user context for user mappings\n");
@@ -2290,7 +2295,7 @@ static void hl_capture_user_mappings(struct hl_device *hdev)
mutex_lock(&ctx->mem_hash_lock);
hash_for_each(ctx->mem_hash, i, hnode, node)
- pgf_info->num_of_user_mappings++;
+ pgf_info->num_of_user_mappings++;
if (!pgf_info->num_of_user_mappings)
goto finish;
@@ -2300,17 +2305,25 @@ static void hl_capture_user_mappings(struct hl_device *hdev)
*/
vfree(pgf_info->user_mappings);
pgf_info->user_mappings =
- vmalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping));
+ vzalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping));
if (!pgf_info->user_mappings) {
pgf_info->num_of_user_mappings = 0;
goto finish;
}
hash_for_each(ctx->mem_hash, i, hnode, node) {
- userptr = hnode->ptr;
- pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr;
- pgf_info->user_mappings[map_idx].size = userptr->size;
- map_idx++;
+ vm_type = hnode->ptr;
+ if ((*vm_type == VM_TYPE_USERPTR) && (is_pmmu)) {
+ userptr = hnode->ptr;
+ pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr;
+ pgf_info->user_mappings[map_idx].size = userptr->size;
+ map_idx++;
+ } else if ((*vm_type == VM_TYPE_PHYS_PACK) && (!is_pmmu)) {
+ phys_pg_pack = hnode->ptr;
+ pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr;
+ pgf_info->user_mappings[map_idx].size = phys_pg_pack->total_size;
+ map_idx++;
+ }
}
finish:
mutex_unlock(&ctx->mem_hash_lock);
@@ -2326,5 +2339,5 @@ void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is
hdev->captured_err_info.pgf_info.pgf.timestamp = ktime_to_ns(ktime_get());
hdev->captured_err_info.pgf_info.pgf.addr = addr;
hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id;
- hl_capture_user_mappings(hdev);
+ hl_capture_user_mappings(hdev, is_pmmu);
}
--
2.25.1
next prev parent reply other threads:[~2022-10-06 8:24 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-06 8:22 [PATCH 01/13] habanalabs: use lower_32_bits() Oded Gabbay
2022-10-06 8:22 ` [PATCH 02/13] habanalabs/gaudi2: fix module ID for RAZWI handling Oded Gabbay
2022-10-06 8:22 ` [PATCH 03/13] habanalabs: add page fault info uapi Oded Gabbay
2022-10-06 8:22 ` [PATCH 04/13] habanalabs: replace 'pf' to 'prefetch' Oded Gabbay
2022-10-06 8:23 ` [PATCH 05/13] habanalabs/gaudi2: remove privileged MME clock configuration Oded Gabbay
2022-10-06 8:23 ` [PATCH 06/13] habanalabs/gaudi2: add device unavailable notification Oded Gabbay
2022-10-06 8:23 ` [PATCH 07/13] habanalabs: skip idle status check if reset on device release Oded Gabbay
2022-10-06 8:23 ` [PATCH 08/13] habanalabs: allow unregistering eventfd when device non-operational Oded Gabbay
2022-10-06 8:23 ` [PATCH 09/13] habanalabs: move reset workqueue to be under hl_device Oded Gabbay
2022-10-06 8:23 ` Oded Gabbay [this message]
2022-10-06 8:23 ` [PATCH 11/13] habanalabs/gaudi2: capture RAZWI information Oded Gabbay
2022-10-06 8:23 ` [PATCH 12/13] habanalabs/gaudi2: capture page fault data Oded Gabbay
2022-10-06 8:23 ` [PATCH 13/13] habanalabs: verify no zero event is sent Oded Gabbay
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221006082308.1266716-10-ogabbay@kernel.org \
--to=ogabbay@kernel.org \
--cc=dliberman@habana.ai \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.