[PATCH 4/7] habanalabs: refactor HOP functions in MMU V1

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Oded Gabbay <ogabbay@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: Ohad Sharabi <osharabi@habana.ai>
Subject: [PATCH 4/7] habanalabs: refactor HOP functions in MMU V1
Date: Mon, 11 Apr 2022 12:08:02 +0300	[thread overview]
Message-ID: <20220411090805.1617112-4-ogabbay@kernel.org> (raw)
In-Reply-To: <20220411090805.1617112-1-ogabbay@kernel.org>

From: Ohad Sharabi <osharabi@habana.ai>

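Take advantage of the hop shifts/masks now being defined as arrays:
replace the per-hop get_hopN_pte_addr() helpers with a single
get_hop_pte_addr() that is indexed by hop number, and replace the
per-hop local variables in the map/unmap flows with arrays that are
walked in loops. While at it, drop the leading underscore from the
map/unmap function names.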

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/mmu/mmu_v1.c | 297 ++++++--------------
 1 file changed, 88 insertions(+), 209 deletions(-)

diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
index f43657ad442b..e2d91a69acc2 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c
@@ -10,6 +10,8 @@
 
 #include <linux/slab.h>
 
+#define MMU_V1_MAX_HOPS	(MMU_HOP4 + 1)
+
 static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
 
 static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
@@ -170,51 +172,15 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
 	return num_of_ptes_left;
 }
 
-static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-					u64 virt_addr, u64 mask, u64 shift)
-{
-	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & mask) >> shift);
-}
-
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
-					struct hl_mmu_properties *mmu_prop,
-					u64 hop_addr, u64 vaddr)
-{
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP0],
-					mmu_prop->hop_shifts[MMU_HOP0]);
-}
-
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
-					struct hl_mmu_properties *mmu_prop,
-					u64 hop_addr, u64 vaddr)
-{
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP1],
-					mmu_prop->hop_shifts[MMU_HOP1]);
-}
-
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
-					struct hl_mmu_properties *mmu_prop,
-					u64 hop_addr, u64 vaddr)
+static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
+					u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
 {
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP2],
-					mmu_prop->hop_shifts[MMU_HOP2]);
-}
+	u64 mask, shift;
 
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
-					struct hl_mmu_properties *mmu_prop,
-					u64 hop_addr, u64 vaddr)
-{
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP3],
-					mmu_prop->hop_shifts[MMU_HOP3]);
-}
-
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
-					struct hl_mmu_properties *mmu_prop,
-					u64 hop_addr, u64 vaddr)
-{
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop_masks[MMU_HOP4],
-					mmu_prop->hop_shifts[MMU_HOP4]);
+	mask = mmu_prop->hop_masks[hop_idx];
+	shift = mmu_prop->hop_shifts[hop_idx];
+	return hop_addr_arr[hop_idx] +
+			ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
 }
 
 static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
@@ -516,74 +482,50 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
 	}
 }
 
-static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
+static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
 				u64 virt_addr, bool is_dram_addr)
 {
+	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_mmu_properties *mmu_prop;
-	u64 hop0_addr = 0, hop0_pte_addr = 0,
-		hop1_addr = 0, hop1_pte_addr = 0,
-		hop2_addr = 0, hop2_pte_addr = 0,
-		hop3_addr = 0, hop3_pte_addr = 0,
-		hop4_addr = 0, hop4_pte_addr = 0,
-		curr_pte;
 	bool is_huge, clear_hop3 = true;
+	int hop_idx;
 
 	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
 	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
-	hop0_addr = get_hop0_addr(ctx);
-	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
-
-	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
-
-	hop1_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
-
-	if (hop1_addr == ULLONG_MAX)
-		goto not_mapped;
-
-	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
-
-	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
-
-	hop2_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
-
-	if (hop2_addr == ULLONG_MAX)
-		goto not_mapped;
-
-	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
-
-	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
-
-	hop3_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
-
-	if (hop3_addr == ULLONG_MAX)
-		goto not_mapped;
+	for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) {
+		if (hop_idx == MMU_HOP0) {
+			hop_addr[hop_idx] = get_hop0_addr(ctx);
+		} else {
+			hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
+			if (hop_addr[hop_idx] == ULLONG_MAX)
+				goto not_mapped;
+		}
 
-	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
+		hop_pte_addr[hop_idx] =
+				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
 
-	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
+		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
+	}
 
 	is_huge = curr_pte & mmu_prop->last_mask;
 
 	if (is_dram_addr && !is_huge) {
-		dev_err(hdev->dev,
-				"DRAM unmapping should use huge pages only\n");
+		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
 		return -EFAULT;
 	}
 
 	if (!is_huge) {
-		hop4_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
-
-		if (hop4_addr == ULLONG_MAX)
+		hop_idx = MMU_HOP4;
+		hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
+		if (hop_addr[hop_idx] == ULLONG_MAX)
 			goto not_mapped;
 
-		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
-							virt_addr);
-
-		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
-
+		hop_pte_addr[hop_idx] =
+				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
+		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
 		clear_hop3 = false;
 	}
 
@@ -605,39 +547,33 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
 			goto not_mapped;
 		}
 
-		write_final_pte(ctx, hop3_pte_addr, default_pte);
-		put_pte(ctx, hop3_addr);
+		hop_idx = MMU_HOP3;
+		write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
+		put_pte(ctx, hop_addr[hop_idx]);
 	} else {
 		if (!(curr_pte & PAGE_PRESENT_MASK))
 			goto not_mapped;
 
-		if (hop4_addr)
-			clear_pte(ctx, hop4_pte_addr);
+		if (hop_addr[MMU_HOP4])
+			clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
 		else
-			clear_pte(ctx, hop3_pte_addr);
+			clear_pte(ctx, hop_pte_addr[MMU_HOP3]);
 
-		if (hop4_addr && !put_pte(ctx, hop4_addr))
+		if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4]))
 			clear_hop3 = true;
 
 		if (!clear_hop3)
 			goto mapped;
 
-		clear_pte(ctx, hop3_pte_addr);
+		for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) {
+			clear_pte(ctx, hop_pte_addr[hop_idx]);
 
-		if (put_pte(ctx, hop3_addr))
-			goto mapped;
+			if (hop_idx == MMU_HOP0)
+				break;
 
-		clear_pte(ctx, hop2_pte_addr);
-
-		if (put_pte(ctx, hop2_addr))
-			goto mapped;
-
-		clear_pte(ctx, hop1_pte_addr);
-
-		if (put_pte(ctx, hop1_addr))
-			goto mapped;
-
-		clear_pte(ctx, hop0_pte_addr);
+			if (put_pte(ctx, hop_addr[hop_idx]))
+				goto mapped;
+		}
 	}
 
 mapped:
@@ -650,21 +586,15 @@ static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
 	return -EINVAL;
 }
 
-static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 			u32 page_size, bool is_dram_addr)
 {
+	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct hl_mmu_properties *mmu_prop;
-	u64 hop0_addr = 0, hop0_pte_addr = 0,
-		hop1_addr = 0, hop1_pte_addr = 0,
-		hop2_addr = 0, hop2_pte_addr = 0,
-		hop3_addr = 0, hop3_pte_addr = 0,
-		hop4_addr = 0, hop4_pte_addr = 0,
-		curr_pte = 0;
-	bool hop1_new = false, hop2_new = false, hop3_new = false,
-		hop4_new = false, is_huge;
-	int rc = -ENOMEM;
+	bool is_huge, hop_new[MMU_V1_MAX_HOPS] = {false};
+	int num_hops, hop_idx, prev_hop, rc = -ENOMEM;
 
 	/*
 	 * This mapping function can map a page or a huge page. For huge page
@@ -684,39 +614,21 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		is_huge = false;
 	}
 
-	hop0_addr = get_hop0_addr(ctx);
-	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
-	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
-
-	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
-	if (hop1_addr == ULLONG_MAX)
-		goto err;
-
-	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
-	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
-
-	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
-	if (hop2_addr == ULLONG_MAX)
-		goto err;
-
-	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
-	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
+	num_hops = is_huge ? (MMU_V1_MAX_HOPS - 1) : MMU_V1_MAX_HOPS;
 
-	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
-	if (hop3_addr == ULLONG_MAX)
-		goto err;
-
-	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
-	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
-
-	if (!is_huge) {
-		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
-		if (hop4_addr == ULLONG_MAX)
-			goto err;
+	for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
+		if (hop_idx == MMU_HOP0) {
+			hop_addr[hop_idx] = get_hop0_addr(ctx);
+		} else {
+			hop_addr[hop_idx] =
+					get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
+			if (hop_addr[hop_idx] == ULLONG_MAX)
+				goto err;
+		}
 
-		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
-							virt_addr);
-		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
+		hop_pte_addr[hop_idx] =
+				get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
+		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
 	}
 
 	if (hdev->dram_default_page_mapping && is_dram_addr) {
@@ -732,30 +644,22 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 			goto err;
 		}
 
-		if (hop1_new || hop2_new || hop3_new || hop4_new) {
-			dev_err(hdev->dev,
-				"DRAM mapping should not allocate more hops\n");
-			rc = -EFAULT;
-			goto err;
+		for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
+			if (hop_new[hop_idx]) {
+				dev_err(hdev->dev, "DRAM mapping should not allocate more hops\n");
+				rc = -EFAULT;
+				goto err;
+			}
 		}
 	} else if (curr_pte & PAGE_PRESENT_MASK) {
 		dev_err(hdev->dev,
 			"mapping already exists for virt_addr 0x%llx\n",
 				virt_addr);
 
-		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
-			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
-		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
-			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
-		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
-			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
-		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
-			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
-
-		if (!is_huge)
-			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
-				*(u64 *) (uintptr_t) hop4_pte_addr,
-				hop4_pte_addr);
+		for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++)
+			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", hop_idx,
+					*(u64 *) (uintptr_t) hop_pte_addr[hop_idx],
+					hop_pte_addr[hop_idx]);
 
 		rc = -EINVAL;
 		goto err;
@@ -764,53 +668,28 @@ static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
 			| PAGE_PRESENT_MASK;
 
-	if (is_huge)
-		write_final_pte(ctx, hop3_pte_addr, curr_pte);
-	else
-		write_final_pte(ctx, hop4_pte_addr, curr_pte);
+	write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
 
-	if (hop1_new) {
-		curr_pte =
-			(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-		write_pte(ctx, hop0_pte_addr, curr_pte);
-	}
-	if (hop2_new) {
-		curr_pte =
-			(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-		write_pte(ctx, hop1_pte_addr, curr_pte);
-		get_pte(ctx, hop1_addr);
-	}
-	if (hop3_new) {
-		curr_pte =
-			(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-		write_pte(ctx, hop2_pte_addr, curr_pte);
-		get_pte(ctx, hop2_addr);
-	}
+	for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
+		prev_hop = hop_idx - 1;
 
-	if (!is_huge) {
-		if (hop4_new) {
-			curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
-					PAGE_PRESENT_MASK;
-			write_pte(ctx, hop3_pte_addr, curr_pte);
-			get_pte(ctx, hop3_addr);
+		if (hop_new[hop_idx]) {
+			curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+			write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
+			if (hop_idx != MMU_HOP1)
+				get_pte(ctx, hop_addr[prev_hop]);
 		}
-
-		get_pte(ctx, hop4_addr);
-	} else {
-		get_pte(ctx, hop3_addr);
 	}
 
+	get_pte(ctx, hop_addr[num_hops - 1]);
+
 	return 0;
 
 err:
-	if (hop4_new)
-		free_hop(ctx, hop4_addr);
-	if (hop3_new)
-		free_hop(ctx, hop3_addr);
-	if (hop2_new)
-		free_hop(ctx, hop2_addr);
-	if (hop1_new)
-		free_hop(ctx, hop1_addr);
+	/* hop_new[] holds num_hops entries, so start from the last valid index */
+	for (hop_idx = num_hops - 1; hop_idx > MMU_HOP0; hop_idx--)
+		if (hop_new[hop_idx])
+			free_hop(ctx, hop_addr[hop_idx]);
 
 	return rc;
 }
@@ -928,8 +807,8 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
 	mmu->fini = hl_mmu_v1_fini;
 	mmu->ctx_init = hl_mmu_v1_ctx_init;
 	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
-	mmu->map = _hl_mmu_v1_map;
-	mmu->unmap = _hl_mmu_v1_unmap;
+	mmu->map = hl_mmu_v1_map;
+	mmu->unmap = hl_mmu_v1_unmap;
 	mmu->flush = flush;
 	mmu->swap_out = hl_mmu_v1_swap_out;
 	mmu->swap_in = hl_mmu_v1_swap_in;
-- 
2.25.1

next prev parent reply	other threads:[~2022-04-11  9:08 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-11  9:07 [PATCH 1/7] habanalabs: don't print normal reset operations Oded Gabbay
2022-04-11  9:08 ` [PATCH 2/7] habanalabs: remove user interrupt debug print Oded Gabbay
2022-04-11  9:08 ` [PATCH 3/7] habanalabs: fix comments according to kernel-doc Oded Gabbay
2022-04-11  9:08 ` Oded Gabbay [this message]
2022-04-11  9:08 ` [PATCH 5/7] habanalabs/gaudi: use lower_32_bits() for casting Oded Gabbay
2022-04-11  9:08 ` [PATCH 6/7] habanalabs: use for_each_sgtable_dma_sg for dma sgt Oded Gabbay
2022-04-11  9:08 ` [PATCH 7/7] habanalabs: support debugfs Byte access to device DRAM Oded Gabbay

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:f43657ad442 dfblob:e2d91a69acc )
 OR (
bs:"[PATCH 4/7] habanalabs: refactor HOP functions in MMU V1" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220411090805.1617112-4-ogabbay@kernel.org \
    --to=ogabbay@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=osharabi@habana.ai \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.