public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Bert Karwatzki <spasswolf@web.de>
To: Ingo Molnar <mingo@kernel.org>
Cc: "Bert Karwatzki" <spasswolf@web.de>,
	"Christian König" <christian.koenig@amd.com>,
	"Balbir Singh" <balbirs@nvidia.com>,
	"Kees Cook" <kees@kernel.org>,
	"Bjorn Helgaas" <bhelgaas@google.com>,
	"Linus Torvalds" <torvalds@linux-foundation.org>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Andy Lutomirski" <luto@kernel.org>,
	"Alex Deucher" <alexander.deucher@amd.com>,
	linux-kernel@vger.kernel.org, amd-gfx@lists.freedesktop.org
Subject: [PATCH v0] kernel: resource: add devm_request_free_mem_region_from_end()
Date: Thu, 27 Mar 2025 13:02:15 +0100	[thread overview]
Message-ID: <20250327120216.14083-1-spasswolf@web.de> (raw)
In-Reply-To: Z-UuHkUPy60e1GWM@gmail.com

devm_request_free_mem_region() places resources at the end of the
physical address space, DIRECT_MAP_PHYSMEM_END. If the the dma mask
of a pci device is smaller than DIRECT_MAP_PHSYMEM_END then this
resource is not dma accessible by the device which can cause the
device to fallback to using 32bit dma which can lead to severe
performance impacts.

This adds the devm_request_free_mem_region_from_end() function which
allows to select the endpoint from which to place the resource
independently from DIRECT_MAP_PHYSMEM_END.

Link: https://lore.kernel.org/all/20250322122351.3268-1-spasswolf@web.de/

Signed-off-by: Bert Karwatzki <spasswolf@web.de>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  3 ++-
 include/linux/ioport.h                   |  3 +++
 kernel/resource.c                        | 31 ++++++++++++++++++------
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d05d199b5e44..e1942fef3637 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -1042,7 +1042,8 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
 		pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 1;
 		pgmap->type = MEMORY_DEVICE_COHERENT;
 	} else {
-		res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
+		res = devm_request_free_mem_region_from_end(adev->dev,
+				&iomem_resource, size, dma_get_mask(adev->dev));
 		if (IS_ERR(res))
 			return PTR_ERR(res);
 		pgmap->range.start = res->start;
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5385349f0b8a..a9a765721ab4 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -407,6 +407,9 @@ walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,

 struct resource *devm_request_free_mem_region(struct device *dev,
 		struct resource *base, unsigned long size);
+struct resource *devm_request_free_mem_region_from_end(struct device *dev,
+		struct resource *base, unsigned long size,
+		resource_size_t seek_end);
 struct resource *request_free_mem_region(struct resource *base,
 		unsigned long size, const char *name);
 struct resource *alloc_free_mem_region(struct resource *base,
diff --git a/kernel/resource.c b/kernel/resource.c
index 12004452d999..82f40407c02d 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1875,12 +1875,14 @@ EXPORT_SYMBOL(resource_list_free);
 #endif

 static resource_size_t gfr_start(struct resource *base, resource_size_t size,
-				 resource_size_t align, unsigned long flags)
+				 resource_size_t align, resource_size_t seek_end,
+				 unsigned long flags)
 {
 	if (flags & GFR_DESCENDING) {
 		resource_size_t end;

 		end = min_t(resource_size_t, base->end, DIRECT_MAP_PHYSMEM_END);
+		end = min_t(resource_size_t, end, seek_end);
 		return end - size + 1;
 	}

@@ -1920,8 +1922,8 @@ static void remove_free_mem_region(void *_res)
 static struct resource *
 get_free_mem_region(struct device *dev, struct resource *base,
 		    resource_size_t size, const unsigned long align,
-		    const char *name, const unsigned long desc,
-		    const unsigned long flags)
+		    resource_size_t seek_end, const char *name,
+		    const unsigned long desc, const unsigned long flags)
 {
 	resource_size_t addr;
 	struct resource *res;
@@ -1946,7 +1948,7 @@ get_free_mem_region(struct device *dev, struct resource *base,
 	}

 	write_lock(&resource_lock);
-	for (addr = gfr_start(base, size, align, flags);
+	for (addr = gfr_start(base, size, align, seek_end, flags);
 	     gfr_continue(base, addr, align, flags);
 	     addr = gfr_next(addr, align, flags)) {
 		if (__region_intersects(base, addr, size, 0, IORES_DESC_NONE) !=
@@ -2021,17 +2023,30 @@ struct resource *devm_request_free_mem_region(struct device *dev,
 	unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;

 	return get_free_mem_region(dev, base, size, GFR_DEFAULT_ALIGN,
-				   dev_name(dev),
+				   DIRECT_MAP_PHYSMEM_END, dev_name(dev),
 				   IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
 }
 EXPORT_SYMBOL_GPL(devm_request_free_mem_region);

+struct resource *devm_request_free_mem_region_from_end(struct device *dev,
+		struct resource *base, unsigned long size,
+		resource_size_t seek_end)
+{
+	unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;
+
+	return get_free_mem_region(dev, base, size, GFR_DEFAULT_ALIGN,
+				   seek_end, dev_name(dev),
+				   IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
+}
+EXPORT_SYMBOL_GPL(devm_request_free_mem_region_from_end);
+
 struct resource *request_free_mem_region(struct resource *base,
 		unsigned long size, const char *name)
 {
 	unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;

-	return get_free_mem_region(NULL, base, size, GFR_DEFAULT_ALIGN, name,
+	return get_free_mem_region(NULL, base, size, GFR_DEFAULT_ALIGN,
+				   DIRECT_MAP_PHYSMEM_END, name,
 				   IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
 }
 EXPORT_SYMBOL_GPL(request_free_mem_region);
@@ -2055,8 +2070,8 @@ struct resource *alloc_free_mem_region(struct resource *base,
 	/* Default of ascending direction and insert resource */
 	unsigned long flags = 0;

-	return get_free_mem_region(NULL, base, size, align, name,
-				   IORES_DESC_NONE, flags);
+	return get_free_mem_region(NULL, base, size, align, DIRECT_MAP_PHYSMEM_END,
+				   name, IORES_DESC_NONE, flags);
 }
 EXPORT_SYMBOL_GPL(alloc_free_mem_region);
 #endif /* CONFIG_GET_FREE_REGION */
--
2.49.0

One of the problems (I'm sure there are more ...) with this patch is that
it uses dma_get_mask(adev->dev) as the endpoint from which to place the
memory, but dma_get_mask(adev->dev) returns the dma mask of the discrete
GPU, but what actually is needed to avoid the bug would be the dma mask
of the built-in GPU. In my case both are equal (44bits), but I'm not
sure if they are equal in all cases.

> So this patch does the trick for Bert, and I'm wondering what the best
> fix here would be overall, because it's a tricky situation.
>
> Am I correct in assuming that with enough physical memory this bug
> would trigger, with and without nokaslr?

I think the bug triggers as soon as DIRECT_MAP_PHYSMEM_END is bigger
then the dma mask of the integrated GPU.

> I *think* the best approach going forward would be to add the above
> quirk the the x86 memory setup code, but also issue a kernel warning at
> that point with all the relevant information included, so that the
> driver's use_dma32 bug can at least be indicated?
>
> That might also trigger for other systems, because if this scenario is
> so spurious, I doubt it's the only affected driver ...
>
> Thanks,
>
>	Ingo

Or one could make the endpoint from which the memory resource will be
placed selectable.

Bert Karwatzki

             reply	other threads:[~2025-03-27 12:03 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-27 12:02 Bert Karwatzki [this message]
2025-03-27 14:26 ` [PATCH v0] kernel: resource: add devm_request_free_mem_region_from_end() Christian König

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250327120216.14083-1-spasswolf@web.de \
    --to=spasswolf@web.de \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=balbirs@nvidia.com \
    --cc=bhelgaas@google.com \
    --cc=christian.koenig@amd.com \
    --cc=kees@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox