From: Matthew Auld <matthew.auld@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Subject: [Intel-xe] [PATCH v2 10/14] drm/xe/bo: support tiered vram allocation for small-bar
Date: Tue, 28 Feb 2023 10:41:33 +0000 [thread overview]
Message-ID: <20230228104137.80965-11-matthew.auld@intel.com> (raw)
In-Reply-To: <20230228104137.80965-1-matthew.auld@intel.com>
Add the new flag XE_BO_NEEDS_CPU_ACCESS to force allocation in the
CPU-mappable part of VRAM. If the flag is not specified we do a top-down
allocation, to limit the chances of stealing the precious mappable part
when we don't need it. On a full-bar system, where all of VRAM is
CPU-mappable, all of this becomes a no-op.
For kernel users, it looks like xe_bo_create_pin_map() is the central
place which users should call if they want CPU access to the object, so
add the flag there.
We still need to plumb this through for userspace allocations. Also, it
looks like page tables are allocated using pin_map(), which is less than
ideal since they will now be forced into the mappable part. If we can
already use the GPU to do page-table management, then maybe we should just
force that for small-bar.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
---
drivers/gpu/drm/xe/tests/xe_migrate.c | 3 +-
drivers/gpu/drm/xe/xe_bo.c | 83 ++++++++++++++++++---------
drivers/gpu/drm/xe/xe_bo.h | 1 +
drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 4 ++
4 files changed, 62 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 0de17e90aba9..b786d07710d3 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -95,7 +95,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL,
bo->size,
ttm_bo_type_kernel,
- XE_BO_CREATE_SYSTEM_BIT);
+ XE_BO_CREATE_SYSTEM_BIT |
+ XE_BO_NEEDS_CPU_ACCESS);
if (IS_ERR(sysmem)) {
KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n",
str, PTR_ERR(sysmem));
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 7b331314064c..95ec6b34a28c 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -96,22 +96,30 @@ static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo,
struct ttm_place *places, u32 bo_flags, u32 *c)
{
- struct xe_gt *gt;
-
if (bo_flags & XE_BO_CREATE_VRAM0_BIT) {
+ struct ttm_place place = {};
+ struct xe_gt *gt;
+ u64 io_size;
+
gt = mem_type_to_gt(xe, XE_PL_VRAM0);
+ io_size = gt->mem.vram.io_size;
XE_BUG_ON(!gt->mem.vram.size);
- places[*c] = (struct ttm_place) {
- .mem_type = XE_PL_VRAM0,
- /*
- * For eviction / restore on suspend / resume objects
- * pinned in VRAM must be contiguous
- */
- .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
- XE_BO_CREATE_GGTT_BIT) ?
- TTM_PL_FLAG_CONTIGUOUS : 0,
- };
+ place.mem_type = XE_PL_VRAM0;
+
+ if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
+ XE_BO_CREATE_GGTT_BIT))
+ place.flags |= TTM_PL_FLAG_CONTIGUOUS;
+
+ if (io_size < gt->mem.vram.size) {
+ if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
+ place.fpfn = 0;
+ place.lpfn = io_size >> PAGE_SHIFT;
+ } else {
+ place.flags |= TTM_PL_FLAG_TOPDOWN;
+ }
+ }
+ places[*c] = place;
*c += 1;
if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
@@ -122,22 +130,30 @@ static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo,
static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo,
struct ttm_place *places, u32 bo_flags, u32 *c)
{
- struct xe_gt *gt;
-
if (bo_flags & XE_BO_CREATE_VRAM1_BIT) {
+ struct ttm_place place = {};
+ struct xe_gt *gt;
+ u64 io_size;
+
gt = mem_type_to_gt(xe, XE_PL_VRAM1);
+ io_size = gt->mem.vram.io_size;
XE_BUG_ON(!gt->mem.vram.size);
- places[*c] = (struct ttm_place) {
- .mem_type = XE_PL_VRAM1,
- /*
- * For eviction / restore on suspend / resume objects
- * pinned in VRAM must be contiguous
- */
- .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
- XE_BO_CREATE_GGTT_BIT) ?
- TTM_PL_FLAG_CONTIGUOUS : 0,
- };
+ place.mem_type = XE_PL_VRAM1;
+
+ if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
+ XE_BO_CREATE_GGTT_BIT))
+ place.flags |= TTM_PL_FLAG_CONTIGUOUS;
+
+ if (io_size < gt->mem.vram.size) {
+ if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
+ place.fpfn = 0;
+ place.lpfn = io_size >> PAGE_SHIFT;
+ } else {
+ place.flags |= TTM_PL_FLAG_TOPDOWN;
+ }
+ }
+ places[*c] = place;
*c += 1;
if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
@@ -369,15 +385,22 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
struct ttm_resource *mem)
{
struct xe_device *xe = ttm_to_xe_device(bdev);
- struct xe_gt *gt;
switch (mem->mem_type) {
case XE_PL_SYSTEM:
case XE_PL_TT:
return 0;
case XE_PL_VRAM0:
- case XE_PL_VRAM1:
+ case XE_PL_VRAM1: {
+ struct xe_ttm_vram_mgr_resource *vres =
+ to_xe_ttm_vram_mgr_resource(mem);
+ struct xe_gt *gt;
+
+ if (vres->used_visible_size < mem->size)
+ return -EINVAL;
+
gt = mem_type_to_gt(xe, mem->mem_type);
+
mem->bus.offset = mem->start << PAGE_SHIFT;
if (gt->mem.vram.mapping &&
@@ -392,7 +415,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
mem->bus.caching = ttm_write_combined;
#endif
return 0;
- case XE_PL_STOLEN:
+ } case XE_PL_STOLEN:
return xe_ttm_stolen_io_mem_reserve(xe, mem);
default:
return -EINVAL;
@@ -1160,7 +1183,8 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt,
xe_ttm_stolen_cpu_inaccessible(xe))
flags |= XE_BO_CREATE_GGTT_BIT;
- bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags);
+ bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type,
+ flags | XE_BO_NEEDS_CPU_ACCESS);
if (IS_ERR(bo))
return bo;
@@ -1458,6 +1482,9 @@ int xe_bo_vmap(struct xe_bo *bo)
xe_bo_assert_held(bo);
+ if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
+ return -EINVAL;
+
if (!iosys_map_is_null(&bo->vmap))
return 0;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 0699b2b4c5ca..c937ef10fcf3 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -28,6 +28,7 @@
#define XE_BO_DEFER_BACKING BIT(8)
#define XE_BO_SCANOUT_BIT BIT(9)
#define XE_BO_FIXED_PLACEMENT_BIT BIT(10)
+#define XE_BO_NEEDS_CPU_ACCESS BIT(11)
/* this one is trigger internally only */
#define XE_BO_INTERNAL_TEST BIT(30)
#define XE_BO_INTERNAL_64K BIT(31)
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 8dd33ac65499..dd2fe543bc61 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -361,12 +361,16 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
enum dma_data_direction dir,
struct sg_table **sgt)
{
+ struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res);
struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0);
struct xe_res_cursor cursor;
struct scatterlist *sg;
int num_entries = 0;
int i, r;
+ if (vres->used_visible_size < res->size)
+ return -EOPNOTSUPP;
+
*sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
if (!*sgt)
return -ENOMEM;
--
2.39.2
next prev parent reply other threads:[~2023-02-28 10:42 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-28 10:41 [Intel-xe] [PATCH v2 00/14] small-bar support Matthew Auld
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 01/14] drm/xe/display: fix IS_ALDERLAKE_P() Matthew Auld
2023-02-28 14:09 ` Maarten Lankhorst
2023-02-28 17:57 ` Matt Roper
2023-02-28 18:54 ` Lucas De Marchi
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 02/14] drm/xe/display: fix bo leak when unloading module Matthew Auld
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 03/14] drm/xe: prefer xe_bo_create_pin_map() Matthew Auld
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 04/14] drm/xe/bo: explicitly reject zero sized BO Matthew Auld
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 05/14] drm/xe/mmio: s/lmem/vram/ Matthew Auld
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 06/14] drm/xe/vram: start tracking the io_size Matthew Auld
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 07/14] drm/xe/buddy: remove the virtualized start Matthew Auld
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 08/14] drm/xe/buddy: add visible tracking Matthew Auld
2023-02-28 14:24 ` Maarten Lankhorst
2023-02-28 15:05 ` Matthew Auld
2023-02-28 20:26 ` Maarten Lankhorst
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 09/14] drm/xe/buddy: add compatible and intersects hooks Matthew Auld
2023-02-28 10:41 ` Matthew Auld [this message]
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 11/14] drm/xe/migrate: retain CCS aux state for vram -> vram Matthew Auld
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 12/14] drm/xe/display: annotate CC buffers with NEEDS_CPU_ACCESS Matthew Auld
2023-02-28 14:48 ` Maarten Lankhorst
2023-02-28 15:20 ` Matthew Auld
2023-03-02 11:51 ` Maarten Lankhorst
2023-03-03 12:12 ` Matthew Auld
2023-03-03 12:58 ` Maarten Lankhorst
2023-02-28 15:22 ` Matthew Auld
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 13/14] drm/xe/uapi: add the userspace bits for small-bar Matthew Auld
2023-02-28 10:41 ` [Intel-xe] [PATCH v2 14/14] drm/xe: fully turn on small-bar support Matthew Auld
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230228104137.80965-11-matthew.auld@intel.com \
--to=matthew.auld@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=lucas.demarchi@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.