* [PATCH v8 1/6] drm/ttm: Add a virtual base class for graphics memory backup
2024-08-16 13:37 [PATCH v8 0/6] TTM shrinker helpers and xe buffer object shrinker Thomas Hellström
@ 2024-08-16 13:37 ` Thomas Hellström
2024-08-16 13:37 ` [PATCH v8 2/6] drm/ttm/pool: Provide a helper to shrink pages Thomas Hellström
From: Thomas Hellström @ 2024-08-16 13:37 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, Christian König,
Somalapuram Amaranath, Matthew Brost, dri-devel, Paulo Zanoni
Initially intended for experimenting with different backup
solutions (shmem vs direct swap cache insertion), abstract
the backup destination using a virtual base class.
Also provide a sample implementation for shmem.
While the abstraction could perhaps be skipped once a preferred
backup solution is settled on, this functionality may actually
come in handy for configurable dedicated graphics memory
backup to fast nvme files or similar, without affecting
swap-space. It could also be useful for VRAM backup on S4 and
other cases.
v5:
- Fix a UAF. (kernel test robot, Dan Carpenter)
v6:
- Rename ttm_backup_shmem_copy_page() function argument
(Matthew Brost)
- Add some missing documentation
v8:
- Use folio_file_page to get to the page we want to write back
instead of using the first page of the folio.
Cc: Christian König <christian.koenig@amd.com>
Cc: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com> #v7
---
drivers/gpu/drm/ttm/Makefile | 2 +-
drivers/gpu/drm/ttm/ttm_backup_shmem.c | 139 +++++++++++++++++++++++++
include/drm/ttm/ttm_backup.h | 137 ++++++++++++++++++++++++
3 files changed, 277 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/drm/ttm/ttm_backup_shmem.c
create mode 100644 include/drm/ttm/ttm_backup.h
diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index dad298127226..5e980dd90e41 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -4,7 +4,7 @@
ttm-y := ttm_tt.o ttm_bo.o ttm_bo_util.o ttm_bo_vm.o ttm_module.o \
ttm_execbuf_util.o ttm_range_manager.o ttm_resource.o ttm_pool.o \
- ttm_device.o ttm_sys_manager.o
+ ttm_device.o ttm_sys_manager.o ttm_backup_shmem.o
ttm-$(CONFIG_AGP) += ttm_agp_backend.o
obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_backup_shmem.c b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
new file mode 100644
index 000000000000..cfe4140cc59d
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_backup_shmem.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <drm/ttm/ttm_backup.h>
+#include <linux/page-flags.h>
+
+/**
+ * struct ttm_backup_shmem - A shmem based ttm_backup subclass.
+ * @backup: The base struct ttm_backup
+ * @filp: The associated shmem object
+ */
+struct ttm_backup_shmem {
+ struct ttm_backup backup;
+ struct file *filp;
+};
+
+static struct ttm_backup_shmem *to_backup_shmem(struct ttm_backup *backup)
+{
+ return container_of(backup, struct ttm_backup_shmem, backup);
+}
+
+static void ttm_backup_shmem_drop(struct ttm_backup *backup, unsigned long handle)
+{
+ handle -= 1;
+ shmem_truncate_range(file_inode(to_backup_shmem(backup)->filp), handle,
+ handle + 1);
+}
+
+static int ttm_backup_shmem_copy_page(struct ttm_backup *backup, struct page *dst,
+ unsigned long handle, bool intr)
+{
+ struct file *filp = to_backup_shmem(backup)->filp;
+ struct address_space *mapping = filp->f_mapping;
+ struct folio *from_folio;
+
+ handle -= 1;
+ from_folio = shmem_read_folio(mapping, handle);
+ if (IS_ERR(from_folio))
+ return PTR_ERR(from_folio);
+
+ /* Note: Use drm_memcpy_from_wc? */
+ copy_highpage(dst, folio_file_page(from_folio, handle));
+ folio_put(from_folio);
+
+ return 0;
+}
+
+static unsigned long
+ttm_backup_shmem_backup_page(struct ttm_backup *backup, struct page *page,
+ bool writeback, pgoff_t i, gfp_t page_gfp,
+ gfp_t alloc_gfp)
+{
+ struct file *filp = to_backup_shmem(backup)->filp;
+ struct address_space *mapping = filp->f_mapping;
+ unsigned long handle = 0;
+ struct folio *to_folio;
+ int ret;
+
+ to_folio = shmem_read_folio_gfp(mapping, i, alloc_gfp);
+ if (IS_ERR(to_folio))
+ return handle;
+
+ folio_mark_accessed(to_folio);
+ folio_lock(to_folio);
+ folio_mark_dirty(to_folio);
+ copy_highpage(folio_file_page(to_folio, i), page);
+ handle = i + 1;
+
+ if (writeback && !folio_mapped(to_folio) && folio_clear_dirty_for_io(to_folio)) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_NONE,
+ .nr_to_write = SWAP_CLUSTER_MAX,
+ .range_start = 0,
+ .range_end = LLONG_MAX,
+ .for_reclaim = 1,
+ };
+ folio_set_reclaim(to_folio);
+ ret = mapping->a_ops->writepage(folio_file_page(to_folio, i), &wbc);
+ if (!folio_test_writeback(to_folio))
+ folio_clear_reclaim(to_folio);
+ /* If writepage succeeds, it unlocks the folio */
+ if (ret)
+ folio_unlock(to_folio);
+ } else {
+ folio_unlock(to_folio);
+ }
+
+ folio_put(to_folio);
+
+ return handle;
+}
+
+static void ttm_backup_shmem_fini(struct ttm_backup *backup)
+{
+ struct ttm_backup_shmem *sbackup = to_backup_shmem(backup);
+
+ fput(sbackup->filp);
+ kfree(sbackup);
+}
+
+static const struct ttm_backup_ops ttm_backup_shmem_ops = {
+ .drop = ttm_backup_shmem_drop,
+ .copy_backed_up_page = ttm_backup_shmem_copy_page,
+ .backup_page = ttm_backup_shmem_backup_page,
+ .fini = ttm_backup_shmem_fini,
+};
+
+/**
+ * ttm_backup_shmem_create() - Create a shmem-based struct backup.
+ * @size: The maximum size (in bytes) to back up.
+ *
+ * Create a backup utilizing shmem objects.
+ *
+ * Return: A pointer to a struct ttm_backup on success,
+ * an error pointer on error.
+ */
+struct ttm_backup *ttm_backup_shmem_create(loff_t size)
+{
+ struct ttm_backup_shmem *sbackup =
+ kzalloc(sizeof(*sbackup), GFP_KERNEL | __GFP_ACCOUNT);
+ struct file *filp;
+
+ if (!sbackup)
+ return ERR_PTR(-ENOMEM);
+
+ filp = shmem_file_setup("ttm shmem backup", size, 0);
+ if (IS_ERR(filp)) {
+ kfree(sbackup);
+ return ERR_CAST(filp);
+ }
+
+ sbackup->filp = filp;
+ sbackup->backup.ops = &ttm_backup_shmem_ops;
+
+ return &sbackup->backup;
+}
+EXPORT_SYMBOL_GPL(ttm_backup_shmem_create);
diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h
new file mode 100644
index 000000000000..5f8c7d3069ef
--- /dev/null
+++ b/include/drm/ttm/ttm_backup.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _TTM_BACKUP_H_
+#define _TTM_BACKUP_H_
+
+#include <linux/mm_types.h>
+#include <linux/shmem_fs.h>
+
+struct ttm_backup;
+
+/**
+ * ttm_backup_handle_to_page_ptr() - Convert handle to struct page pointer
+ * @handle: The handle to convert.
+ *
+ * Converts an opaque handle received from the
+ * struct ttm_backup_ops::backup_page() function to an (invalid)
+ * struct page pointer suitable for a struct page array.
+ *
+ * Return: An (invalid) struct page pointer.
+ */
+static inline struct page *
+ttm_backup_handle_to_page_ptr(unsigned long handle)
+{
+ return (struct page *)(handle << 1 | 1);
+}
+
+/**
+ * ttm_backup_page_ptr_is_handle() - Whether a struct page pointer is a handle
+ * @page: The struct page pointer to check.
+ *
+ * Return: true if the struct page pointer is a handle returned from
+ * ttm_backup_handle_to_page_ptr(). False otherwise.
+ */
+static inline bool ttm_backup_page_ptr_is_handle(const struct page *page)
+{
+ return (unsigned long)page & 1;
+}
+
+/**
+ * ttm_backup_page_ptr_to_handle() - Convert a struct page pointer to a handle
+ * @page: The struct page pointer to convert
+ *
+ * Return: The handle that was previously used in
+ * ttm_backup_handle_to_page_ptr() to obtain a struct page pointer, suitable
+ * for use as argument in the struct ttm_backup_ops drop() or
+ * copy_backed_up_page() functions.
+ */
+static inline unsigned long
+ttm_backup_page_ptr_to_handle(const struct page *page)
+{
+ WARN_ON(!ttm_backup_page_ptr_is_handle(page));
+ return (unsigned long)page >> 1;
+}
+
+/** struct ttm_backup_ops - A struct ttm_backup backend operations */
+struct ttm_backup_ops {
+ /**
+ * drop - release memory associated with a handle
+ * @backup: The struct backup pointer used to obtain the handle
+ * @handle: The handle obtained from the @backup_page function.
+ */
+ void (*drop)(struct ttm_backup *backup, unsigned long handle);
+
+ /**
+ * copy_backed_up_page - Copy the contents of a previously backed
+ * up page
+ * @backup: The struct backup pointer used to back up the page.
+ * @dst: The struct page to copy into.
+ * @handle: The handle returned when the page was backed up.
+ * @intr: Try to perform waits interruptible or at least killable.
+ *
+ * Return: 0 on success, Negative error code on failure, notably
+ * -EINTR if @intr was set to true and a signal is pending.
+ */
+ int (*copy_backed_up_page)(struct ttm_backup *backup, struct page *dst,
+ unsigned long handle, bool intr);
+
+ /**
+ * backup_page - Backup a page
+ * @backup: The struct backup pointer to use.
+ * @page: The page to back up.
+ * @writeback: Whether to perform immediate writeback of the page.
+ * This may have performance implications.
+ * @i: A unique integer for each page and each struct backup.
+ * This is a hint allowing the backup backend to avoid managing
+ * its address space separately.
+ * @page_gfp: The gfp value used when the page was allocated.
+ * This is used for accounting purposes.
+ * @alloc_gfp: The gfp to be used when the backend needs to allocate
+ * memory.
+ *
+ * Return: A handle on success. 0 on failure.
+ * (This is following the swp_entry_t convention).
+ *
+ * Note: This function could be extended to back up a folio and
+ * backends would then split the folio internally if needed.
+ * Drawback is that the caller would then have to keep track of
+ * the folio size and usage.
+ */
+ unsigned long (*backup_page)(struct ttm_backup *backup, struct page *page,
+ bool writeback, pgoff_t i, gfp_t page_gfp,
+ gfp_t alloc_gfp);
+ /**
+ * fini - Free the struct backup resources after last use.
+ * @backup: Pointer to the struct backup whose resources to free.
+ *
+ * After a call to @fini, it's illegal to use the @backup pointer.
+ */
+ void (*fini)(struct ttm_backup *backup);
+};
+
+/**
+ * struct ttm_backup - Abstract a backup backend.
+ * @ops: The operations as described above.
+ *
+ * The struct ttm_backup is intended to be subclassed by the
+ * backend implementation.
+ */
+struct ttm_backup {
+ const struct ttm_backup_ops *ops;
+};
+
+/**
+ * ttm_backup_shmem_create() - Create a shmem-based struct backup.
+ * @size: The maximum size (in bytes) to back up.
+ *
+ * Create a backup utilizing shmem objects.
+ *
+ * Return: A pointer to a struct ttm_backup on success,
+ * an error pointer on error.
+ */
+struct ttm_backup *ttm_backup_shmem_create(loff_t size);
+
+#endif
--
2.44.0
* [PATCH v8 2/6] drm/ttm/pool: Provide a helper to shrink pages
2024-08-16 13:37 [PATCH v8 0/6] TTM shrinker helpers and xe buffer object shrinker Thomas Hellström
2024-08-16 13:37 ` [PATCH v8 1/6] drm/ttm: Add a virtual base class for graphics memory backup Thomas Hellström
@ 2024-08-16 13:37 ` Thomas Hellström
2024-08-16 14:10 ` Matthew Brost
2024-08-16 13:37 ` [PATCH v8 3/6] drm/ttm: Use fault-injection to test error paths Thomas Hellström
From: Thomas Hellström @ 2024-08-16 13:37 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, Christian König,
Somalapuram Amaranath, Matthew Brost, dri-devel, Paulo Zanoni
Provide a helper to shrink ttm_tt page-vectors on a per-page
basis. A ttm_backup backend could then in theory get away with
allocating a single temporary page for each struct ttm_tt.
This is accomplished by splitting larger pages before trying to
back them up.
In the future we could allow ttm_backup to handle backing up
large pages as well, but currently there's no benefit in
doing that, since the shmem backup backend would have to
split those anyway to avoid allocating too much temporary
memory, and if the backend instead inserts pages into the
swap-cache, those are split on reclaim by the core.
Due to potential backup and recovery errors, allow partially swapped-out
struct ttm_tt's, although mark them as swapped out to stop them from
being swapped out a second time. More details in the ttm_pool.c
DOC section.
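As an illustration of the exported interface (a minimal sketch only;
the my_shrink_tt() name is hypothetical and locking and accounting
are left out), a driver shrinker could use the helper roughly like
so:

static long my_shrink_tt(struct ttm_device *bdev, struct ttm_tt *tt,
			 bool purge)
{
	struct ttm_backup_flags flags = {
		.purge = purge,		/* Free pages without backing up. */
		.writeback = true,	/* Copy content directly to swap space. */
	};

	/* Returns the number of pages backed up or freed, or -errno. */
	return ttm_tt_backup(bdev, tt, flags);
}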
v2:
- A couple of cleanups and error fixes in ttm_pool_back_up_tt.
- s/back_up/backup/
- Add a writeback parameter to the exported interface.
v8:
- Use a struct for flags for readability (Matt Brost)
- Address misc other review comments (Matt Brost)
Cc: Christian König <christian.koenig@amd.com>
Cc: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/ttm/ttm_pool.c | 394 +++++++++++++++++++++++++++++++--
drivers/gpu/drm/ttm/ttm_tt.c | 37 ++++
include/drm/ttm/ttm_pool.h | 6 +
include/drm/ttm/ttm_tt.h | 29 +++
4 files changed, 453 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 8504dbe19c1a..0d224cd9f8eb 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -41,6 +41,7 @@
#include <asm/set_memory.h>
#endif
+#include <drm/ttm/ttm_backup.h>
#include <drm/ttm/ttm_pool.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/ttm/ttm_bo.h>
@@ -58,6 +59,32 @@ struct ttm_pool_dma {
unsigned long vaddr;
};
+/**
+ * struct ttm_pool_tt_restore - State representing restore from backup
+ * @alloced_pages: Total number of already allocated pages for the ttm_tt.
+ * @restored_pages: Number of (sub) pages restored from swap for this
+ * chunk of 1 << @order pages.
+ * @first_page: The ttm page ptr representing for @old_pages[0].
+ * @caching_divide: Page pointer where subsequent pages are cached.
+ * @old_pages: Backup copy of page pointers that were replaced by the new
+ * page allocation.
+ * @pool: The pool used for page allocation while restoring.
+ * @order: The order of the last page allocated while restoring.
+ *
+ * Recovery from backup might fail when we've recovered less than the
+ * full ttm_tt. In order not to lose any data (yet), keep information
+ * around that allows us to restart a failed ttm backup recovery.
+ */
+struct ttm_pool_tt_restore {
+ pgoff_t alloced_pages;
+ pgoff_t restored_pages;
+ struct page **first_page;
+ struct page **caching_divide;
+ struct ttm_pool *pool;
+ unsigned int order;
+ struct page *old_pages[];
+};
+
static unsigned long page_pool_size;
MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
@@ -354,11 +381,102 @@ static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
return p->private;
}
+/*
+ * To be able to insert single pages into backup directly,
+ * we need to split multi-order page allocations and make them look
+ * like single-page allocations.
+ */
+static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct page *p)
+{
+ unsigned int order = ttm_pool_page_order(pool, p);
+ pgoff_t nr;
+
+ if (!order)
+ return;
+
+ split_page(p, order);
+ nr = 1UL << order;
+ while (nr--)
+ (p++)->private = 0;
+}
+
+/**
+ * DOC: Partial backup and restoration of a struct ttm_tt.
+ *
+ * Swapout using ttm_backup::ops::backup_page() and swapin using
+ * ttm_backup::ops::copy_backed_up_page() may fail.
+ * The former most likely due to lack of swap-space or memory, the latter due
+ * to lack of memory or because of signal interruption during waits.
+ *
+ * Backup failure is easily handled by using a ttm_tt pages vector that holds
+ * both swap entries and page pointers. This has to be taken into account when
+ * restoring such a ttm_tt from backup, and when freeing it while backed up.
+ * When restoring, for simplicity, new pages are actually allocated from the
+ * pool and the contents of any old pages are copied in and then the old pages
+ * are released.
+ *
+ * For restoration failures, the struct ttm_pool_tt_restore holds sufficient state
+ * to be able to resume an interrupted restore, and that structure is freed once
+ * the restoration is complete. If the struct ttm_tt is destroyed while there
+ * is a valid struct ttm_pool_tt_restore attached, that is also properly taken
+ * care of.
+ */
+
+static bool ttm_pool_restore_valid(const struct ttm_pool_tt_restore *restore)
+{
+ return restore && restore->restored_pages < (1 << restore->order);
+}
+
+static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
+ struct ttm_backup *backup,
+ struct ttm_operation_ctx *ctx)
+{
+ unsigned int i, nr = 1 << restore->order;
+ int ret = 0;
+
+ if (!ttm_pool_restore_valid(restore))
+ return 0;
+
+ for (i = restore->restored_pages; i < nr; ++i) {
+ struct page *p = restore->old_pages[i];
+
+ if (ttm_backup_page_ptr_is_handle(p)) {
+ unsigned long handle = ttm_backup_page_ptr_to_handle(p);
+
+ if (handle == 0)
+ continue;
+
+ ret = backup->ops->copy_backed_up_page
+ (backup, restore->first_page[i],
+ handle, ctx->interruptible);
+ if (ret)
+ break;
+
+ backup->ops->drop(backup, handle);
+ } else if (p) {
+ /*
+ * We could probably avoid splitting the old page
+ * using clever logic, but ATM we don't care.
+ */
+ ttm_pool_split_for_swap(restore->pool, p);
+ copy_highpage(restore->first_page[i], p);
+ __free_pages(p, 0);
+ }
+
+ restore->restored_pages++;
+ restore->old_pages[i] = NULL;
+ cond_resched();
+ }
+
+ return ret;
+}
+
/* Called when we got a page, either from a pool or newly allocated */
static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
struct page *p, dma_addr_t **dma_addr,
unsigned long *num_pages,
- struct page ***pages)
+ struct page ***pages,
+ struct ttm_pool_tt_restore *restore)
{
unsigned int i;
int r;
@@ -369,6 +487,16 @@ static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
return r;
}
+ if (restore) {
+ memcpy(restore->old_pages, *pages,
+ (1 << order) * sizeof(*restore->old_pages));
+ memset(*pages, 0, (1 << order) * sizeof(**pages));
+ restore->order = order;
+ restore->restored_pages = 0;
+ restore->first_page = *pages;
+ restore->alloced_pages += 1UL << order;
+ }
+
*num_pages -= 1 << order;
for (i = 1 << order; i; --i, ++(*pages), ++p)
**pages = p;
@@ -394,22 +522,39 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
pgoff_t start_page, pgoff_t end_page)
{
struct page **pages = &tt->pages[start_page];
+ struct ttm_backup *backup = tt->backup;
unsigned int order;
pgoff_t i, nr;
for (i = start_page; i < end_page; i += nr, pages += nr) {
struct ttm_pool_type *pt = NULL;
+ struct page *p = *pages;
+
+ if (ttm_backup_page_ptr_is_handle(p)) {
+ unsigned long handle = ttm_backup_page_ptr_to_handle(p);
+
+ nr = 1;
+ if (handle != 0)
+ backup->ops->drop(backup, handle);
+ continue;
+ }
+
+ if (pool) {
+ order = ttm_pool_page_order(pool, p);
+ nr = (1UL << order);
+ if (tt->dma_address)
+ ttm_pool_unmap(pool, tt->dma_address[i], nr);
- order = ttm_pool_page_order(pool, *pages);
- nr = (1UL << order);
- if (tt->dma_address)
- ttm_pool_unmap(pool, tt->dma_address[i], nr);
+ pt = ttm_pool_select_type(pool, caching, order);
+ } else {
+ order = p->private;
+ nr = (1UL << order);
+ }
- pt = ttm_pool_select_type(pool, caching, order);
if (pt)
- ttm_pool_type_give(pt, *pages);
+ ttm_pool_type_give(pt, p);
else
- ttm_pool_free_page(pool, caching, order, *pages);
+ ttm_pool_free_page(pool, caching, order, p);
}
}
@@ -453,9 +598,36 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
else
gfp_flags |= GFP_HIGHUSER;
- for (order = min_t(unsigned int, MAX_PAGE_ORDER, __fls(num_pages));
- num_pages;
- order = min_t(unsigned int, order, __fls(num_pages))) {
+ order = min_t(unsigned int, MAX_PAGE_ORDER, __fls(num_pages));
+
+ if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP) {
+ if (!tt->restore) {
+ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
+
+ if (ctx->gfp_retry_mayfail)
+ gfp |= __GFP_RETRY_MAYFAIL;
+
+ tt->restore =
+ kvzalloc(struct_size(tt->restore, old_pages,
+ (size_t)1 << order), gfp);
+ if (!tt->restore)
+ return -ENOMEM;
+ } else if (ttm_pool_restore_valid(tt->restore)) {
+ struct ttm_pool_tt_restore *restore = tt->restore;
+
+ num_pages -= restore->alloced_pages;
+ order = min_t(unsigned int, order, __fls(num_pages));
+ pages += restore->alloced_pages;
+ r = ttm_pool_restore_tt(restore, tt->backup, ctx);
+ if (r)
+ return r;
+ caching = restore->caching_divide;
+ }
+
+ tt->restore->pool = pool;
+ }
+
+ for (; num_pages; order = min_t(unsigned int, order, __fls(num_pages))) {
struct ttm_pool_type *pt;
page_caching = tt->caching;
@@ -472,11 +644,19 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
r = ttm_pool_page_allocated(pool, order, p,
&dma_addr,
&num_pages,
- &pages);
+ &pages,
+ tt->restore);
if (r)
goto error_free_page;
caching = pages;
+ if (ttm_pool_restore_valid(tt->restore)) {
+ r = ttm_pool_restore_tt(tt->restore, tt->backup,
+ ctx);
+ if (r)
+ goto error_free_all;
+ }
+
if (num_pages < (1 << order))
break;
@@ -496,9 +676,17 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
caching = pages;
}
r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
- &num_pages, &pages);
+ &num_pages, &pages,
+ tt->restore);
if (r)
goto error_free_page;
+
+ if (ttm_pool_restore_valid(tt->restore)) {
+ r = ttm_pool_restore_tt(tt->restore, tt->backup, ctx);
+ if (r)
+ goto error_free_all;
+ }
+
if (PageHighMem(p))
caching = pages;
}
@@ -517,12 +705,26 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
if (r)
goto error_free_all;
+ if (tt->restore) {
+ kvfree(tt->restore);
+ tt->restore = NULL;
+ }
+
+ if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP)
+ tt->page_flags &= ~(TTM_TT_FLAG_PRIV_BACKED_UP |
+ TTM_TT_FLAG_SWAPPED);
+
return 0;
error_free_page:
ttm_pool_free_page(pool, page_caching, order, p);
error_free_all:
+ if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP) {
+ tt->restore->caching_divide = caching;
+ return r;
+ }
+
num_pages = tt->num_pages - num_pages;
caching_divide = caching - tt->pages;
ttm_pool_free_range(pool, tt, tt->caching, 0, caching_divide);
@@ -549,6 +751,172 @@ void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
}
EXPORT_SYMBOL(ttm_pool_free);
+/**
+ * ttm_pool_release_backed_up() - Release content of a swapped-out struct ttm_tt
+ * @tt: The struct ttm_tt.
+ *
+ * Release handles with associated content or any remaining pages of
+ * a backed-up struct ttm_tt.
+ */
+void ttm_pool_release_backed_up(struct ttm_tt *tt)
+{
+ struct ttm_backup *backup = tt->backup;
+ struct ttm_pool_tt_restore *restore;
+ pgoff_t i, start_page = 0;
+ unsigned long handle;
+
+ if (!(tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP))
+ return;
+
+ restore = tt->restore;
+
+ if (ttm_pool_restore_valid(restore)) {
+ pgoff_t nr = 1UL << restore->order;
+
+ for (i = restore->restored_pages; i < nr; ++i) {
+ struct page *p = restore->old_pages[i];
+
+ if (ttm_backup_page_ptr_is_handle(p)) {
+ handle = ttm_backup_page_ptr_to_handle(p);
+ if (handle == 0)
+ continue;
+
+ backup->ops->drop(backup, handle);
+ } else if (p) {
+ ttm_pool_split_for_swap(restore->pool, p);
+ __free_pages(p, 0);
+ }
+ }
+ }
+
+ if (restore) {
+ pgoff_t mid = restore->caching_divide - tt->pages;
+
+ start_page = restore->alloced_pages;
+ /* Pages that might be dma-mapped and non-cached */
+ ttm_pool_free_range(restore->pool, tt, tt->caching,
+ 0, mid);
+ /* Pages that might be dma-mapped but cached */
+ ttm_pool_free_range(restore->pool, tt, ttm_cached,
+ mid, restore->alloced_pages);
+ }
+
+ /* Shrunken pages. Cached and not dma-mapped. */
+ ttm_pool_free_range(NULL, tt, ttm_cached, start_page, tt->num_pages);
+
+ if (restore) {
+ kvfree(restore);
+ tt->restore = NULL;
+ }
+
+ tt->page_flags &= ~(TTM_TT_FLAG_PRIV_BACKED_UP | TTM_TT_FLAG_SWAPPED);
+}
+
+/**
+ * ttm_pool_backup_tt() - Back up or purge a struct ttm_tt
+ * @pool: The pool used when allocating the struct ttm_tt.
+ * @ttm: The struct ttm_tt.
+ * @flags: Flags to govern the backup behaviour.
+ *
+ * Back up or purge a struct ttm_tt. If @flags->purge is true, then
+ * all pages will be freed directly to the system rather than to the pool
+ * they were allocated from, making the function behave similarly to
+ * ttm_pool_free(). If @flags->purge is false the pages will be backed up
+ * instead, exchanged for handles.
+ * A subsequent call to ttm_pool_alloc() will then read back the content and
+ * a subsequent call to ttm_pool_release_backed_up() will drop it.
+ * If backup of a page fails for whatever reason, @ttm will still be
+ * partially backed up, retaining those pages for which backup fails.
+ *
+ * Return: Number of pages actually backed up or freed, or negative
+ * error code on error.
+ */
+long ttm_pool_backup_tt(struct ttm_pool *pool, struct ttm_tt *ttm,
+ const struct ttm_backup_flags *flags)
+{
+ struct ttm_backup *backup = ttm->backup;
+ struct page *page;
+ unsigned long handle;
+ gfp_t alloc_gfp;
+ gfp_t gfp;
+ int ret = 0;
+ pgoff_t shrunken = 0;
+ pgoff_t i, num_pages;
+
+ if ((!get_nr_swap_pages() && !flags->purge) ||
+ pool->use_dma_alloc ||
+ (ttm->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP))
+ return -EBUSY;
+
+#ifdef CONFIG_X86
+ /* Anything returned to the system needs to be cached. */
+ if (ttm->caching != ttm_cached)
+ set_pages_array_wb(ttm->pages, ttm->num_pages);
+#endif
+
+ if (ttm->dma_address || flags->purge) {
+ for (i = 0; i < ttm->num_pages; i += num_pages) {
+ unsigned int order;
+
+ page = ttm->pages[i];
+ if (unlikely(!page)) {
+ num_pages = 1;
+ continue;
+ }
+
+ order = ttm_pool_page_order(pool, page);
+ num_pages = 1UL << order;
+ if (ttm->dma_address)
+ ttm_pool_unmap(pool, ttm->dma_address[i],
+ num_pages);
+ if (flags->purge) {
+ shrunken += num_pages;
+ page->private = 0;
+ __free_pages(page, order);
+ memset(ttm->pages + i, 0,
+ num_pages * sizeof(*ttm->pages));
+ }
+ }
+ }
+
+ if (flags->purge)
+ return shrunken;
+
+ if (pool->use_dma32)
+ gfp = GFP_DMA32;
+ else
+ gfp = GFP_HIGHUSER;
+
+ alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN | __GFP_RETRY_MAYFAIL;
+
+ for (i = 0; i < ttm->num_pages; ++i) {
+ page = ttm->pages[i];
+ if (unlikely(!page))
+ continue;
+
+ ttm_pool_split_for_swap(pool, page);
+
+ handle = backup->ops->backup_page(backup, page, flags->writeback, i,
+ gfp, alloc_gfp);
+ if (handle) {
+ ttm->pages[i] = ttm_backup_handle_to_page_ptr(handle);
+ put_page(page);
+ shrunken++;
+ } else {
+ /* We allow partially shrunken tts */
+ ret = -ENOMEM;
+ break;
+ }
+ cond_resched();
+ }
+
+ if (shrunken)
+ ttm->page_flags |= (TTM_TT_FLAG_PRIV_BACKED_UP |
+ TTM_TT_FLAG_SWAPPED);
+
+ return shrunken ? shrunken : ret;
+}
+
/**
* ttm_pool_init - Initialize a pool
*
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 4b51b9023126..f520b8c93f03 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -40,6 +40,7 @@
#include <drm/drm_cache.h>
#include <drm/drm_device.h>
#include <drm/drm_util.h>
+#include <drm/ttm/ttm_backup.h>
#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_tt.h>
@@ -158,6 +159,8 @@ static void ttm_tt_init_fields(struct ttm_tt *ttm,
ttm->swap_storage = NULL;
ttm->sg = bo->sg;
ttm->caching = caching;
+ ttm->restore = NULL;
+ ttm->backup = NULL;
}
int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
@@ -182,6 +185,12 @@ void ttm_tt_fini(struct ttm_tt *ttm)
fput(ttm->swap_storage);
ttm->swap_storage = NULL;
+ ttm_pool_release_backed_up(ttm);
+ if (ttm->backup) {
+ ttm->backup->ops->fini(ttm->backup);
+ ttm->backup = NULL;
+ }
+
if (ttm->pages)
kvfree(ttm->pages);
else
@@ -253,6 +262,34 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
}
EXPORT_SYMBOL_FOR_TESTS_ONLY(ttm_tt_swapin);
+/**
+ * ttm_tt_backup() - Helper to back up a struct ttm_tt.
+ * @bdev: The TTM device.
+ * @tt: The struct ttm_tt.
+ * @flags: Flags that govern the backup behaviour.
+ *
+ * Update the page accounting and call ttm_pool_backup_tt() to free pages
+ * or back them up.
+ *
+ * Return: Number of pages freed or swapped out, or negative error code on
+ * error.
+ */
+long ttm_tt_backup(struct ttm_device *bdev, struct ttm_tt *tt,
+ const struct ttm_backup_flags flags)
+{
+ long ret;
+
+ if (WARN_ON(IS_ERR_OR_NULL(tt->backup)))
+ return 0;
+
+ ret = ttm_pool_backup_tt(&bdev->pool, tt, &flags);
+
+ if (ret > 0)
+ tt->page_flags &= ~TTM_TT_FLAG_PRIV_POPULATED;
+
+ return ret;
+}
+
/**
* ttm_tt_swapout - swap out tt object
*
diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
index 160d954a261e..3112a4be835c 100644
--- a/include/drm/ttm/ttm_pool.h
+++ b/include/drm/ttm/ttm_pool.h
@@ -33,6 +33,7 @@
struct device;
struct seq_file;
+struct ttm_backup_flags;
struct ttm_operation_ctx;
struct ttm_pool;
struct ttm_tt;
@@ -89,6 +90,11 @@ void ttm_pool_fini(struct ttm_pool *pool);
int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m);
+void ttm_pool_release_backed_up(struct ttm_tt *tt);
+
+long ttm_pool_backup_tt(struct ttm_pool *pool, struct ttm_tt *ttm,
+ const struct ttm_backup_flags *flags);
+
int ttm_pool_mgr_init(unsigned long num_pages);
void ttm_pool_mgr_fini(void);
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 2b9d856ff388..e42a75cff502 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -32,11 +32,13 @@
#include <drm/ttm/ttm_caching.h>
#include <drm/ttm/ttm_kmap_iter.h>
+struct ttm_backup;
struct ttm_device;
struct ttm_tt;
struct ttm_resource;
struct ttm_buffer_object;
struct ttm_operation_ctx;
+struct ttm_pool_tt_restore;
/**
* struct ttm_tt - This is a structure holding the pages, caching- and aperture
@@ -85,6 +87,9 @@ struct ttm_tt {
* fault handling abuses the DMA api a bit and dma_map_attrs can't be
* used to assure pgprot always matches.
*
+ * TTM_TT_FLAG_PRIV_BACKED_UP: TTM internal only. This is set if the
+ * struct ttm_tt has been (possibly partially) backed up.
+ *
* TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is
* set by TTM after ttm_tt_populate() has successfully returned, and is
* then unset when TTM calls ttm_tt_unpopulate().
@@ -96,6 +101,7 @@ struct ttm_tt {
#define TTM_TT_FLAG_DECRYPTED BIT(4)
#define TTM_TT_FLAG_PRIV_POPULATED BIT(5)
+#define TTM_TT_FLAG_PRIV_BACKED_UP BIT(6)
uint32_t page_flags;
/** @num_pages: Number of pages in the page array. */
uint32_t num_pages;
@@ -105,11 +111,19 @@ struct ttm_tt {
dma_addr_t *dma_address;
/** @swap_storage: Pointer to shmem struct file for swap storage. */
struct file *swap_storage;
+ /**
+ * @backup: Pointer to backup struct for backed up tts.
+ * Could be unified with @swap_storage. Meanwhile, this is
+ * a driver-owned field.
+ */
+ struct ttm_backup *backup;
/**
* @caching: The current caching state of the pages, see enum
* ttm_caching.
*/
enum ttm_caching caching;
+ /** @restore: Partial restoration from backup state. TTM private */
+ struct ttm_pool_tt_restore *restore;
};
/**
@@ -230,6 +244,21 @@ void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages);
struct ttm_kmap_iter *ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt,
struct ttm_tt *tt);
unsigned long ttm_tt_pages_limit(void);
+
+/**
+ * struct ttm_backup_flags - Flags to govern backup behaviour.
+ * @purge: Free pages without backing up. Bypass pools.
+ * @writeback: Attempt to copy contents directly to swap space, even
+ * if that means blocking on writes to external memory.
+ */
+struct ttm_backup_flags {
+ u32 purge : 1;
+ u32 writeback : 1;
+};
+
+long ttm_tt_backup(struct ttm_device *bdev, struct ttm_tt *tt,
+ const struct ttm_backup_flags flags);
+
#if IS_ENABLED(CONFIG_AGP)
#include <linux/agp_backend.h>
--
2.44.0
* Re: [PATCH v8 2/6] drm/ttm/pool: Provide a helper to shrink pages
2024-08-16 13:37 ` [PATCH v8 2/6] drm/ttm/pool: Provide a helper to shrink pages Thomas Hellström
@ 2024-08-16 14:10 ` Matthew Brost
2024-08-19 8:07 ` Thomas Hellström
From: Matthew Brost @ 2024-08-16 14:10 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, Christian König, Somalapuram Amaranath, dri-devel,
Paulo Zanoni
On Fri, Aug 16, 2024 at 03:37:13PM +0200, Thomas Hellström wrote:
> Provide a helper to shrink ttm_tt page-vectors on a per-page
> basis. A ttm_backup backend could then in theory get away with
> allocating a single temporary page for each struct ttm_tt.
>
> This is accomplished by splitting larger pages before trying to
> back them up.
>
> In the future we could allow ttm_backup to handle backing up
> large pages as well, but currently there's no benefit in
> doing that, since the shmem backup backend would have to
> split those anyway to avoid allocating too much temporary
> memory, and if the backend instead inserts pages into the
> swap-cache, those are split on reclaim by the core.
>
> Due to potential backup and recovery errors, allow partially swapped-out
> struct ttm_tt's, although mark them as swapped out to stop them from
> being swapped out a second time. More details in the ttm_pool.c
> DOC section.
>
> v2:
> - A couple of cleanups and error fixes in ttm_pool_back_up_tt.
> - s/back_up/backup/
> - Add a writeback parameter to the exported interface.
> v8:
> - Use a struct for flags for readability (Matt Brost)
> - Address misc other review comments (Matt Brost)
>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: <dri-devel@lists.freedesktop.org>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/ttm/ttm_pool.c | 394 +++++++++++++++++++++++++++++++--
> drivers/gpu/drm/ttm/ttm_tt.c | 37 ++++
> include/drm/ttm/ttm_pool.h | 6 +
> include/drm/ttm/ttm_tt.h | 29 +++
> 4 files changed, 453 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index 8504dbe19c1a..0d224cd9f8eb 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -41,6 +41,7 @@
> #include <asm/set_memory.h>
> #endif
>
> +#include <drm/ttm/ttm_backup.h>
> #include <drm/ttm/ttm_pool.h>
> #include <drm/ttm/ttm_tt.h>
> #include <drm/ttm/ttm_bo.h>
> @@ -58,6 +59,32 @@ struct ttm_pool_dma {
> unsigned long vaddr;
> };
>
> +/**
> + * struct ttm_pool_tt_restore - State representing restore from backup
> + * @alloced_pages: Total number of already allocated pages for the ttm_tt.
> + * @restored_pages: Number of (sub) pages restored from swap for this
> + * chunk of 1 << @order pages.
> + * @first_page: The ttm page ptr representing @old_pages[0].
> + * @caching_divide: Page pointer where subsequent pages are cached.
> + * @old_pages: Backup copy of page pointers that were replaced by the new
> + * page allocation.
> + * @pool: The pool used for page allocation while restoring.
> + * @order: The order of the last page allocated while restoring.
> + *
> + * Recovery from backup might fail when we've recovered less than the
> + * full ttm_tt. In order not to lose any data (yet), keep information
> + * around that allows us to restart a failed ttm backup recovery.
> + */
> +struct ttm_pool_tt_restore {
> + pgoff_t alloced_pages;
> + pgoff_t restored_pages;
> + struct page **first_page;
> + struct page **caching_divide;
> + struct ttm_pool *pool;
> + unsigned int order;
> + struct page *old_pages[];
> +};
> +
> static unsigned long page_pool_size;
>
> MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
> @@ -354,11 +381,102 @@ static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
> return p->private;
> }
>
> +/*
> + * To be able to insert single pages into backup directly,
> + * we need to split multi-order page allocations and make them look
> + * like single-page allocations.
> + */
> +static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct page *p)
> +{
> + unsigned int order = ttm_pool_page_order(pool, p);
> + pgoff_t nr;
> +
> + if (!order)
> + return;
> +
> + split_page(p, order);
> + nr = 1UL << order;
> + while (nr--)
> + (p++)->private = 0;
> +}
> +
> +/**
> + * DOC: Partial backup and restoration of a struct ttm_tt.
> + *
> + * Swapout using ttm_backup::ops::backup_page() and swapin using
> + * ttm_backup::ops::copy_backed_up_page() may fail.
> + * The former most likely due to lack of swap-space or memory, the latter due
> + * to lack of memory or because of signal interruption during waits.
> + *
> + * Backup failure is easily handled by using a ttm_tt pages vector that holds
> + * both swap entries and page pointers. This has to be taken into account when
> + * restoring such a ttm_tt from backup, and when freeing it while backed up.
> + * When restoring, for simplicity, new pages are actually allocated from the
> + * pool and the contents of any old pages are copied in and then the old pages
> + * are released.
> + *
> + * For restoration failures, the struct ttm_pool_tt_restore holds sufficient state
> + * to be able to resume an interrupted restore, and that structure is freed once
> + * the restoration is complete. If the struct ttm_tt is destroyed while there
> + * is a valid struct ttm_pool_tt_restore attached, that is also properly taken
> + * care of.
> + */
> +
> +static bool ttm_pool_restore_valid(const struct ttm_pool_tt_restore *restore)
> +{
> + return restore && restore->restored_pages < (1 << restore->order);
> +}
> +
> +static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
> + struct ttm_backup *backup,
> + struct ttm_operation_ctx *ctx)
> +{
> + unsigned int i, nr = 1 << restore->order;
> + int ret = 0;
> +
> + if (!ttm_pool_restore_valid(restore))
> + return 0;
> +
> + for (i = restore->restored_pages; i < nr; ++i) {
> + struct page *p = restore->old_pages[i];
> +
> + if (ttm_backup_page_ptr_is_handle(p)) {
> + unsigned long handle = ttm_backup_page_ptr_to_handle(p);
> +
> + if (handle == 0)
> + continue;
> +
> + ret = backup->ops->copy_backed_up_page
> + (backup, restore->first_page[i],
> + handle, ctx->interruptible);
> + if (ret)
> + break;
> +
> + backup->ops->drop(backup, handle);
> + } else if (p) {
> + /*
> + * We could probably avoid splitting the old page
> + * using clever logic, but ATM we don't care.
> + */
> + ttm_pool_split_for_swap(restore->pool, p);
> + copy_highpage(restore->first_page[i], p);
> + __free_pages(p, 0);
> + }
> +
> + restore->restored_pages++;
> + restore->old_pages[i] = NULL;
> + cond_resched();
> + }
> +
> + return ret;
> +}
> +
> /* Called when we got a page, either from a pool or newly allocated */
> static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
> struct page *p, dma_addr_t **dma_addr,
> unsigned long *num_pages,
> - struct page ***pages)
> + struct page ***pages,
> + struct ttm_pool_tt_restore *restore)
> {
> unsigned int i;
> int r;
> @@ -369,6 +487,16 @@ static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
> return r;
> }
>
> + if (restore) {
> + memcpy(restore->old_pages, *pages,
> + (1 << order) * sizeof(*restore->old_pages));
> + memset(*pages, 0, (1 << order) * sizeof(**pages));
> + restore->order = order;
> + restore->restored_pages = 0;
> + restore->first_page = *pages;
> + restore->alloced_pages += 1UL << order;
> + }
> +
> *num_pages -= 1 << order;
> for (i = 1 << order; i; --i, ++(*pages), ++p)
> **pages = p;
> @@ -394,22 +522,39 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
> pgoff_t start_page, pgoff_t end_page)
> {
> struct page **pages = &tt->pages[start_page];
> + struct ttm_backup *backup = tt->backup;
> unsigned int order;
> pgoff_t i, nr;
>
> for (i = start_page; i < end_page; i += nr, pages += nr) {
> struct ttm_pool_type *pt = NULL;
> + struct page *p = *pages;
> +
> + if (ttm_backup_page_ptr_is_handle(p)) {
> + unsigned long handle = ttm_backup_page_ptr_to_handle(p);
> +
> + nr = 1;
> + if (handle != 0)
> + backup->ops->drop(backup, handle);
> + continue;
> + }
> +
> + if (pool) {
> + order = ttm_pool_page_order(pool, p);
> + nr = (1UL << order);
> + if (tt->dma_address)
> + ttm_pool_unmap(pool, tt->dma_address[i], nr);
>
> - order = ttm_pool_page_order(pool, *pages);
> - nr = (1UL << order);
> - if (tt->dma_address)
> - ttm_pool_unmap(pool, tt->dma_address[i], nr);
> + pt = ttm_pool_select_type(pool, caching, order);
> + } else {
> + order = p->private;
> + nr = (1UL << order);
> + }
>
> - pt = ttm_pool_select_type(pool, caching, order);
> if (pt)
> - ttm_pool_type_give(pt, *pages);
> + ttm_pool_type_give(pt, p);
> else
> - ttm_pool_free_page(pool, caching, order, *pages);
> + ttm_pool_free_page(pool, caching, order, p);
> }
> }
>
> @@ -453,9 +598,36 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> else
> gfp_flags |= GFP_HIGHUSER;
>
> - for (order = min_t(unsigned int, MAX_PAGE_ORDER, __fls(num_pages));
> - num_pages;
> - order = min_t(unsigned int, order, __fls(num_pages))) {
> + order = min_t(unsigned int, MAX_PAGE_ORDER, __fls(num_pages));
> +
> + if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP) {
> + if (!tt->restore) {
> + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
> +
> + if (ctx->gfp_retry_mayfail)
> + gfp |= __GFP_RETRY_MAYFAIL;
> +
> + tt->restore =
> + kvzalloc(struct_size(tt->restore, old_pages,
> + (size_t)1 << order), gfp);
> + if (!tt->restore)
> + return -ENOMEM;
> + } else if (ttm_pool_restore_valid(tt->restore)) {
> + struct ttm_pool_tt_restore *restore = tt->restore;
> +
> + num_pages -= restore->alloced_pages;
> + order = min_t(unsigned int, order, __fls(num_pages));
> + pages += restore->alloced_pages;
> + r = ttm_pool_restore_tt(restore, tt->backup, ctx);
> + if (r)
> + return r;
> + caching = restore->caching_divide;
> + }
> +
> + tt->restore->pool = pool;
> + }
> +
> + for (; num_pages; order = min_t(unsigned int, order, __fls(num_pages))) {
> struct ttm_pool_type *pt;
>
> page_caching = tt->caching;
> @@ -472,11 +644,19 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> r = ttm_pool_page_allocated(pool, order, p,
> &dma_addr,
> &num_pages,
> - &pages);
> + &pages,
> + tt->restore);
> if (r)
> goto error_free_page;
>
> caching = pages;
> + if (ttm_pool_restore_valid(tt->restore)) {
> + r = ttm_pool_restore_tt(tt->restore, tt->backup,
> + ctx);
> + if (r)
> + goto error_free_all;
> + }
> +
> if (num_pages < (1 << order))
> break;
>
> @@ -496,9 +676,17 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> caching = pages;
> }
> r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
> - &num_pages, &pages);
> + &num_pages, &pages,
> + tt->restore);
> if (r)
> goto error_free_page;
> +
> + if (ttm_pool_restore_valid(tt->restore)) {
> + r = ttm_pool_restore_tt(tt->restore, tt->backup, ctx);
> + if (r)
> + goto error_free_all;
> + }
> +
> if (PageHighMem(p))
> caching = pages;
> }
> @@ -517,12 +705,26 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> if (r)
> goto error_free_all;
>
> + if (tt->restore) {
> + kvfree(tt->restore);
> + tt->restore = NULL;
> + }
> +
> + if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP)
> + tt->page_flags &= ~(TTM_TT_FLAG_PRIV_BACKED_UP |
> + TTM_TT_FLAG_SWAPPED);
> +
> return 0;
>
> error_free_page:
> ttm_pool_free_page(pool, page_caching, order, p);
>
> error_free_all:
> + if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP) {
> + tt->restore->caching_divide = caching;
> + return r;
> + }
> +
> num_pages = tt->num_pages - num_pages;
> caching_divide = caching - tt->pages;
> ttm_pool_free_range(pool, tt, tt->caching, 0, caching_divide);
> @@ -549,6 +751,172 @@ void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
> }
> EXPORT_SYMBOL(ttm_pool_free);
>
> +/**
> + * ttm_pool_release_backed_up() - Release content of a swapped-out struct ttm_tt
> + * @tt: The struct ttm_tt.
> + *
> + * Release handles with associated content or any remaining pages of
> + * a backed-up struct ttm_tt.
> + */
> +void ttm_pool_release_backed_up(struct ttm_tt *tt)
> +{
> + struct ttm_backup *backup = tt->backup;
> + struct ttm_pool_tt_restore *restore;
> + pgoff_t i, start_page = 0;
> + unsigned long handle;
> +
> + if (!(tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP))
> + return;
> +
> + restore = tt->restore;
> +
> + if (ttm_pool_restore_valid(restore)) {
> + pgoff_t nr = 1UL << restore->order;
> +
> + for (i = restore->restored_pages; i < nr; ++i) {
> + struct page *p = restore->old_pages[i];
> +
> + if (ttm_backup_page_ptr_is_handle(p)) {
> + handle = ttm_backup_page_ptr_to_handle(p);
> + if (handle == 0)
> + continue;
> +
> + backup->ops->drop(backup, handle);
> + } else if (p) {
> + ttm_pool_split_for_swap(restore->pool, p);
> + __free_pages(p, 0);
> + }
> + }
> + }
> +
> + if (restore) {
> + pgoff_t mid = restore->caching_divide - tt->pages;
> +
> + start_page = restore->alloced_pages;
> + /* Pages that might be dma-mapped and non-cached */
> + ttm_pool_free_range(restore->pool, tt, tt->caching,
> + 0, mid);
> + /* Pages that might be dma-mapped but cached */
> + ttm_pool_free_range(restore->pool, tt, ttm_cached,
> + mid, restore->alloced_pages);
> + }
> +
> + /* Shrunken pages. Cached and not dma-mapped. */
> + ttm_pool_free_range(NULL, tt, ttm_cached, start_page, tt->num_pages);
> +
> + if (restore) {
> + kvfree(restore);
> + tt->restore = NULL;
> + }
> +
> + tt->page_flags &= ~(TTM_TT_FLAG_PRIV_BACKED_UP | TTM_TT_FLAG_SWAPPED);
> +}
> +
> +/**
> + * ttm_pool_backup_tt() - Back up or purge a struct ttm_tt
> + * @pool: The pool used when allocating the struct ttm_tt.
> + * @ttm: The struct ttm_tt.
> + * @flags: Flags to govern the backup behaviour.
> + *
> + * Back up or purge a struct ttm_tt. If @flags->purge is true, then
> + * all pages will be freed directly to the system rather than to the pool
> + * they were allocated from, making the function behave similarly to
> + * ttm_pool_free(). If @flags->purge is false the pages will be backed up
> + * instead, exchanged for handles.
> + * A subsequent call to ttm_pool_alloc() will then read back the content and
> + * a subsequent call to ttm_pool_release_backed_up() will drop it.
> + * If backup of a page fails for whatever reason, @ttm will still be
> + * partially backed up, retaining those pages for which backup fails.
> + *
> + * Return: Number of pages actually backed up or freed, or negative
> + * error code on error.
> + */
> +long ttm_pool_backup_tt(struct ttm_pool *pool, struct ttm_tt *ttm,
> + const struct ttm_backup_flags *flags)
> +{
> + struct ttm_backup *backup = ttm->backup;
> + struct page *page;
> + unsigned long handle;
> + gfp_t alloc_gfp;
> + gfp_t gfp;
> + int ret = 0;
> + pgoff_t shrunken = 0;
> + pgoff_t i, num_pages;
> +
> + if ((!get_nr_swap_pages() && !flags->purge) ||
> + pool->use_dma_alloc ||
> + (ttm->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP))
> + return -EBUSY;
> +
> +#ifdef CONFIG_X86
> + /* Anything returned to the system needs to be cached. */
> + if (ttm->caching != ttm_cached)
> + set_pages_array_wb(ttm->pages, ttm->num_pages);
> +#endif
> +
> + if (ttm->dma_address || flags->purge) {
> + for (i = 0; i < ttm->num_pages; i += num_pages) {
> + unsigned int order;
> +
> + page = ttm->pages[i];
> + if (unlikely(!page)) {
> + num_pages = 1;
> + continue;
> + }
> +
> + order = ttm_pool_page_order(pool, page);
> + num_pages = 1UL << order;
> + if (ttm->dma_address)
> + ttm_pool_unmap(pool, ttm->dma_address[i],
> + num_pages);
> + if (flags->purge) {
> + shrunken += num_pages;
> + page->private = 0;
> + __free_pages(page, order);
> + memset(ttm->pages + i, 0,
> + num_pages * sizeof(*ttm->pages));
> + }
> + }
> + }
> +
> + if (flags->purge)
> + return shrunken;
> +
> + if (pool->use_dma32)
> + gfp = GFP_DMA32;
> + else
> + gfp = GFP_HIGHUSER;
> +
> + alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN | __GFP_RETRY_MAYFAIL;
> +
> + for (i = 0; i < ttm->num_pages; ++i) {
> + page = ttm->pages[i];
> + if (unlikely(!page))
> + continue;
> +
> + ttm_pool_split_for_swap(pool, page);
> +
> + handle = backup->ops->backup_page(backup, page, flags->writeback, i,
> + gfp, alloc_gfp);
> + if (handle) {
> + ttm->pages[i] = ttm_backup_handle_to_page_ptr(handle);
> + put_page(page);
> + shrunken++;
> + } else {
> + /* We allow partially shrunken tts */
> + ret = -ENOMEM;
> + break;
> + }
> + cond_resched();
> + }
> +
> + if (shrunken)
> + ttm->page_flags |= (TTM_TT_FLAG_PRIV_BACKED_UP |
> + TTM_TT_FLAG_SWAPPED);
> +
> + return shrunken ? shrunken : ret;
> +}
> +
> /**
> * ttm_pool_init - Initialize a pool
> *
> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> index 4b51b9023126..f520b8c93f03 100644
> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> @@ -40,6 +40,7 @@
> #include <drm/drm_cache.h>
> #include <drm/drm_device.h>
> #include <drm/drm_util.h>
> +#include <drm/ttm/ttm_backup.h>
> #include <drm/ttm/ttm_bo.h>
> #include <drm/ttm/ttm_tt.h>
>
> @@ -158,6 +159,8 @@ static void ttm_tt_init_fields(struct ttm_tt *ttm,
> ttm->swap_storage = NULL;
> ttm->sg = bo->sg;
> ttm->caching = caching;
> + ttm->restore = NULL;
> + ttm->backup = NULL;
> }
>
> int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
> @@ -182,6 +185,12 @@ void ttm_tt_fini(struct ttm_tt *ttm)
> fput(ttm->swap_storage);
> ttm->swap_storage = NULL;
>
> + ttm_pool_release_backed_up(ttm);
> + if (ttm->backup) {
Sorry for the conflicting comments in the last rev, but my last one here
[1] regarding making this fully driver-owned didn’t receive a response.
I’ll state it again: Do you think the backup fini should be owned by the
driver? This would allow the driver to use a global backup for all TT if
it wanted to. It would also make it consistent in the sense that the
driver would own both the allocation and fini of the backup.
Matt
[1] https://patchwork.freedesktop.org/patch/602165/?series=131815&rev=6#comment_1104556
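Roughly what I have in mind, as a sketch only (the my_* names and
struct my_tt wrapper are hypothetical, and this assumes ttm_tt_fini()
would stop calling ops->fini() itself):

static void my_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *tt)
{
	struct ttm_backup *backup = tt->backup;

	/* Still releases any backed-up content through the backup ops. */
	ttm_tt_fini(tt);
	/* Driver-owned fini: a refcounted or global backup becomes possible. */
	my_backup_put(backup);
	kfree(container_of(tt, struct my_tt, ttm));
}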
> + ttm->backup->ops->fini(ttm->backup);
> + ttm->backup = NULL;
> + }
> +
> if (ttm->pages)
> kvfree(ttm->pages);
> else
> @@ -253,6 +262,34 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
> }
> EXPORT_SYMBOL_FOR_TESTS_ONLY(ttm_tt_swapin);
>
> +/**
> + * ttm_tt_backup() - Helper to back up a struct ttm_tt.
> + * @bdev: The TTM device.
> + * @tt: The struct ttm_tt.
> + * @flags: Flags that govern the backup behaviour.
> + *
> + * Update the page accounting and call ttm_pool_backup_tt() to free pages
> + * or back them up.
> + *
> + * Return: Number of pages freed or swapped out, or negative error code on
> + * error.
> + */
> +long ttm_tt_backup(struct ttm_device *bdev, struct ttm_tt *tt,
> + const struct ttm_backup_flags flags)
> +{
> + long ret;
> +
> + if (WARN_ON(IS_ERR_OR_NULL(tt->backup)))
> + return 0;
> +
> + ret = ttm_pool_backup_tt(&bdev->pool, tt, &flags);
> +
> + if (ret > 0)
> + tt->page_flags &= ~TTM_TT_FLAG_PRIV_POPULATED;
> +
> + return ret;
> +}
> +
> /**
> * ttm_tt_swapout - swap out tt object
> *
> diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
> index 160d954a261e..3112a4be835c 100644
> --- a/include/drm/ttm/ttm_pool.h
> +++ b/include/drm/ttm/ttm_pool.h
> @@ -33,6 +33,7 @@
>
> struct device;
> struct seq_file;
> +struct ttm_backup_flags;
> struct ttm_operation_ctx;
> struct ttm_pool;
> struct ttm_tt;
> @@ -89,6 +90,11 @@ void ttm_pool_fini(struct ttm_pool *pool);
>
> int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m);
>
> +void ttm_pool_release_backed_up(struct ttm_tt *tt);
> +
> +long ttm_pool_backup_tt(struct ttm_pool *pool, struct ttm_tt *ttm,
> + const struct ttm_backup_flags *flags);
> +
> int ttm_pool_mgr_init(unsigned long num_pages);
> void ttm_pool_mgr_fini(void);
>
> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
> index 2b9d856ff388..e42a75cff502 100644
> --- a/include/drm/ttm/ttm_tt.h
> +++ b/include/drm/ttm/ttm_tt.h
> @@ -32,11 +32,13 @@
> #include <drm/ttm/ttm_caching.h>
> #include <drm/ttm/ttm_kmap_iter.h>
>
> +struct ttm_backup;
> struct ttm_device;
> struct ttm_tt;
> struct ttm_resource;
> struct ttm_buffer_object;
> struct ttm_operation_ctx;
> +struct ttm_pool_tt_restore;
>
> /**
> * struct ttm_tt - This is a structure holding the pages, caching- and aperture
> @@ -85,6 +87,9 @@ struct ttm_tt {
> * fault handling abuses the DMA api a bit and dma_map_attrs can't be
> * used to assure pgprot always matches.
> *
> + * TTM_TT_FLAG_PRIV_BACKED_UP: TTM internal only. This is set if the
> + * struct ttm_tt has been (possibly partially) backed up.
> + *
> * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is
> * set by TTM after ttm_tt_populate() has successfully returned, and is
> * then unset when TTM calls ttm_tt_unpopulate().
> @@ -96,6 +101,7 @@ struct ttm_tt {
> #define TTM_TT_FLAG_DECRYPTED BIT(4)
>
> #define TTM_TT_FLAG_PRIV_POPULATED BIT(5)
> +#define TTM_TT_FLAG_PRIV_BACKED_UP BIT(6)
> uint32_t page_flags;
> /** @num_pages: Number of pages in the page array. */
> uint32_t num_pages;
> @@ -105,11 +111,19 @@ struct ttm_tt {
> dma_addr_t *dma_address;
> /** @swap_storage: Pointer to shmem struct file for swap storage. */
> struct file *swap_storage;
> + /**
> + * @backup: Pointer to backup struct for backed up tts.
> + * Could be unified with @swap_storage. Meanwhile, this is
> + * a driver-owned field.
> + */
> + struct ttm_backup *backup;
> /**
> * @caching: The current caching state of the pages, see enum
> * ttm_caching.
> */
> enum ttm_caching caching;
> + /** @restore: Partial restoration from backup state. TTM private */
> + struct ttm_pool_tt_restore *restore;
> };
>
> /**
> @@ -230,6 +244,21 @@ void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages);
> struct ttm_kmap_iter *ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt,
> struct ttm_tt *tt);
> unsigned long ttm_tt_pages_limit(void);
> +
> +/**
> + * struct ttm_backup_flags - Flags to govern backup behaviour.
> + * @purge: Free pages without backing up. Bypass pools.
> + * @writeback: Attempt to copy contents directly to swap space, even
> + * if that means blocking on writes to external memory.
> + */
> +struct ttm_backup_flags {
> + u32 purge : 1;
> + u32 writeback : 1;
> +};
> +
> +long ttm_tt_backup(struct ttm_device *bdev, struct ttm_tt *tt,
> + const struct ttm_backup_flags flags);
> +
> #if IS_ENABLED(CONFIG_AGP)
> #include <linux/agp_backend.h>
>
> --
> 2.44.0
>
* Re: [PATCH v8 2/6] drm/ttm/pool: Provide a helper to shrink pages
2024-08-16 14:10 ` Matthew Brost
@ 2024-08-19 8:07 ` Thomas Hellström
2024-08-20 17:46 ` Matthew Brost
From: Thomas Hellström @ 2024-08-19 8:07 UTC (permalink / raw)
To: Matthew Brost
Cc: intel-xe, Christian König, Somalapuram Amaranath, dri-devel,
Paulo Zanoni
Hi, Matt.
On Fri, 2024-08-16 at 14:10 +0000, Matthew Brost wrote:
> On Fri, Aug 16, 2024 at 03:37:13PM +0200, Thomas Hellström wrote:
> > Provide a helper to shrink ttm_tt page-vectors on a per-page
> > basis. A ttm_backup backend could then in theory get away with
> > allocating a single temporary page for each struct ttm_tt.
> >
> > This is accomplished by splitting larger pages before trying to
> > back them up.
> >
> > In the future we could allow ttm_backup to handle backing up
> > large pages as well, but currently there's no benefit in
> > doing that, since the shmem backup backend would have to
> > split those anyway to avoid allocating too much temporary
> > memory, and if the backend instead inserts pages into the
> > swap-cache, those are split on reclaim by the core.
> >
> > Due to potential backup and recovery errors, allow partially
> > swapped-out struct ttm_tt's, although mark them as swapped out to
> > stop them from being swapped out a second time. More details in the
> > ttm_pool.c DOC section.
> >
> > v2:
> > - A couple of cleanups and error fixes in ttm_pool_back_up_tt.
> > - s/back_up/backup/
> > - Add a writeback parameter to the exported interface.
> > v8:
> > - Use a struct for flags for readability (Matt Brost)
> > - Address misc other review comments (Matt Brost)
> >
> > Cc: Christian König <christian.koenig@amd.com>
> > Cc: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
> > Cc: Matthew Brost <matthew.brost@intel.com>
> > Cc: <dri-devel@lists.freedesktop.org>
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> > drivers/gpu/drm/ttm/ttm_pool.c | 394
> > +++++++++++++++++++++++++++++++--
> > drivers/gpu/drm/ttm/ttm_tt.c | 37 ++++
> > include/drm/ttm/ttm_pool.h | 6 +
> > include/drm/ttm/ttm_tt.h | 29 +++
> > 4 files changed, 453 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
> > b/drivers/gpu/drm/ttm/ttm_pool.c
> > index 8504dbe19c1a..0d224cd9f8eb 100644
> > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > @@ -41,6 +41,7 @@
> > #include <asm/set_memory.h>
> > #endif
> >
> > +#include <drm/ttm/ttm_backup.h>
> > #include <drm/ttm/ttm_pool.h>
> > #include <drm/ttm/ttm_tt.h>
> > #include <drm/ttm/ttm_bo.h>
> > @@ -58,6 +59,32 @@ struct ttm_pool_dma {
> > unsigned long vaddr;
> > };
> >
> > +/**
> > + * struct ttm_pool_tt_restore - State representing restore from
> > backup
> > + * @alloced_pages: Total number of already allocated pages for the
> > ttm_tt.
> > + * @restored_pages: Number of (sub) pages restored from swap for
> > this
> > + * chunk of 1 << @order pages.
> > + * @first_page: The ttm page ptr representing @old_pages[0].
> > + * @caching_divide: Page pointer where subsequent pages are
> > cached.
> > + * @old_pages: Backup copy of page pointers that were replaced by
> > the new
> > + * page allocation.
> > + * @pool: The pool used for page allocation while restoring.
> > + * @order: The order of the last page allocated while restoring.
> > + *
> > + * Recovery from backup might fail when we've recovered less than
> > + * the full ttm_tt. In order not to lose any data (yet), keep
> > + * information around that allows us to restart a failed ttm
> > + * backup recovery.
> > + */
> > +struct ttm_pool_tt_restore {
> > + pgoff_t alloced_pages;
> > + pgoff_t restored_pages;
> > + struct page **first_page;
> > + struct page **caching_divide;
> > + struct ttm_pool *pool;
> > + unsigned int order;
> > + struct page *old_pages[];
> > +};
> > +
> > static unsigned long page_pool_size;
> >
> > MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA
> > pool");
> > @@ -354,11 +381,102 @@ static unsigned int
> > ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
> > return p->private;
> > }
> >
> > +/*
> > + * To be able to insert single pages into backup directly,
> > + * we need to split multi-order page allocations and make them
> > look
> > + * like single-page allocations.
> > + */
> > +static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct
> > page *p)
> > +{
> > + unsigned int order = ttm_pool_page_order(pool, p);
> > + pgoff_t nr;
> > +
> > + if (!order)
> > + return;
> > +
> > + split_page(p, order);
> > + nr = 1UL << order;
> > + while (nr--)
> > + (p++)->private = 0;
> > +}
> > +
> > +/**
> > + * DOC: Partial backup and restoration of a struct ttm_tt.
> > + *
> > + * Swapout using ttm_backup::ops::backup_page() and swapin using
> > + * ttm_backup::ops::copy_backed_up_page() may fail.
> > + * The former most likely due to lack of swap-space or memory, the
> > latter due
> > + * to lack of memory or because of signal interruption during
> > waits.
> > + *
> > + * Backup failure is easily handled by using a ttm_tt pages vector
> > that holds
> > + * both swap entries and page pointers. This has to be taken into
> > account when
> > + * restoring such a ttm_tt from backup, and when freeing it while
> > backed up.
> > + * When restoring, for simplicity, new pages are actually
> > allocated from the
> > + * pool and the contents of any old pages are copied in and then
> > the old pages
> > + * are released.
> > + *
> > + * For restoration failures, the struct ttm_pool_tt_restore holds
> > sufficient state
> > + * to be able to resume an interrupted restore, and that structure
> > is freed once
> > + * the restoration is complete. If the struct ttm_tt is destroyed
> > while there
> > + * is a valid struct ttm_pool_tt_restore attached, that is also
> > properly taken
> > + * care of.
> > + */
> > +
> > +static bool ttm_pool_restore_valid(const struct
> > ttm_pool_tt_restore *restore)
> > +{
> > + return restore && restore->restored_pages < (1 << restore-
> > >order);
> > +}
> > +
> > +static int ttm_pool_restore_tt(struct ttm_pool_tt_restore
> > *restore,
> > + struct ttm_backup *backup,
> > + struct ttm_operation_ctx *ctx)
> > +{
> > + unsigned int i, nr = 1 << restore->order;
> > + int ret = 0;
> > +
> > + if (!ttm_pool_restore_valid(restore))
> > + return 0;
> > +
> > + for (i = restore->restored_pages; i < nr; ++i) {
> > + struct page *p = restore->old_pages[i];
> > +
> > + if (ttm_backup_page_ptr_is_handle(p)) {
> > + unsigned long handle =
> > ttm_backup_page_ptr_to_handle(p);
> > +
> > + if (handle == 0)
> > + continue;
> > +
> > + ret = backup->ops->copy_backed_up_page
> > + (backup, restore->first_page[i],
> > + handle, ctx->interruptible);
> > + if (ret)
> > + break;
> > +
> > + backup->ops->drop(backup, handle);
> > + } else if (p) {
> > + /*
> > + * We could probably avoid splitting the
> > old page
> > + * using clever logic, but ATM we don't
> > care.
> > + */
> > + ttm_pool_split_for_swap(restore->pool, p);
> > + copy_highpage(restore->first_page[i], p);
> > + __free_pages(p, 0);
> > + }
> > +
> > + restore->restored_pages++;
> > + restore->old_pages[i] = NULL;
> > + cond_resched();
> > + }
> > +
> > + return ret;
> > +}
> > +
> > /* Called when we got a page, either from a pool or newly
> > allocated */
> > static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned
> > int order,
> > struct page *p, dma_addr_t
> > **dma_addr,
> > unsigned long *num_pages,
> > - struct page ***pages)
> > + struct page ***pages,
> > + struct ttm_pool_tt_restore
> > *restore)
> > {
> > unsigned int i;
> > int r;
> > @@ -369,6 +487,16 @@ static int ttm_pool_page_allocated(struct
> > ttm_pool *pool, unsigned int order,
> > return r;
> > }
> >
> > + if (restore) {
> > + memcpy(restore->old_pages, *pages,
> > + (1 << order) * sizeof(*restore-
> > >old_pages));
> > + memset(*pages, 0, (1 << order) * sizeof(**pages));
> > + restore->order = order;
> > + restore->restored_pages = 0;
> > + restore->first_page = *pages;
> > + restore->alloced_pages += 1UL << order;
> > + }
> > +
> > *num_pages -= 1 << order;
> > for (i = 1 << order; i; --i, ++(*pages), ++p)
> > **pages = p;
> > @@ -394,22 +522,39 @@ static void ttm_pool_free_range(struct
> > ttm_pool *pool, struct ttm_tt *tt,
> > pgoff_t start_page, pgoff_t
> > end_page)
> > {
> > struct page **pages = &tt->pages[start_page];
> > + struct ttm_backup *backup = tt->backup;
> > unsigned int order;
> > pgoff_t i, nr;
> >
> > for (i = start_page; i < end_page; i += nr, pages += nr) {
> > struct ttm_pool_type *pt = NULL;
> > + struct page *p = *pages;
> > +
> > + if (ttm_backup_page_ptr_is_handle(p)) {
> > + unsigned long handle =
> > ttm_backup_page_ptr_to_handle(p);
> > +
> > + nr = 1;
> > + if (handle != 0)
> > + backup->ops->drop(backup, handle);
> > + continue;
> > + }
> > +
> > + if (pool) {
> > + order = ttm_pool_page_order(pool, p);
> > + nr = (1UL << order);
> > + if (tt->dma_address)
> > + ttm_pool_unmap(pool, tt-
> > >dma_address[i], nr);
> >
> > - order = ttm_pool_page_order(pool, *pages);
> > - nr = (1UL << order);
> > - if (tt->dma_address)
> > - ttm_pool_unmap(pool, tt->dma_address[i],
> > nr);
> > + pt = ttm_pool_select_type(pool, caching,
> > order);
> > + } else {
> > + order = p->private;
> > + nr = (1UL << order);
> > + }
> >
> > - pt = ttm_pool_select_type(pool, caching, order);
> > if (pt)
> > - ttm_pool_type_give(pt, *pages);
> > + ttm_pool_type_give(pt, p);
> > else
> > - ttm_pool_free_page(pool, caching, order,
> > *pages);
> > + ttm_pool_free_page(pool, caching, order,
> > p);
> > }
> > }
> >
> > @@ -453,9 +598,36 @@ int ttm_pool_alloc(struct ttm_pool *pool,
> > struct ttm_tt *tt,
> > else
> > gfp_flags |= GFP_HIGHUSER;
> >
> > - for (order = min_t(unsigned int, MAX_PAGE_ORDER,
> > __fls(num_pages));
> > - num_pages;
> > - order = min_t(unsigned int, order, __fls(num_pages)))
> > {
> > + order = min_t(unsigned int, MAX_PAGE_ORDER,
> > __fls(num_pages));
> > +
> > + if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP) {
> > + if (!tt->restore) {
> > + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
> > +
> > + if (ctx->gfp_retry_mayfail)
> > + gfp |= __GFP_RETRY_MAYFAIL;
> > +
> > + tt->restore =
> > + kvzalloc(struct_size(tt->restore,
> > old_pages,
> > + (size_t)1 <<
> > order), gfp);
> > + if (!tt->restore)
> > + return -ENOMEM;
> > + } else if (ttm_pool_restore_valid(tt->restore)) {
> > + struct ttm_pool_tt_restore *restore = tt-
> > >restore;
> > +
> > + num_pages -= restore->alloced_pages;
> > + order = min_t(unsigned int, order,
> > __fls(num_pages));
> > + pages += restore->alloced_pages;
> > + r = ttm_pool_restore_tt(restore, tt-
> > >backup, ctx);
> > + if (r)
> > + return r;
> > + caching = restore->caching_divide;
> > + }
> > +
> > + tt->restore->pool = pool;
> > + }
> > +
> > + for (; num_pages; order = min_t(unsigned int, order,
> > __fls(num_pages))) {
> > struct ttm_pool_type *pt;
> >
> > page_caching = tt->caching;
> > @@ -472,11 +644,19 @@ int ttm_pool_alloc(struct ttm_pool *pool,
> > struct ttm_tt *tt,
> > r = ttm_pool_page_allocated(pool, order, p,
> > &dma_addr,
> > &num_pages,
> > - &pages);
> > + &pages,
> > + tt->restore);
> > if (r)
> > goto error_free_page;
> >
> > caching = pages;
> > + if (ttm_pool_restore_valid(tt->restore)) {
> > + r = ttm_pool_restore_tt(tt->restore, tt->backup, ctx);
> > + if (r)
> > + goto error_free_all;
> > + }
> > +
> > if (num_pages < (1 << order))
> > break;
> >
> > @@ -496,9 +676,17 @@ int ttm_pool_alloc(struct ttm_pool *pool,
> > struct ttm_tt *tt,
> > caching = pages;
> > }
> > r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
> > - &num_pages, &pages);
> > + &num_pages, &pages,
> > + tt->restore);
> > if (r)
> > goto error_free_page;
> > +
> > + if (ttm_pool_restore_valid(tt->restore)) {
> > + r = ttm_pool_restore_tt(tt->restore, tt->backup, ctx);
> > + if (r)
> > + goto error_free_all;
> > + }
> > +
> > if (PageHighMem(p))
> > caching = pages;
> > }
> > @@ -517,12 +705,26 @@ int ttm_pool_alloc(struct ttm_pool *pool,
> > struct ttm_tt *tt,
> > if (r)
> > goto error_free_all;
> >
> > + if (tt->restore) {
> > + kvfree(tt->restore);
> > + tt->restore = NULL;
> > + }
> > +
> > + if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP)
> > + tt->page_flags &= ~(TTM_TT_FLAG_PRIV_BACKED_UP |
> > + TTM_TT_FLAG_SWAPPED);
> > +
> > return 0;
> >
> > error_free_page:
> > ttm_pool_free_page(pool, page_caching, order, p);
> >
> > error_free_all:
> > + if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP) {
> > + tt->restore->caching_divide = caching;
> > + return r;
> > + }
> > +
> > num_pages = tt->num_pages - num_pages;
> > caching_divide = caching - tt->pages;
> > ttm_pool_free_range(pool, tt, tt->caching, 0,
> > caching_divide);
> > @@ -549,6 +751,172 @@ void ttm_pool_free(struct ttm_pool *pool,
> > struct ttm_tt *tt)
> > }
> > EXPORT_SYMBOL(ttm_pool_free);
> >
> > +/**
> > + * ttm_pool_release_backed_up() - Release content of a swapped-out
> > struct ttm_tt
> > + * @tt: The struct ttm_tt.
> > + *
> > + * Release handles with associated content or any remaining pages
> > of
> > + * a backed-up struct ttm_tt.
> > + */
> > +void ttm_pool_release_backed_up(struct ttm_tt *tt)
> > +{
> > + struct ttm_backup *backup = tt->backup;
> > + struct ttm_pool_tt_restore *restore;
> > + pgoff_t i, start_page = 0;
> > + unsigned long handle;
> > +
> > + if (!(tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP))
> > + return;
> > +
> > + restore = tt->restore;
> > +
> > + if (ttm_pool_restore_valid(restore)) {
> > + pgoff_t nr = 1UL << restore->order;
> > +
> > + for (i = restore->restored_pages; i < nr; ++i) {
> > + struct page *p = restore->old_pages[i];
> > +
> > + if (ttm_backup_page_ptr_is_handle(p)) {
> > + handle =
> > ttm_backup_page_ptr_to_handle(p);
> > + if (handle == 0)
> > + continue;
> > +
> > + backup->ops->drop(backup, handle);
> > + } else if (p) {
> > + ttm_pool_split_for_swap(restore-
> > >pool, p);
> > + __free_pages(p, 0);
> > + }
> > + }
> > + }
> > +
> > + if (restore) {
> > + pgoff_t mid = restore->caching_divide - tt->pages;
> > +
> > + start_page = restore->alloced_pages;
> > + /* Pages that might be dma-mapped and non-cached
> > */
> > + ttm_pool_free_range(restore->pool, tt, tt-
> > >caching,
> > + 0, mid);
> > + /* Pages that might be dma-mapped but cached */
> > + ttm_pool_free_range(restore->pool, tt, ttm_cached,
> > + mid, restore->alloced_pages);
> > + }
> > +
> > + /* Shrunken pages. Cached and not dma-mapped. */
> > + ttm_pool_free_range(NULL, tt, ttm_cached, start_page, tt-
> > >num_pages);
> > +
> > + if (restore) {
> > + kvfree(restore);
> > + tt->restore = NULL;
> > + }
> > +
> > + tt->page_flags &= ~(TTM_TT_FLAG_PRIV_BACKED_UP |
> > TTM_TT_FLAG_SWAPPED);
> > +}
> > +
> > +/**
> > + * ttm_pool_backup_tt() - Back up or purge a struct ttm_tt
> > + * @pool: The pool used when allocating the struct ttm_tt.
> > + * @ttm: The struct ttm_tt.
> > + * @flags: Flags to govern the backup behaviour.
> > + *
> > + * Back up or purge a struct ttm_tt. If @purge is true, then
> > + * all pages will be freed directly to the system rather than to
> > the pool
> > + * they were allocated from, making the function behave similarly
> > to
> > + * ttm_pool_free(). If @purge is false the pages will be backed up
> > instead,
> > + * exchanged for handles.
> > + * A subsequent call to ttm_pool_alloc() will then read back the
> > content and
> > + * a subsequent call to ttm_pool_release_backed_up() will drop it.
> > + * If backup of a page fails for whatever reason, @ttm will still
> > be
> > + * partially backed up, retaining those pages for which backup
> > fails.
> > + *
> > + * Return: Number of pages actually backed up or freed, or
> > negative
> > + * error code on error.
> > + */
> > +long ttm_pool_backup_tt(struct ttm_pool *pool, struct ttm_tt *ttm,
> > + const struct ttm_backup_flags *flags)
> > +{
> > + struct ttm_backup *backup = ttm->backup;
> > + struct page *page;
> > + unsigned long handle;
> > + gfp_t alloc_gfp;
> > + gfp_t gfp;
> > + int ret = 0;
> > + pgoff_t shrunken = 0;
> > + pgoff_t i, num_pages;
> > +
> > + if ((!get_nr_swap_pages() && !flags->purge) ||
> > + pool->use_dma_alloc ||
> > + (ttm->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP))
> > + return -EBUSY;
> > +
> > +#ifdef CONFIG_X86
> > + /* Anything returned to the system needs to be cached. */
> > + if (ttm->caching != ttm_cached)
> > + set_pages_array_wb(ttm->pages, ttm->num_pages);
> > +#endif
> > +
> > + if (ttm->dma_address || flags->purge) {
> > + for (i = 0; i < ttm->num_pages; i += num_pages) {
> > + unsigned int order;
> > +
> > + page = ttm->pages[i];
> > + if (unlikely(!page)) {
> > + num_pages = 1;
> > + continue;
> > + }
> > +
> > + order = ttm_pool_page_order(pool, page);
> > + num_pages = 1UL << order;
> > + if (ttm->dma_address)
> > + ttm_pool_unmap(pool, ttm-
> > >dma_address[i],
> > + num_pages);
> > + if (flags->purge) {
> > + shrunken += num_pages;
> > + page->private = 0;
> > + __free_pages(page, order);
> > + memset(ttm->pages + i, 0,
> > + num_pages * sizeof(*ttm-
> > >pages));
> > + }
> > + }
> > + }
> > +
> > + if (flags->purge)
> > + return shrunken;
> > +
> > + if (pool->use_dma32)
> > + gfp = GFP_DMA32;
> > + else
> > + gfp = GFP_HIGHUSER;
> > +
> > + alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN |
> > __GFP_RETRY_MAYFAIL;
> > +
> > + for (i = 0; i < ttm->num_pages; ++i) {
> > + page = ttm->pages[i];
> > + if (unlikely(!page))
> > + continue;
> > +
> > + ttm_pool_split_for_swap(pool, page);
> > +
> > + handle = backup->ops->backup_page(backup, page,
> > flags->writeback, i,
> > + gfp, alloc_gfp);
> > + if (handle) {
> > + ttm->pages[i] =
> > ttm_backup_handle_to_page_ptr(handle);
> > + put_page(page);
> > + shrunken++;
> > + } else {
> > + /* We allow partially shrunken tts */
> > + ret = -ENOMEM;
> > + break;
> > + }
> > + cond_resched();
> > + }
> > +
> > + if (shrunken)
> > + ttm->page_flags |= (TTM_TT_FLAG_PRIV_BACKED_UP |
> > + TTM_TT_FLAG_SWAPPED);
> > +
> > + return shrunken ? shrunken : ret;
> > +}
> > +
> > /**
> > * ttm_pool_init - Initialize a pool
> > *
> > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c
> > b/drivers/gpu/drm/ttm/ttm_tt.c
> > index 4b51b9023126..f520b8c93f03 100644
> > --- a/drivers/gpu/drm/ttm/ttm_tt.c
> > +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> > @@ -40,6 +40,7 @@
> > #include <drm/drm_cache.h>
> > #include <drm/drm_device.h>
> > #include <drm/drm_util.h>
> > +#include <drm/ttm/ttm_backup.h>
> > #include <drm/ttm/ttm_bo.h>
> > #include <drm/ttm/ttm_tt.h>
> >
> > @@ -158,6 +159,8 @@ static void ttm_tt_init_fields(struct ttm_tt
> > *ttm,
> > ttm->swap_storage = NULL;
> > ttm->sg = bo->sg;
> > ttm->caching = caching;
> > + ttm->restore = NULL;
> > + ttm->backup = NULL;
> > }
> >
> > int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
> > @@ -182,6 +185,12 @@ void ttm_tt_fini(struct ttm_tt *ttm)
> > fput(ttm->swap_storage);
> > ttm->swap_storage = NULL;
> >
> > + ttm_pool_release_backed_up(ttm);
> > + if (ttm->backup) {
>
> Sorry for the conflicting comments in the last rev, but my last one
> here [1] regarding making this fully driver-owned didn’t receive a
> response.
> I’ll state it again: Do you think the backup fini should be owned by
> the
> driver? This would allow the driver to use a global backup for all TT
> if
> it wanted to. It would also make it consistent in the sense that the
> driver would own both the allocation and fini of the backup.
>
> Matt
>
> [1]
> https://patchwork.freedesktop.org/patch/602165/?series=131815&rev=6#comment_1104556
Sorry, I typed a lengthy reply to that comment but it somehow got lost.

Anyway, a single backup structure is, I think, possible with the
current implementation as well. In fact I used to have one of those
around for the direct-to-swap-cache implementation; basically the
backup_fini was a NOP IIRC.
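
Roughly like this, for illustration (a sketch only; the xe_ names are
made up, and I'm assuming the ops table type from patch 1 is called
struct ttm_backup_ops):

/*
 * A single, driver-global backup shared by all struct ttm_tt.
 * It is statically allocated and shared, so fini() is a NOP.
 */
static void xe_shared_backup_fini(struct ttm_backup *backup)
{
	/* Nothing to tear down for the shared, static instance. */
}

static const struct ttm_backup_ops xe_shared_backup_ops = {
	.fini = xe_shared_backup_fini,
	/* .drop, .copy_backed_up_page and .backup_page as in the
	 * shmem backend from patch 1.
	 */
};

static struct ttm_backup xe_shared_backup = {
	.ops = &xe_shared_backup_ops,
};

Every ttm_tt would then point at &xe_shared_backup, and ttm_tt_fini()
calling ops->fini() stays harmless.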
One could also imagine the driver registering a large NVMe file to use
for backup. In that case, each subclassed backup struct would probably
want to carry a refcount to the underlying file, and each page position
in the file would be allocated using a struct ida or similar.
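
Something along those lines could look like this; purely a sketch,
the ttm_backup_file type and its fields are invented here, and the
backup_page() prototype is the one patch 2 calls, if I recall it
correctly:

/*
 * Hypothetical ttm_backup subclass storing page copies in one large,
 * shared backing file. Each instance holds a file reference, and
 * page positions (slots) in the file are handed out by an IDA so
 * that instances sharing the file don't collide.
 */
struct ttm_backup_file {
	struct ttm_backup backup;
	struct file *filp;	/* Refcounted, shared backing file. */
	struct ida *slots;	/* Allocates page positions in @filp. */
};

static unsigned long
ttm_backup_file_backup_page(struct ttm_backup *backup, struct page *page,
			    bool writeback, pgoff_t idx, gfp_t page_gfp,
			    gfp_t alloc_gfp)
{
	struct ttm_backup_file *fbackup =
		container_of(backup, struct ttm_backup_file, backup);
	int slot = ida_alloc(fbackup->slots, GFP_KERNEL);

	if (slot < 0)
		return 0;	/* Zero means failure, as in the shmem backend. */

	/* ...write @page at file offset (loff_t)slot << PAGE_SHIFT... */

	return (unsigned long)slot + 1;	/* Non-zero handle. */
}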
So for making it driver-owned, I'm not completely following what you
mean. I noted in the docs that the driver needs to assign this value,
but thinking about it, that should probably be done using the
ttm_tt_init() helper. Although that will require changing the
ttm_tt_init() interface for all drivers.
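
Until then, a driver would simply assign the (driver-owned) @backup
pointer right after init in its ttm_tt_create() hook. A sketch, with
a made-up xe_alloc_backup() helper:

static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *bo,
				       uint32_t page_flags)
{
	struct ttm_tt *tt = kzalloc(sizeof(*tt), GFP_KERNEL);

	if (!tt)
		return NULL;

	if (ttm_tt_init(tt, bo, page_flags, ttm_cached, 0)) {
		kfree(tt);
		return NULL;
	}

	/* @backup is driver-owned; hypothetical helper creating
	 * e.g. the shmem-based backup from patch 1.
	 */
	tt->backup = xe_alloc_backup(bo);
	if (!tt->backup) {
		ttm_tt_fini(tt);
		kfree(tt);
		return NULL;
	}

	return tt;
}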
But if I missed the point, please get back. Meanwhile, I'm sending out
v9 with a couple of compilation fixes.
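
The build failures, by the way, boil down to should_fail() and
backup_fault_inject being undeclared when CONFIG_FAULT_INJECTION is
unset, since the IS_ENABLED() branches are still compiled. A minimal
fallback along these lines should cure it (a sketch only; the actual
v9 fix may differ):

#ifdef CONFIG_FAULT_INJECTION
#include <linux/fault-inject.h>
static DECLARE_FAULT_ATTR(backup_fault_inject);
#else
/*
 * Variadic stub: the preprocessor discards the arguments, including
 * the reference to the then-nonexistent backup_fault_inject, so the
 * dead IS_ENABLED() branches compile away.
 */
#define should_fail(...) false
#endif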
Thanks,
Thomas
>
> > + ttm->backup->ops->fini(ttm->backup);
> > + ttm->backup = NULL;
> > + }
> > +
> > if (ttm->pages)
> > kvfree(ttm->pages);
> > else
> > @@ -253,6 +262,34 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
> > }
> > EXPORT_SYMBOL_FOR_TESTS_ONLY(ttm_tt_swapin);
> >
> > +/**
> > + * ttm_tt_backup() - Helper to back up a struct ttm_tt.
> > + * @bdev: The TTM device.
> > + * @tt: The struct ttm_tt.
> > + * @flags: Flags that govern the backup behaviour.
> > + *
> > + * Update the page accounting and call ttm_pool_backup_tt() to free
> > + * pages
> > + * or back them up.
> > + *
> > + * Return: Number of pages freed or swapped out, or negative error
> > code on
> > + * error.
> > + */
> > +long ttm_tt_backup(struct ttm_device *bdev, struct ttm_tt *tt,
> > + const struct ttm_backup_flags flags)
> > +{
> > + long ret;
> > +
> > + if (WARN_ON(IS_ERR_OR_NULL(tt->backup)))
> > + return 0;
> > +
> > + ret = ttm_pool_backup_tt(&bdev->pool, tt, &flags);
> > +
> > + if (ret > 0)
> > + tt->page_flags &= ~TTM_TT_FLAG_PRIV_POPULATED;
> > +
> > + return ret;
> > +}
> > +
> > /**
> > * ttm_tt_swapout - swap out tt object
> > *
> > diff --git a/include/drm/ttm/ttm_pool.h
> > b/include/drm/ttm/ttm_pool.h
> > index 160d954a261e..3112a4be835c 100644
> > --- a/include/drm/ttm/ttm_pool.h
> > +++ b/include/drm/ttm/ttm_pool.h
> > @@ -33,6 +33,7 @@
> >
> > struct device;
> > struct seq_file;
> > +struct ttm_backup_flags;
> > struct ttm_operation_ctx;
> > struct ttm_pool;
> > struct ttm_tt;
> > @@ -89,6 +90,11 @@ void ttm_pool_fini(struct ttm_pool *pool);
> >
> > int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m);
> >
> > +void ttm_pool_release_backed_up(struct ttm_tt *tt);
> > +
> > +long ttm_pool_backup_tt(struct ttm_pool *pool, struct ttm_tt *ttm,
> > + const struct ttm_backup_flags *flags);
> > +
> > int ttm_pool_mgr_init(unsigned long num_pages);
> > void ttm_pool_mgr_fini(void);
> >
> > diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
> > index 2b9d856ff388..e42a75cff502 100644
> > --- a/include/drm/ttm/ttm_tt.h
> > +++ b/include/drm/ttm/ttm_tt.h
> > @@ -32,11 +32,13 @@
> > #include <drm/ttm/ttm_caching.h>
> > #include <drm/ttm/ttm_kmap_iter.h>
> >
> > +struct ttm_backup;
> > struct ttm_device;
> > struct ttm_tt;
> > struct ttm_resource;
> > struct ttm_buffer_object;
> > struct ttm_operation_ctx;
> > +struct ttm_pool_tt_restore;
> >
> > /**
> > * struct ttm_tt - This is a structure holding the pages, caching-
> > and aperture
> > @@ -85,6 +87,9 @@ struct ttm_tt {
> > * fault handling abuses the DMA api a bit and
> > dma_map_attrs can't be
> > * used to assure pgprot always matches.
> > *
> > + * TTM_TT_FLAG_PRIV_BACKED_UP: TTM internal only. This is
> > set if the
> > + * struct ttm_tt has been (possibly partially) backed up.
> > + *
> > * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT
> > USE. This is
> > * set by TTM after ttm_tt_populate() has successfully
> > returned, and is
> > * then unset when TTM calls ttm_tt_unpopulate().
> > @@ -96,6 +101,7 @@ struct ttm_tt {
> > #define TTM_TT_FLAG_DECRYPTED BIT(4)
> >
> > #define TTM_TT_FLAG_PRIV_POPULATED BIT(5)
> > +#define TTM_TT_FLAG_PRIV_BACKED_UP BIT(6)
> > uint32_t page_flags;
> > /** @num_pages: Number of pages in the page array. */
> > uint32_t num_pages;
> > @@ -105,11 +111,19 @@ struct ttm_tt {
> > dma_addr_t *dma_address;
> > /** @swap_storage: Pointer to shmem struct file for swap
> > storage. */
> > struct file *swap_storage;
> > + /**
> > + * @backup: Pointer to backup struct for backed up tts.
> > + * Could be unified with @swap_storage. Meanwhile, this is
> > + * a driver-owned field.
> > + */
> > + struct ttm_backup *backup;
> > /**
> > * @caching: The current caching state of the pages, see
> > enum
> > * ttm_caching.
> > */
> > enum ttm_caching caching;
> > + /** @restore: Partial restoration from backup state. TTM
> > private */
> > + struct ttm_pool_tt_restore *restore;
> > };
> >
> > /**
> > @@ -230,6 +244,21 @@ void ttm_tt_mgr_init(unsigned long num_pages,
> > unsigned long num_dma32_pages);
> > struct ttm_kmap_iter *ttm_kmap_iter_tt_init(struct
> > ttm_kmap_iter_tt *iter_tt,
> > struct ttm_tt *tt);
> > unsigned long ttm_tt_pages_limit(void);
> > +
> > +/**
> > + * struct ttm_backup_flags - Flags to govern backup behaviour.
> > + * @purge: Free pages without backing up. Bypass pools.
> > + * @writeback: Attempt to copy contents directly to swap space,
> > even
> > + * if that means blocking on writes to external memory.
> > + */
> > +struct ttm_backup_flags {
> > + u32 purge : 1;
> > + u32 writeback : 1;
> > +};
> > +
> > +long ttm_tt_backup(struct ttm_device *bdev, struct ttm_tt *tt,
> > + const struct ttm_backup_flags flags);
> > +
> > #if IS_ENABLED(CONFIG_AGP)
> > #include <linux/agp_backend.h>
> >
> > --
> > 2.44.0
> >
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v8 2/6] drm/ttm/pool: Provide a helper to shrink pages
2024-08-19 8:07 ` Thomas Hellström
@ 2024-08-20 17:46 ` Matthew Brost
0 siblings, 0 replies; 17+ messages in thread
From: Matthew Brost @ 2024-08-20 17:46 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, Christian König, Somalapuram Amaranath, dri-devel,
Paulo Zanoni
On Mon, Aug 19, 2024 at 10:07:16AM +0200, Thomas Hellström wrote:
> Hi, Matt.
>
> On Fri, 2024-08-16 at 14:10 +0000, Matthew Brost wrote:
> > On Fri, Aug 16, 2024 at 03:37:13PM +0200, Thomas Hellström wrote:
> > > Provide a helper to shrink ttm_tt page-vectors on a per-page
> > > basis. A ttm_backup backend could then in theory get away with
> > > allocating a single temporary page for each struct ttm_tt.
> > >
> > > This is accomplished by splitting larger pages before trying to
> > > back them up.
> > >
> > > In the future we could allow ttm_backup to handle backing up
> > > large pages as well, but currently there's no benefit in
> > > doing that, since the shmem backup backend would have to
> > > split those anyway to avoid allocating too much temporary
> > > memory, and if the backend instead inserts pages into the
> > > swap-cache, those are split on reclaim by the core.
> > >
> > > Due to potential backup and recovery errors, allow partially
> > > swapped-out struct ttm_tt's, although mark them as swapped out,
> > > stopping them from being swapped out a second time. More details
> > > in the ttm_pool.c DOC section.
> > >
> > > v2:
> > > - A couple of cleanups and error fixes in ttm_pool_back_up_tt.
> > > - s/back_up/backup/
> > > - Add a writeback parameter to the exported interface.
> > > v8:
> > > - Use a struct for flags for readability (Matt Brost)
> > > - Address misc other review comments (Matt Brost)
> > >
> > > Cc: Christian König <christian.koenig@amd.com>
> > > Cc: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
> > > Cc: Matthew Brost <matthew.brost@intel.com>
> > > Cc: <dri-devel@lists.freedesktop.org>
> > > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > ---
> > > drivers/gpu/drm/ttm/ttm_pool.c | 394
> > > +++++++++++++++++++++++++++++++--
> > > drivers/gpu/drm/ttm/ttm_tt.c | 37 ++++
> > > include/drm/ttm/ttm_pool.h | 6 +
> > > include/drm/ttm/ttm_tt.h | 29 +++
> > > 4 files changed, 453 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
> > > b/drivers/gpu/drm/ttm/ttm_pool.c
> > > index 8504dbe19c1a..0d224cd9f8eb 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > > @@ -41,6 +41,7 @@
> > > #include <asm/set_memory.h>
> > > #endif
> > >
> > > +#include <drm/ttm/ttm_backup.h>
> > > #include <drm/ttm/ttm_pool.h>
> > > #include <drm/ttm/ttm_tt.h>
> > > #include <drm/ttm/ttm_bo.h>
> > > @@ -58,6 +59,32 @@ struct ttm_pool_dma {
> > > unsigned long vaddr;
> > > };
> > >
> > > +/**
> > > + * struct ttm_pool_tt_restore - State representing restore from
> > > backup
> > > + * @alloced_pages: Total number of already allocated pages for the
> > > ttm_tt.
> > > + * @restored_pages: Number of (sub) pages restored from swap for
> > > this
> > > + * chunk of 1 << @order pages.
> > > + * @first_page: The ttm page ptr representing @old_pages[0].
> > > + * @caching_divide: Page pointer where subsequent pages are
> > > cached.
> > > + * @old_pages: Backup copy of page pointers that were replaced by
> > > the new
> > > + * page allocation.
> > > + * @pool: The pool used for page allocation while restoring.
> > > + * @order: The order of the last page allocated while restoring.
> > > + *
> > > + * Recovery from backup might fail when we've recovered less than
> > > + * the full ttm_tt. In order not to lose any data (yet), keep
> > > + * information around that allows us to restart a failed ttm
> > > + * backup recovery.
> > > + */
> > > +struct ttm_pool_tt_restore {
> > > + pgoff_t alloced_pages;
> > > + pgoff_t restored_pages;
> > > + struct page **first_page;
> > > + struct page **caching_divide;
> > > + struct ttm_pool *pool;
> > > + unsigned int order;
> > > + struct page *old_pages[];
> > > +};
> > > +
> > > static unsigned long page_pool_size;
> > >
> > > MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA
> > > pool");
> > > @@ -354,11 +381,102 @@ static unsigned int
> > > ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
> > > return p->private;
> > > }
> > >
> > > +/*
> > > + * To be able to insert single pages into backup directly,
> > > + * we need to split multi-order page allocations and make them
> > > look
> > > + * like single-page allocations.
> > > + */
> > > +static void ttm_pool_split_for_swap(struct ttm_pool *pool, struct
> > > page *p)
> > > +{
> > > + unsigned int order = ttm_pool_page_order(pool, p);
> > > + pgoff_t nr;
> > > +
> > > + if (!order)
> > > + return;
> > > +
> > > + split_page(p, order);
> > > + nr = 1UL << order;
> > > + while (nr--)
> > > + (p++)->private = 0;
> > > +}
> > > +
> > > +/**
> > > + * DOC: Partial backup and restoration of a struct ttm_tt.
> > > + *
> > > + * Swapout using ttm_backup::ops::backup_page() and swapin using
> > > + * ttm_backup::ops::copy_backed_up_page() may fail.
> > > + * The former most likely due to lack of swap-space or memory, the
> > > latter due
> > > + * to lack of memory or because of signal interruption during
> > > waits.
> > > + *
> > > + * Backup failure is easily handled by using a ttm_tt pages vector
> > > that holds
> > > + * both swap entries and page pointers. This has to be taken into
> > > account when
> > > + * restoring such a ttm_tt from backup, and when freeing it while
> > > backed up.
> > > + * When restoring, for simplicity, new pages are actually
> > > allocated from the
> > > + * pool and the contents of any old pages are copied in and then
> > > the old pages
> > > + * are released.
> > > + *
> > > + * For restoration failures, the struct ttm_pool_tt_restore holds
> > > sufficient state
> > > + * to be able to resume an interrupted restore, and that structure
> > > is freed once
> > > + * the restoration is complete. If the struct ttm_tt is destroyed
> > > while there
> > > + * is a valid struct ttm_pool_tt_restore attached, that is also
> > > properly taken
> > > + * care of.
> > > + */
> > > +
> > > +static bool ttm_pool_restore_valid(const struct
> > > ttm_pool_tt_restore *restore)
> > > +{
> > > + return restore && restore->restored_pages < (1 << restore-
> > > >order);
> > > +}
> > > +
> > > +static int ttm_pool_restore_tt(struct ttm_pool_tt_restore
> > > *restore,
> > > + struct ttm_backup *backup,
> > > + struct ttm_operation_ctx *ctx)
> > > +{
> > > + unsigned int i, nr = 1 << restore->order;
> > > + int ret = 0;
> > > +
> > > + if (!ttm_pool_restore_valid(restore))
> > > + return 0;
> > > +
> > > + for (i = restore->restored_pages; i < nr; ++i) {
> > > + struct page *p = restore->old_pages[i];
> > > +
> > > + if (ttm_backup_page_ptr_is_handle(p)) {
> > > + unsigned long handle =
> > > ttm_backup_page_ptr_to_handle(p);
> > > +
> > > + if (handle == 0)
> > > + continue;
> > > +
> > > + ret = backup->ops->copy_backed_up_page
> > > + (backup, restore->first_page[i],
> > > + handle, ctx->interruptible);
> > > + if (ret)
> > > + break;
> > > +
> > > + backup->ops->drop(backup, handle);
> > > + } else if (p) {
> > > + /*
> > > + * We could probably avoid splitting the
> > > old page
> > > + * using clever logic, but ATM we don't
> > > care.
> > > + */
> > > + ttm_pool_split_for_swap(restore->pool, p);
> > > + copy_highpage(restore->first_page[i], p);
> > > + __free_pages(p, 0);
> > > + }
> > > +
> > > + restore->restored_pages++;
> > > + restore->old_pages[i] = NULL;
> > > + cond_resched();
> > > + }
> > > +
> > > + return ret;
> > > +}
> > > +
> > > /* Called when we got a page, either from a pool or newly
> > > allocated */
> > > static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned
> > > int order,
> > > struct page *p, dma_addr_t
> > > **dma_addr,
> > > unsigned long *num_pages,
> > > - struct page ***pages)
> > > + struct page ***pages,
> > > + struct ttm_pool_tt_restore
> > > *restore)
> > > {
> > > unsigned int i;
> > > int r;
> > > @@ -369,6 +487,16 @@ static int ttm_pool_page_allocated(struct
> > > ttm_pool *pool, unsigned int order,
> > > return r;
> > > }
> > >
> > > + if (restore) {
> > > + memcpy(restore->old_pages, *pages,
> > > + (1 << order) * sizeof(*restore-
> > > >old_pages));
> > > + memset(*pages, 0, (1 << order) * sizeof(**pages));
> > > + restore->order = order;
> > > + restore->restored_pages = 0;
> > > + restore->first_page = *pages;
> > > + restore->alloced_pages += 1UL << order;
> > > + }
> > > +
> > > *num_pages -= 1 << order;
> > > for (i = 1 << order; i; --i, ++(*pages), ++p)
> > > **pages = p;
> > > @@ -394,22 +522,39 @@ static void ttm_pool_free_range(struct
> > > ttm_pool *pool, struct ttm_tt *tt,
> > > pgoff_t start_page, pgoff_t
> > > end_page)
> > > {
> > > struct page **pages = &tt->pages[start_page];
> > > + struct ttm_backup *backup = tt->backup;
> > > unsigned int order;
> > > pgoff_t i, nr;
> > >
> > > for (i = start_page; i < end_page; i += nr, pages += nr) {
> > > struct ttm_pool_type *pt = NULL;
> > > + struct page *p = *pages;
> > > +
> > > + if (ttm_backup_page_ptr_is_handle(p)) {
> > > + unsigned long handle =
> > > ttm_backup_page_ptr_to_handle(p);
> > > +
> > > + nr = 1;
> > > + if (handle != 0)
> > > + backup->ops->drop(backup, handle);
> > > + continue;
> > > + }
> > > +
> > > + if (pool) {
> > > + order = ttm_pool_page_order(pool, p);
> > > + nr = (1UL << order);
> > > + if (tt->dma_address)
> > > + ttm_pool_unmap(pool, tt-
> > > >dma_address[i], nr);
> > >
> > > - order = ttm_pool_page_order(pool, *pages);
> > > - nr = (1UL << order);
> > > - if (tt->dma_address)
> > > - ttm_pool_unmap(pool, tt->dma_address[i],
> > > nr);
> > > + pt = ttm_pool_select_type(pool, caching,
> > > order);
> > > + } else {
> > > + order = p->private;
> > > + nr = (1UL << order);
> > > + }
> > >
> > > - pt = ttm_pool_select_type(pool, caching, order);
> > > if (pt)
> > > - ttm_pool_type_give(pt, *pages);
> > > + ttm_pool_type_give(pt, p);
> > > else
> > > - ttm_pool_free_page(pool, caching, order,
> > > *pages);
> > > + ttm_pool_free_page(pool, caching, order,
> > > p);
> > > }
> > > }
> > >
> > > @@ -453,9 +598,36 @@ int ttm_pool_alloc(struct ttm_pool *pool,
> > > struct ttm_tt *tt,
> > > else
> > > gfp_flags |= GFP_HIGHUSER;
> > >
> > > - for (order = min_t(unsigned int, MAX_PAGE_ORDER,
> > > __fls(num_pages));
> > > - num_pages;
> > > - order = min_t(unsigned int, order, __fls(num_pages)))
> > > {
> > > + order = min_t(unsigned int, MAX_PAGE_ORDER,
> > > __fls(num_pages));
> > > +
> > > + if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP) {
> > > + if (!tt->restore) {
> > > + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
> > > +
> > > + if (ctx->gfp_retry_mayfail)
> > > + gfp |= __GFP_RETRY_MAYFAIL;
> > > +
> > > + tt->restore =
> > > + kvzalloc(struct_size(tt->restore,
> > > old_pages,
> > > + (size_t)1 <<
> > > order), gfp);
> > > + if (!tt->restore)
> > > + return -ENOMEM;
> > > + } else if (ttm_pool_restore_valid(tt->restore)) {
> > > + struct ttm_pool_tt_restore *restore = tt-
> > > >restore;
> > > +
> > > + num_pages -= restore->alloced_pages;
> > > + order = min_t(unsigned int, order,
> > > __fls(num_pages));
> > > + pages += restore->alloced_pages;
> > > + r = ttm_pool_restore_tt(restore, tt-
> > > >backup, ctx);
> > > + if (r)
> > > + return r;
> > > + caching = restore->caching_divide;
> > > + }
> > > +
> > > + tt->restore->pool = pool;
> > > + }
> > > +
> > > + for (; num_pages; order = min_t(unsigned int, order,
> > > __fls(num_pages))) {
> > > struct ttm_pool_type *pt;
> > >
> > > page_caching = tt->caching;
> > > @@ -472,11 +644,19 @@ int ttm_pool_alloc(struct ttm_pool *pool,
> > > struct ttm_tt *tt,
> > > r = ttm_pool_page_allocated(pool, order, p,
> > > &dma_addr,
> > > &num_pages,
> > > - &pages);
> > > + &pages,
> > > + tt->restore);
> > > if (r)
> > > goto error_free_page;
> > >
> > > caching = pages;
> > > + if (ttm_pool_restore_valid(tt->restore)) {
> > > + r = ttm_pool_restore_tt(tt->restore, tt->backup, ctx);
> > > + if (r)
> > > + goto error_free_all;
> > > + }
> > > +
> > > if (num_pages < (1 << order))
> > > break;
> > >
> > > @@ -496,9 +676,17 @@ int ttm_pool_alloc(struct ttm_pool *pool,
> > > struct ttm_tt *tt,
> > > caching = pages;
> > > }
> > > r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
> > > - &num_pages, &pages);
> > > + &num_pages, &pages,
> > > + tt->restore);
> > > if (r)
> > > goto error_free_page;
> > > +
> > > + if (ttm_pool_restore_valid(tt->restore)) {
> > > + r = ttm_pool_restore_tt(tt->restore, tt->backup, ctx);
> > > + if (r)
> > > + goto error_free_all;
> > > + }
> > > +
> > > if (PageHighMem(p))
> > > caching = pages;
> > > }
> > > @@ -517,12 +705,26 @@ int ttm_pool_alloc(struct ttm_pool *pool,
> > > struct ttm_tt *tt,
> > > if (r)
> > > goto error_free_all;
> > >
> > > + if (tt->restore) {
> > > + kvfree(tt->restore);
> > > + tt->restore = NULL;
> > > + }
> > > +
> > > + if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP)
> > > + tt->page_flags &= ~(TTM_TT_FLAG_PRIV_BACKED_UP |
> > > + TTM_TT_FLAG_SWAPPED);
> > > +
> > > return 0;
> > >
> > > error_free_page:
> > > ttm_pool_free_page(pool, page_caching, order, p);
> > >
> > > error_free_all:
> > > + if (tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP) {
> > > + tt->restore->caching_divide = caching;
> > > + return r;
> > > + }
> > > +
> > > num_pages = tt->num_pages - num_pages;
> > > caching_divide = caching - tt->pages;
> > > ttm_pool_free_range(pool, tt, tt->caching, 0,
> > > caching_divide);
> > > @@ -549,6 +751,172 @@ void ttm_pool_free(struct ttm_pool *pool,
> > > struct ttm_tt *tt)
> > > }
> > > EXPORT_SYMBOL(ttm_pool_free);
> > >
> > > +/**
> > > + * ttm_pool_release_backed_up() - Release content of a swapped-out
> > > struct ttm_tt
> > > + * @tt: The struct ttm_tt.
> > > + *
> > > + * Release handles with associated content or any remaining pages
> > > of
> > > + * a backed-up struct ttm_tt.
> > > + */
> > > +void ttm_pool_release_backed_up(struct ttm_tt *tt)
> > > +{
> > > + struct ttm_backup *backup = tt->backup;
> > > + struct ttm_pool_tt_restore *restore;
> > > + pgoff_t i, start_page = 0;
> > > + unsigned long handle;
> > > +
> > > + if (!(tt->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP))
> > > + return;
> > > +
> > > + restore = tt->restore;
> > > +
> > > + if (ttm_pool_restore_valid(restore)) {
> > > + pgoff_t nr = 1UL << restore->order;
> > > +
> > > + for (i = restore->restored_pages; i < nr; ++i) {
> > > + struct page *p = restore->old_pages[i];
> > > +
> > > + if (ttm_backup_page_ptr_is_handle(p)) {
> > > + handle =
> > > ttm_backup_page_ptr_to_handle(p);
> > > + if (handle == 0)
> > > + continue;
> > > +
> > > + backup->ops->drop(backup, handle);
> > > + } else if (p) {
> > > + ttm_pool_split_for_swap(restore-
> > > >pool, p);
> > > + __free_pages(p, 0);
> > > + }
> > > + }
> > > + }
> > > +
> > > + if (restore) {
> > > + pgoff_t mid = restore->caching_divide - tt->pages;
> > > +
> > > + start_page = restore->alloced_pages;
> > > + /* Pages that might be dma-mapped and non-cached
> > > */
> > > + ttm_pool_free_range(restore->pool, tt, tt-
> > > >caching,
> > > + 0, mid);
> > > + /* Pages that might be dma-mapped but cached */
> > > + ttm_pool_free_range(restore->pool, tt, ttm_cached,
> > > + mid, restore->alloced_pages);
> > > + }
> > > +
> > > + /* Shrunken pages. Cached and not dma-mapped. */
> > > + ttm_pool_free_range(NULL, tt, ttm_cached, start_page, tt-
> > > >num_pages);
> > > +
> > > + if (restore) {
> > > + kvfree(restore);
> > > + tt->restore = NULL;
> > > + }
> > > +
> > > + tt->page_flags &= ~(TTM_TT_FLAG_PRIV_BACKED_UP |
> > > TTM_TT_FLAG_SWAPPED);
> > > +}
> > > +
> > > +/**
> > > + * ttm_pool_backup_tt() - Back up or purge a struct ttm_tt
> > > + * @pool: The pool used when allocating the struct ttm_tt.
> > > + * @ttm: The struct ttm_tt.
> > > + * @flags: Flags to govern the backup behaviour.
> > > + *
> > > + * Back up or purge a struct ttm_tt. If @purge is true, then
> > > + * all pages will be freed directly to the system rather than to
> > > the pool
> > > + * they were allocated from, making the function behave similarly
> > > to
> > > + * ttm_pool_free(). If @purge is false the pages will be backed up
> > > instead,
> > > + * exchanged for handles.
> > > + * A subsequent call to ttm_pool_alloc() will then read back the
> > > content and
> > > + * a subsequent call to ttm_pool_release_backed_up() will drop it.
> > > + * If backup of a page fails for whatever reason, @ttm will still
> > > be
> > > + * partially backed up, retaining those pages for which backup
> > > fails.
> > > + *
> > > + * Return: Number of pages actually backed up or freed, or
> > > negative
> > > + * error code on error.
> > > + */
> > > +long ttm_pool_backup_tt(struct ttm_pool *pool, struct ttm_tt *ttm,
> > > + const struct ttm_backup_flags *flags)
> > > +{
> > > + struct ttm_backup *backup = ttm->backup;
> > > + struct page *page;
> > > + unsigned long handle;
> > > + gfp_t alloc_gfp;
> > > + gfp_t gfp;
> > > + int ret = 0;
> > > + pgoff_t shrunken = 0;
> > > + pgoff_t i, num_pages;
> > > +
> > > + if ((!get_nr_swap_pages() && !flags->purge) ||
> > > + pool->use_dma_alloc ||
> > > + (ttm->page_flags & TTM_TT_FLAG_PRIV_BACKED_UP))
> > > + return -EBUSY;
> > > +
> > > +#ifdef CONFIG_X86
> > > + /* Anything returned to the system needs to be cached. */
> > > + if (ttm->caching != ttm_cached)
> > > + set_pages_array_wb(ttm->pages, ttm->num_pages);
> > > +#endif
> > > +
> > > + if (ttm->dma_address || flags->purge) {
> > > + for (i = 0; i < ttm->num_pages; i += num_pages) {
> > > + unsigned int order;
> > > +
> > > + page = ttm->pages[i];
> > > + if (unlikely(!page)) {
> > > + num_pages = 1;
> > > + continue;
> > > + }
> > > +
> > > + order = ttm_pool_page_order(pool, page);
> > > + num_pages = 1UL << order;
> > > + if (ttm->dma_address)
> > > + ttm_pool_unmap(pool, ttm-
> > > >dma_address[i],
> > > + num_pages);
> > > + if (flags->purge) {
> > > + shrunken += num_pages;
> > > + page->private = 0;
> > > + __free_pages(page, order);
> > > + memset(ttm->pages + i, 0,
> > > + num_pages * sizeof(*ttm-
> > > >pages));
> > > + }
> > > + }
> > > + }
> > > +
> > > + if (flags->purge)
> > > + return shrunken;
> > > +
> > > + if (pool->use_dma32)
> > > + gfp = GFP_DMA32;
> > > + else
> > > + gfp = GFP_HIGHUSER;
> > > +
> > > + alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN |
> > > __GFP_RETRY_MAYFAIL;
> > > +
> > > + for (i = 0; i < ttm->num_pages; ++i) {
> > > + page = ttm->pages[i];
> > > + if (unlikely(!page))
> > > + continue;
> > > +
> > > + ttm_pool_split_for_swap(pool, page);
> > > +
> > > + handle = backup->ops->backup_page(backup, page,
> > > flags->writeback, i,
> > > + gfp, alloc_gfp);
> > > + if (handle) {
> > > + ttm->pages[i] =
> > > ttm_backup_handle_to_page_ptr(handle);
> > > + put_page(page);
> > > + shrunken++;
> > > + } else {
> > > + /* We allow partially shrunken tts */
> > > + ret = -ENOMEM;
> > > + break;
> > > + }
> > > + cond_resched();
> > > + }
> > > +
> > > + if (shrunken)
> > > + ttm->page_flags |= (TTM_TT_FLAG_PRIV_BACKED_UP |
> > > + TTM_TT_FLAG_SWAPPED);
> > > +
> > > + return shrunken ? shrunken : ret;
> > > +}
> > > +
> > > /**
> > > * ttm_pool_init - Initialize a pool
> > > *
> > > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c
> > > b/drivers/gpu/drm/ttm/ttm_tt.c
> > > index 4b51b9023126..f520b8c93f03 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_tt.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> > > @@ -40,6 +40,7 @@
> > > #include <drm/drm_cache.h>
> > > #include <drm/drm_device.h>
> > > #include <drm/drm_util.h>
> > > +#include <drm/ttm/ttm_backup.h>
> > > #include <drm/ttm/ttm_bo.h>
> > > #include <drm/ttm/ttm_tt.h>
> > >
> > > @@ -158,6 +159,8 @@ static void ttm_tt_init_fields(struct ttm_tt
> > > *ttm,
> > > ttm->swap_storage = NULL;
> > > ttm->sg = bo->sg;
> > > ttm->caching = caching;
> > > + ttm->restore = NULL;
> > > + ttm->backup = NULL;
> > > }
> > >
> > > int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo,
> > > @@ -182,6 +185,12 @@ void ttm_tt_fini(struct ttm_tt *ttm)
> > > fput(ttm->swap_storage);
> > > ttm->swap_storage = NULL;
> > >
> > > + ttm_pool_release_backed_up(ttm);
> > > + if (ttm->backup) {
> >
> > Sorry for the conflicting comments in the last rev, but my last one
> > here [1] regarding making this fully driver-owned didn’t receive a
> > response.
> > I’ll state it again: Do you think the backup fini should be owned by
> > the
> > driver? This would allow the driver to use a global backup for all TT
> > if
> > it wanted to. It would also make it consistent in the sense that the
> > driver would own both the allocation and fini of the backup.
> >
> > Matt
> >
> > [1]
> > https://patchwork.freedesktop.org/patch/602165/?series=131815&rev=6#comment_1104556
>
> Sorry, I typed a lengthy reply to that comment but it somehow got lost.
>
> Anyway, a single backup structure is, I think, possible with the
> current implementation as well. In fact I used to have one of those
> around for the direct-to-swap-cache implementation; basically the
> backup_fini was a NOP IIRC.
>
> One could also imagine the driver registering a large NVMe file to use
> for backup. In that case, each subclassed backup struct would probably
> want to carry a refcount to the underlying file, and each page position
> in the file would be allocated using a struct ida or similar.
>
> So for making it driver-owned, I'm not completely following what you
> mean. I noted in the docs that the driver needs to assign this value,
> but thinking about it, that should probably be done using the
> ttm_tt_init() helper. Although that will require changing the
> ttm_tt_init() interface for all drivers.
>
> But if I missed the point, please get back. Meanwhile, I'm sending out
> v9 with a couple of compilation fixes.
>
I think this makes sense, just want to make sure the design was flexible
enough for all possible use cases. I think you have convinced me it is.
Matt
> Thanks,
> Thomas
>
>
> >
> > > + ttm->backup->ops->fini(ttm->backup);
> > > + ttm->backup = NULL;
> > > + }
> > > +
> > > if (ttm->pages)
> > > kvfree(ttm->pages);
> > > else
> > > @@ -253,6 +262,34 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
> > > }
> > > EXPORT_SYMBOL_FOR_TESTS_ONLY(ttm_tt_swapin);
> > >
> > > +/**
> > > + * ttm_tt_backup() - Helper to back up a struct ttm_tt.
> > > + * @bdev: The TTM device.
> > > + * @tt: The struct ttm_tt.
> > > + * @flags: Flags that govern the backup behaviour.
> > > + *
> > > + * Update the page accounting and call ttm_pool_backup_tt() to free
> > > + * pages
> > > + * or back them up.
> > > + *
> > > + * Return: Number of pages freed or swapped out, or negative error
> > > code on
> > > + * error.
> > > + */
> > > +long ttm_tt_backup(struct ttm_device *bdev, struct ttm_tt *tt,
> > > + const struct ttm_backup_flags flags)
> > > +{
> > > + long ret;
> > > +
> > > + if (WARN_ON(IS_ERR_OR_NULL(tt->backup)))
> > > + return 0;
> > > +
> > > + ret = ttm_pool_backup_tt(&bdev->pool, tt, &flags);
> > > +
> > > + if (ret > 0)
> > > + tt->page_flags &= ~TTM_TT_FLAG_PRIV_POPULATED;
> > > +
> > > + return ret;
> > > +}
> > > +
> > > /**
> > > * ttm_tt_swapout - swap out tt object
> > > *
> > > diff --git a/include/drm/ttm/ttm_pool.h
> > > b/include/drm/ttm/ttm_pool.h
> > > index 160d954a261e..3112a4be835c 100644
> > > --- a/include/drm/ttm/ttm_pool.h
> > > +++ b/include/drm/ttm/ttm_pool.h
> > > @@ -33,6 +33,7 @@
> > >
> > > struct device;
> > > struct seq_file;
> > > +struct ttm_backup_flags;
> > > struct ttm_operation_ctx;
> > > struct ttm_pool;
> > > struct ttm_tt;
> > > @@ -89,6 +90,11 @@ void ttm_pool_fini(struct ttm_pool *pool);
> > >
> > > int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m);
> > >
> > > +void ttm_pool_release_backed_up(struct ttm_tt *tt);
> > > +
> > > +long ttm_pool_backup_tt(struct ttm_pool *pool, struct ttm_tt *ttm,
> > > + const struct ttm_backup_flags *flags);
> > > +
> > > int ttm_pool_mgr_init(unsigned long num_pages);
> > > void ttm_pool_mgr_fini(void);
> > >
> > > diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
> > > index 2b9d856ff388..e42a75cff502 100644
> > > --- a/include/drm/ttm/ttm_tt.h
> > > +++ b/include/drm/ttm/ttm_tt.h
> > > @@ -32,11 +32,13 @@
> > > #include <drm/ttm/ttm_caching.h>
> > > #include <drm/ttm/ttm_kmap_iter.h>
> > >
> > > +struct ttm_backup;
> > > struct ttm_device;
> > > struct ttm_tt;
> > > struct ttm_resource;
> > > struct ttm_buffer_object;
> > > struct ttm_operation_ctx;
> > > +struct ttm_pool_tt_restore;
> > >
> > > /**
> > > * struct ttm_tt - This is a structure holding the pages, caching-
> > > and aperture
> > > @@ -85,6 +87,9 @@ struct ttm_tt {
> > > * fault handling abuses the DMA api a bit and
> > > dma_map_attrs can't be
> > > * used to assure pgprot always matches.
> > > *
> > > + * TTM_TT_FLAG_PRIV_BACKED_UP: TTM internal only. This is
> > > set if the
> > > + * struct ttm_tt has been (possibly partially) backed up.
> > > + *
> > > * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT
> > > USE. This is
> > > * set by TTM after ttm_tt_populate() has successfully
> > > returned, and is
> > > * then unset when TTM calls ttm_tt_unpopulate().
> > > @@ -96,6 +101,7 @@ struct ttm_tt {
> > > #define TTM_TT_FLAG_DECRYPTED BIT(4)
> > >
> > > #define TTM_TT_FLAG_PRIV_POPULATED BIT(5)
> > > +#define TTM_TT_FLAG_PRIV_BACKED_UP BIT(6)
> > > uint32_t page_flags;
> > > /** @num_pages: Number of pages in the page array. */
> > > uint32_t num_pages;
> > > @@ -105,11 +111,19 @@ struct ttm_tt {
> > > dma_addr_t *dma_address;
> > > /** @swap_storage: Pointer to shmem struct file for swap
> > > storage. */
> > > struct file *swap_storage;
> > > + /**
> > > + * @backup: Pointer to backup struct for backed up tts.
> > > + * Could be unified with @swap_storage. Meanwhile, this is
> > > + * a driver-owned field.
> > > + */
> > > + struct ttm_backup *backup;
> > > /**
> > > * @caching: The current caching state of the pages, see
> > > enum
> > > * ttm_caching.
> > > */
> > > enum ttm_caching caching;
> > > + /** @restore: Partial restoration from backup state. TTM
> > > private */
> > > + struct ttm_pool_tt_restore *restore;
> > > };
> > >
> > > /**
> > > @@ -230,6 +244,21 @@ void ttm_tt_mgr_init(unsigned long num_pages,
> > > unsigned long num_dma32_pages);
> > > struct ttm_kmap_iter *ttm_kmap_iter_tt_init(struct
> > > ttm_kmap_iter_tt *iter_tt,
> > > struct ttm_tt *tt);
> > > unsigned long ttm_tt_pages_limit(void);
> > > +
> > > +/**
> > > + * struct ttm_backup_flags - Flags to govern backup behaviour.
> > > + * @purge: Free pages without backing up. Bypass pools.
> > > + * @writeback: Attempt to copy contents directly to swap space,
> > > even
> > > + * if that means blocking on writes to external memory.
> > > + */
> > > +struct ttm_backup_flags {
> > > + u32 purge : 1;
> > > + u32 writeback : 1;
> > > +};
> > > +
> > > +long ttm_tt_backup(struct ttm_device *bdev, struct ttm_tt *tt,
> > > + const struct ttm_backup_flags flags);
> > > +
> > > #if IS_ENABLED(CONFIG_AGP)
> > > #include <linux/agp_backend.h>
> > >
> > > --
> > > 2.44.0
> > >
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v8 3/6] drm/ttm: Use fault-injection to test error paths
2024-08-16 13:37 [PATCH v8 0/6] TTM shrinker helpers and xe buffer object shrinker Thomas Hellström
2024-08-16 13:37 ` [PATCH v8 1/6] drm/ttm: Add a virtual base class for graphics memory backup Thomas Hellström
2024-08-16 13:37 ` [PATCH v8 2/6] drm/ttm/pool: Provide a helper to shrink pages Thomas Hellström
@ 2024-08-16 13:37 ` Thomas Hellström
2024-08-16 16:32 ` kernel test robot
2024-08-16 17:35 ` kernel test robot
2024-08-16 13:37 ` [PATCH v8 4/6] drm/ttm: Add a shrinker helper and export the LRU walker for driver use Thomas Hellström
` (5 subsequent siblings)
8 siblings, 2 replies; 17+ messages in thread
From: Thomas Hellström @ 2024-08-16 13:37 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, Christian König,
Somalapuram Amaranath, Matthew Brost, dri-devel, Paulo Zanoni
Use fault-injection to test partial TTM swapout and interrupted swapin.
Return -EINTR for swapin to test the caller's ability to handle and
restart the swapin, and on swapout perform a partial swapout to test
the swapin and release_backed_up functionality.
v8:
- Use the core fault-injection system.
Cc: Christian König <christian.koenig@amd.com>
Cc: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com> #v7
---
drivers/gpu/drm/ttm/ttm_pool.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 0d224cd9f8eb..0824b66a9aac 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -48,6 +48,11 @@
#include "ttm_module.h"
+#ifdef CONFIG_FAULT_INJECTION
+#include <linux/fault-inject.h>
+static DECLARE_FAULT_ATTR(backup_fault_inject);
+#endif
+
/**
* struct ttm_pool_dma - Helper object for coherent DMA mappings
*
@@ -431,6 +436,7 @@ static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
struct ttm_backup *backup,
struct ttm_operation_ctx *ctx)
{
+ static unsigned long __maybe_unused swappedin;
unsigned int i, nr = 1 << restore->order;
int ret = 0;
@@ -446,6 +452,12 @@ static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
if (handle == 0)
continue;
+ if (IS_ENABLED(CONFIG_FAULT_INJECTION) && ctx->interruptible &&
+ should_fail(&backup_fault_inject, 1)) {
+ ret = -EINTR;
+ break;
+ }
+
ret = backup->ops->copy_backed_up_page
(backup, restore->first_page[i],
handle, ctx->interruptible);
@@ -889,7 +901,14 @@ long ttm_pool_backup_tt(struct ttm_pool *pool, struct ttm_tt *ttm,
alloc_gfp = GFP_KERNEL | __GFP_HIGH | __GFP_NOWARN | __GFP_RETRY_MAYFAIL;
- for (i = 0; i < ttm->num_pages; ++i) {
+ num_pages = ttm->num_pages;
+
+ /* On fault injection, mimic a partial failure by shrinking only half of the pages. */
+
+ if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
+ num_pages = DIV_ROUND_UP(num_pages, 2);
+
+ for (i = 0; i < num_pages; ++i) {
page = ttm->pages[i];
if (unlikely(!page))
continue;
@@ -1178,6 +1197,10 @@ int ttm_pool_mgr_init(unsigned long num_pages)
&ttm_pool_debugfs_globals_fops);
debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_root, NULL,
&ttm_pool_debugfs_shrink_fops);
+#ifdef CONFIG_FAULT_INJECTION
+ fault_create_debugfs_attr("backup_fault_inject", ttm_debugfs_root,
+ &backup_fault_inject);
+#endif
#endif
mm_shrinker = shrinker_alloc(0, "drm-ttm_pool");
--
2.44.0
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH v8 3/6] drm/ttm: Use fault-injection to test error paths
2024-08-16 13:37 ` [PATCH v8 3/6] drm/ttm: Use fault-injection to test error paths Thomas Hellström
@ 2024-08-16 16:32 ` kernel test robot
2024-08-16 17:35 ` kernel test robot
1 sibling, 0 replies; 17+ messages in thread
From: kernel test robot @ 2024-08-16 16:32 UTC (permalink / raw)
To: Thomas Hellström, intel-xe
Cc: oe-kbuild-all, Thomas Hellström, Christian König,
Somalapuram Amaranath, Matthew Brost, dri-devel, Paulo Zanoni
Hi Thomas,
kernel test robot noticed the following build errors:
[auto build test ERROR on next-20240816]
[also build test ERROR on v6.11-rc3]
[cannot apply to drm-xe/drm-xe-next linus/master v6.11-rc3 v6.11-rc2 v6.11-rc1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-ttm-Add-a-virtual-base-class-for-graphics-memory-backup/20240816-213947
base: next-20240816
patch link: https://lore.kernel.org/r/20240816133717.3102-4-thomas.hellstrom%40linux.intel.com
patch subject: [PATCH v8 3/6] drm/ttm: Use fault-injection to test error paths
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20240817/202408170041.l5SO7IpQ-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240817/202408170041.l5SO7IpQ-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408170041.l5SO7IpQ-lkp@intel.com/
All errors (new ones prefixed by >>):
drivers/gpu/drm/ttm/ttm_pool.c: In function 'ttm_pool_restore_tt':
>> drivers/gpu/drm/ttm/ttm_pool.c:456:29: error: implicit declaration of function 'should_fail'; did you mean 'schedule_tail'? [-Werror=implicit-function-declaration]
456 | should_fail(&backup_fault_inject, 1)) {
| ^~~~~~~~~~~
| schedule_tail
>> drivers/gpu/drm/ttm/ttm_pool.c:456:42: error: 'backup_fault_inject' undeclared (first use in this function)
456 | should_fail(&backup_fault_inject, 1)) {
| ^~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/ttm/ttm_pool.c:456:42: note: each undeclared identifier is reported only once for each function it appears in
drivers/gpu/drm/ttm/ttm_pool.c: In function 'ttm_pool_backup_tt':
drivers/gpu/drm/ttm/ttm_pool.c:908:64: error: 'backup_fault_inject' undeclared (first use in this function)
908 | if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
| ^~~~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors
vim +456 drivers/gpu/drm/ttm/ttm_pool.c
434
435 static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
436 struct ttm_backup *backup,
437 struct ttm_operation_ctx *ctx)
438 {
439 static unsigned long __maybe_unused swappedin;
440 unsigned int i, nr = 1 << restore->order;
441 int ret = 0;
442
443 if (!ttm_pool_restore_valid(restore))
444 return 0;
445
446 for (i = restore->restored_pages; i < nr; ++i) {
447 struct page *p = restore->old_pages[i];
448
449 if (ttm_backup_page_ptr_is_handle(p)) {
450 unsigned long handle = ttm_backup_page_ptr_to_handle(p);
451
452 if (handle == 0)
453 continue;
454
455 if (IS_ENABLED(CONFIG_FAULT_INJECTION) && ctx->interruptible &&
> 456 should_fail(&backup_fault_inject, 1)) {
457 ret = -EINTR;
458 break;
459 }
460
461 ret = backup->ops->copy_backed_up_page
462 (backup, restore->first_page[i],
463 handle, ctx->interruptible);
464 if (ret)
465 break;
466
467 backup->ops->drop(backup, handle);
468 } else if (p) {
469 /*
470 * We could probably avoid splitting the old page
471 * using clever logic, but ATM we don't care.
472 */
473 ttm_pool_split_for_swap(restore->pool, p);
474 copy_highpage(restore->first_page[i], p);
475 __free_pages(p, 0);
476 }
477
478 restore->restored_pages++;
479 restore->old_pages[i] = NULL;
480 cond_resched();
481 }
482
483 return ret;
484 }
485
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
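The failures above are config-dependent: should_fail() and the backup_fault_inject
attribute only exist when CONFIG_FAULT_INJECTION is set, and an IS_ENABLED() check
does not remove the compiler's need for the declarations. A minimal sketch of one
way to keep ttm_pool.c building in both configurations (the wrapper name
ttm_pool_should_inject_fault() is invented here for illustration; it is not taken
from the series):

#ifdef CONFIG_FAULT_INJECTION
#include <linux/fault-inject.h>

static DECLARE_FAULT_ATTR(backup_fault_inject);

static bool ttm_pool_should_inject_fault(void)
{
	return should_fail(&backup_fault_inject, 1);
}
#else
static inline bool ttm_pool_should_inject_fault(void)
{
	return false;
}
#endif

The call sites can then test ttm_pool_should_inject_fault() unconditionally, and
the fault-injection identifiers never leak into builds without CONFIG_FAULT_INJECTION.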
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH v8 3/6] drm/ttm: Use fault-injection to test error paths
2024-08-16 13:37 ` [PATCH v8 3/6] drm/ttm: Use fault-injection to test error paths Thomas Hellström
2024-08-16 16:32 ` kernel test robot
@ 2024-08-16 17:35 ` kernel test robot
1 sibling, 0 replies; 17+ messages in thread
From: kernel test robot @ 2024-08-16 17:35 UTC (permalink / raw)
To: Thomas Hellström, intel-xe
Cc: llvm, oe-kbuild-all, Thomas Hellström, Christian König,
Somalapuram Amaranath, Matthew Brost, dri-devel, Paulo Zanoni
Hi Thomas,
kernel test robot noticed the following build errors:
[auto build test ERROR on next-20240816]
[also build test ERROR on v6.11-rc3]
[cannot apply to drm-xe/drm-xe-next linus/master v6.11-rc3 v6.11-rc2 v6.11-rc1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-ttm-Add-a-virtual-base-class-for-graphics-memory-backup/20240816-213947
base: next-20240816
patch link: https://lore.kernel.org/r/20240816133717.3102-4-thomas.hellstrom%40linux.intel.com
patch subject: [PATCH v8 3/6] drm/ttm: Use fault-injection to test error paths
config: x86_64-rhel-8.3-rust (https://download.01.org/0day-ci/archive/20240817/202408170144.N7YoWTCX-lkp@intel.com/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240817/202408170144.N7YoWTCX-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408170144.N7YoWTCX-lkp@intel.com/
All errors (new ones prefixed by >>):
>> drivers/gpu/drm/ttm/ttm_pool.c:456:8: error: call to undeclared function 'should_fail'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
456 | should_fail(&backup_fault_inject, 1)) {
| ^
>> drivers/gpu/drm/ttm/ttm_pool.c:456:21: error: use of undeclared identifier 'backup_fault_inject'
456 | should_fail(&backup_fault_inject, 1)) {
| ^
drivers/gpu/drm/ttm/ttm_pool.c:908:44: error: call to undeclared function 'should_fail'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
908 | if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
| ^
drivers/gpu/drm/ttm/ttm_pool.c:908:57: error: use of undeclared identifier 'backup_fault_inject'
908 | if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
| ^
4 errors generated.
vim +/should_fail +456 drivers/gpu/drm/ttm/ttm_pool.c
434
435 static int ttm_pool_restore_tt(struct ttm_pool_tt_restore *restore,
436 struct ttm_backup *backup,
437 struct ttm_operation_ctx *ctx)
438 {
439 static unsigned long __maybe_unused swappedin;
440 unsigned int i, nr = 1 << restore->order;
441 int ret = 0;
442
443 if (!ttm_pool_restore_valid(restore))
444 return 0;
445
446 for (i = restore->restored_pages; i < nr; ++i) {
447 struct page *p = restore->old_pages[i];
448
449 if (ttm_backup_page_ptr_is_handle(p)) {
450 unsigned long handle = ttm_backup_page_ptr_to_handle(p);
451
452 if (handle == 0)
453 continue;
454
455 if (IS_ENABLED(CONFIG_FAULT_INJECTION) && ctx->interruptible &&
> 456 should_fail(&backup_fault_inject, 1)) {
457 ret = -EINTR;
458 break;
459 }
460
461 ret = backup->ops->copy_backed_up_page
462 (backup, restore->first_page[i],
463 handle, ctx->interruptible);
464 if (ret)
465 break;
466
467 backup->ops->drop(backup, handle);
468 } else if (p) {
469 /*
470 * We could probably avoid splitting the old page
471 * using clever logic, but ATM we don't care.
472 */
473 ttm_pool_split_for_swap(restore->pool, p);
474 copy_highpage(restore->first_page[i], p);
475 __free_pages(p, 0);
476 }
477
478 restore->restored_pages++;
479 restore->old_pages[i] = NULL;
480 cond_resched();
481 }
482
483 return ret;
484 }
485
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v8 4/6] drm/ttm: Add a shrinker helper and export the LRU walker for driver use
2024-08-16 13:37 [PATCH v8 0/6] TTM shrinker helpers and xe buffer object shrinker Thomas Hellström
` (2 preceding siblings ...)
2024-08-16 13:37 ` [PATCH v8 3/6] drm/ttm: Use fault-injection to test error paths Thomas Hellström
@ 2024-08-16 13:37 ` Thomas Hellström
2024-08-16 23:06 ` Matthew Brost
2024-08-16 13:37 ` [PATCH v8 5/6] drm/xe: Add a shrinker for xe bos Thomas Hellström
` (4 subsequent siblings)
8 siblings, 1 reply; 17+ messages in thread
From: Thomas Hellström @ 2024-08-16 13:37 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, Matthew Brost, Somalapuram Amaranath,
Christian König, Paulo Zanoni, dri-devel
Following the design direction communicated here:
https://lore.kernel.org/linux-mm/b7491378-defd-4f1c-31e2-29e4c77e2d67@amd.com/T/#ma918844aa8a6efe8768fdcda0c6590d5c93850c9
Export the LRU walker for driver shrinker use and add a bo
shrinker helper for initial use by the xe driver.
v8:
- Split out from another patch.
- Use a struct for bool arguments to increase readability (Matt Brost).
- Unmap user-space cpu-mappings before shrinking pages.
- Explain non-fatal error codes (Matt Brost)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/ttm/ttm_bo_util.c | 65 +++++++++++++++++++++++++++++++
include/drm/ttm/ttm_bo.h | 17 ++++++++
2 files changed, 82 insertions(+)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 3c07f4712d5c..3490e3347de9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -919,3 +919,68 @@ s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
return progress;
}
+EXPORT_SYMBOL(ttm_lru_walk_for_evict);
+
+/**
+ * ttm_bo_try_shrink - LRU walk helper to shrink a ttm buffer object.
+ * @walk: The struct ttm_lru_walk that describes the walk.
+ * @bo: The buffer object.
+ * @flags: Flags governing the shrinking behaviour.
+ *
+ * The function uses the ttm_tt_backup() functionality to back up or
+ * purge a struct ttm_tt. If the bo is not in system, it's first
+ * moved there, unless @flags.allow_move is false.
+ *
+ * Return: The number of pages shrunken or purged, or
+ * negative error code on failure.
+ */
+long ttm_bo_try_shrink(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo,
+ const struct ttm_bo_shrink_flags flags)
+{
+ static const struct ttm_place sys_placement_flags = {
+ .fpfn = 0,
+ .lpfn = 0,
+ .mem_type = TTM_PL_SYSTEM,
+ .flags = 0,
+ };
+ static struct ttm_placement sys_placement = {
+ .num_placement = 1,
+ .placement = &sys_placement_flags,
+ };
+ struct ttm_operation_ctx *ctx = walk->ctx;
+ struct ttm_tt *tt = bo->ttm;
+ long lret;
+
+ dma_resv_assert_held(bo->base.resv);
+
+ if (!tt || !ttm_tt_is_populated(tt))
+ return 0;
+
+ if (flags.allow_move && bo->resource->mem_type != TTM_PL_SYSTEM) {
+ int ret = ttm_bo_validate(bo, &sys_placement, ctx);
+
+ /* Consider -ENOMEM and -ENOSPC non-fatal. */
+ if (ret) {
+ if (ret == -ENOMEM || ret == -ENOSPC)
+ ret = -EBUSY;
+ return ret;
+ }
+ }
+
+ ttm_bo_unmap_virtual(bo);
+ lret = ttm_bo_wait_ctx(bo, ctx);
+ if (lret < 0) {
+ if (lret == -ERESTARTSYS)
+ return lret;
+ return 0;
+ }
+
+ lret = ttm_tt_backup(bo->bdev, tt, (struct ttm_backup_flags)
+ {.purge = flags.purge,
+ .writeback = flags.writeback});
+ if (lret < 0 && lret != -EINTR)
+ return 0;
+
+ return lret;
+}
+EXPORT_SYMBOL(ttm_bo_try_shrink);
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index d1a732d56259..479ada85cea1 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -229,6 +229,23 @@ struct ttm_lru_walk {
s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
struct ttm_resource_manager *man, s64 target);
+/**
+ * struct ttm_bo_shrink_flags - flags to govern the bo shrinking behaviour
+ * @purge: Purge the content rather than backing it up.
+ * @writeback: Attempt to immediately write content to swap space.
+ * @allow_move: Allow moving to system before shrinking. This is typically
+ * not desired for zombie or ghost objects (a zombie object being one
+ * whose gem object refcount is zero)
+ */
+struct ttm_bo_shrink_flags {
+ u32 purge : 1;
+ u32 writeback : 1;
+ u32 allow_move : 1;
+};
+
+long ttm_bo_try_shrink(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo,
+ const struct ttm_bo_shrink_flags flags);
+
/**
* ttm_bo_get - reference a struct ttm_buffer_object
*
--
2.44.0
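For reference, a driver-side process_bo callback built on this helper could look
roughly like the sketch below; the callback name is illustrative only, and the
real consumer is xe_bo_shrink() introduced in the next patch:

static s64 my_process_bo(struct ttm_lru_walk *walk,
			 struct ttm_buffer_object *bo)
{
	/* Back up rather than purge, try immediate writeback to swap,
	 * and allow a move to TTM_PL_SYSTEM first.
	 */
	return ttm_bo_try_shrink(walk, bo, (struct ttm_bo_shrink_flags) {
			.purge = false,
			.writeback = true,
			.allow_move = true,
	});
}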
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH v8 4/6] drm/ttm: Add a shrinker helper and export the LRU walker for driver use
2024-08-16 13:37 ` [PATCH v8 4/6] drm/ttm: Add a shrinker helper and export the LRU walker for driver use Thomas Hellström
@ 2024-08-16 23:06 ` Matthew Brost
0 siblings, 0 replies; 17+ messages in thread
From: Matthew Brost @ 2024-08-16 23:06 UTC (permalink / raw)
To: Thomas Hellström
Cc: intel-xe, Somalapuram Amaranath, Christian König,
Paulo Zanoni, dri-devel
On Fri, Aug 16, 2024 at 03:37:15PM +0200, Thomas Hellström wrote:
> Following the design direction communicated here:
>
> https://lore.kernel.org/linux-mm/b7491378-defd-4f1c-31e2-29e4c77e2d67@amd.com/T/#ma918844aa8a6efe8768fdcda0c6590d5c93850c9
>
> Export the LRU walker for driver shrinker use and add a bo
> shrinker helper for initial use by the xe driver.
>
> v8:
> - Split out from another patch.
> - Use a struct for bool arguments to increase readability (Matt Brost).
> - Unmap user-space cpu-mappings before shrinking pages.
> - Explain non-fatal error codes (Matt Brost)
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v8 5/6] drm/xe: Add a shrinker for xe bos
2024-08-16 13:37 [PATCH v8 0/6] TTM shrinker helpers and xe buffer object shrinker Thomas Hellström
` (3 preceding siblings ...)
2024-08-16 13:37 ` [PATCH v8 4/6] drm/ttm: Add a shrinker helper and export the LRU walker for driver use Thomas Hellström
@ 2024-08-16 13:37 ` Thomas Hellström
2024-08-16 19:48 ` kernel test robot
2024-08-16 13:37 ` [PATCH v8 6/6] drm/xe: Increase the XE_PL_TT watermark Thomas Hellström
` (3 subsequent siblings)
8 siblings, 1 reply; 17+ messages in thread
From: Thomas Hellström @ 2024-08-16 13:37 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, Christian König,
Somalapuram Amaranath, Matthew Brost, dri-devel, Paulo Zanoni
Rather than relying on the TTM watermark accounting, add a shrinker
for xe_bos in TT or system memory.
Leverage the newly added TTM per-page shrinking and shmem backup
support.
Although xe doesn't fully support WONTNEED (purgeable) bos yet,
introduce and add shrinker support for purgeable ttm_tts.
v2:
- Cleanups, bugfixes and a KUNIT shrinker test.
- Add writeback support, and activate it when called from kswapd.
v3:
- Move the try_shrink() helper to core TTM.
- Minor cleanups.
v4:
- Add runtime pm for the shrinker. Shrinking may require an active
device for CCS metadata copying.
v5:
- Separately purge ghost and zombie objects in the shrinker.
- Fix a format specifier - type inconsistency. (Kernel test robot).
v7:
- s/long/s64/ (Christian König)
- s/sofar/progress/ (Matt Brost)
v8:
- Rebase on Xe KUNIT update.
- Add content verifying to the shrinker kunit test.
- Split out TTM changes to a separate patch.
- Get rid of multiple bool arguments for clarity (Matt Brost)
- Avoid an error pointer dereference (Matt Brost)
- Avoid an integer overflow (Matt Auld)
- Address misc review comments by Matt Brost.
Cc: Christian König <christian.koenig@amd.com>
Cc: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/xe/Makefile | 1 +
drivers/gpu/drm/xe/tests/xe_bo.c | 225 +++++++++++++++++++++
drivers/gpu/drm/xe/xe_bo.c | 166 +++++++++++++--
drivers/gpu/drm/xe/xe_bo.h | 36 ++++
drivers/gpu/drm/xe/xe_device.c | 8 +
drivers/gpu/drm/xe/xe_device_types.h | 2 +
drivers/gpu/drm/xe/xe_shrinker.c | 289 +++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_shrinker.h | 18 ++
8 files changed, 729 insertions(+), 16 deletions(-)
create mode 100644 drivers/gpu/drm/xe/xe_shrinker.c
create mode 100644 drivers/gpu/drm/xe/xe_shrinker.h
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index e11392b5dd3d..2587a624f0cb 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -91,6 +91,7 @@ xe-y += xe_bb.o \
xe_ring_ops.o \
xe_sa.o \
xe_sched_job.o \
+ xe_shrinker.o \
xe_step.o \
xe_sync.o \
xe_tile.o \
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index df9fd907edd4..fb98688a8a45 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -6,6 +6,11 @@
#include <kunit/test.h>
#include <kunit/visibility.h>
+#include <linux/iosys-map.h>
+#include <linux/random.h>
+
+#include <uapi/linux/sysinfo.h>
+
#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"
@@ -359,9 +364,229 @@ static void xe_bo_evict_kunit(struct kunit *test)
evict_test_run_device(xe);
}
+struct xe_bo_link {
+ struct list_head link;
+ struct xe_bo *bo;
+ u32 val;
+};
+
+#define XE_BO_SHRINK_SIZE ((unsigned long)SZ_64M)
+
+static int shrink_test_fill_random(struct xe_bo *bo, struct rnd_state *state,
+ struct xe_bo_link *link)
+{
+ struct iosys_map map;
+ int ret = ttm_bo_vmap(&bo->ttm, &map);
+ size_t __maybe_unused i;
+
+ if (ret)
+ return ret;
+
+ for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
+ u32 val = prandom_u32_state(state);
+
+ iosys_map_wr(&map, i, u32, val);
+ if (i == 0)
+ link->val = val;
+ }
+
+ ttm_bo_vunmap(&bo->ttm, &map);
+ return 0;
+}
+
+static bool shrink_test_verify(struct kunit *test, struct xe_bo *bo,
+ unsigned int bo_nr, struct rnd_state *state,
+ struct xe_bo_link *link)
+{
+ struct iosys_map map;
+ int ret = ttm_bo_vmap(&bo->ttm, &map);
+ size_t i;
+ bool failed = false;
+
+ if (ret) {
+ KUNIT_FAIL(test, "Error mapping bo %u for content check.\n", bo_nr);
+ return true;
+ }
+
+ for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
+ u32 val = prandom_u32_state(state);
+
+ if (iosys_map_rd(&map, i, u32) != val) {
+ KUNIT_FAIL(test, "Content not preserved, bo %u offset 0x%016llx",
+ bo_nr, (unsigned long long)i);
+ kunit_info(test, "Failed value is 0x%08x, recorded 0x%08x\n",
+ (unsigned int)iosys_map_rd(&map, i, u32), val);
+ if (i == 0 && val != link->val)
+ kunit_info(test, "Looks like PRNG is out of sync.\n");
+ failed = true;
+ break;
+ }
+ }
+
+ ttm_bo_vunmap(&bo->ttm, &map);
+
+ return failed;
+}
+
+/*
+ * Try to create system bos corresponding to twice the amount
+ * of available system memory to test shrinker functionality.
+ * If no swap space is available to accommodate the
+ * memory overcommit, mark bos purgeable.
+ */
+static int shrink_test_run_device(struct xe_device *xe)
+{
+ struct kunit *test = kunit_get_current_test();
+ LIST_HEAD(bos);
+ struct xe_bo_link *link, *next;
+ struct sysinfo si;
+ size_t total, alloced;
+ unsigned int interrupted = 0, successful = 0, count = 0;
+ struct rnd_state prng;
+ u64 rand_seed;
+ bool failed = false;
+
+ rand_seed = get_random_u64();
+ prandom_seed_state(&prng, rand_seed);
+
+ si_meminfo(&si);
+ total = si.freeram * si.mem_unit;
+
+ kunit_info(test, "Free ram is %lu bytes. Will allocate twice of that.\n",
+ (unsigned long)total);
+
+ total <<= 1;
+ for (alloced = 0; alloced < total ; alloced += XE_BO_SHRINK_SIZE) {
+ struct xe_bo *bo;
+ unsigned int mem_type;
+ struct xe_ttm_tt *xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
+
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ if (!link) {
+ KUNIT_FAIL(test, "Unexpected link allocation failure\n");
+ failed = true;
+ break;
+ }
+
+ INIT_LIST_HEAD(&link->link);
+
+ /* We can create bos using WC caching here. But it is slower. */
+ bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
+ DRM_XE_GEM_CPU_CACHING_WB,
+ ttm_bo_type_device,
+ XE_BO_FLAG_SYSTEM);
+ if (IS_ERR(bo)) {
+ if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
+ bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
+ KUNIT_FAIL(test, "Error creating bo: %pe\n", bo);
+ kfree(link);
+ failed = true;
+ break;
+ }
+ xe_bo_lock(bo, false);
+ xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
+
+ /*
+ * If we're low on swap entries, we can't shrink unless the bo
+ * is marked purgeable.
+ */
+ if (get_nr_swap_pages() < (XE_BO_SHRINK_SIZE >> PAGE_SHIFT) * 128) {
+ long num_pages = xe_tt->ttm.num_pages;
+
+ xe_tt->purgeable = true;
+ xe_shrinker_mod_pages(xe->mem.shrinker, -num_pages,
+ num_pages);
+ } else {
+ int ret = shrink_test_fill_random(bo, &prng, link);
+
+ if (ret) {
+ xe_bo_unlock(bo);
+ xe_bo_put(bo);
+ KUNIT_FAIL(test, "Error filling bo with random data: %pe\n",
+ ERR_PTR(ret));
+ kfree(link);
+ failed = true;
+ break;
+ }
+ }
+
+ mem_type = bo->ttm.resource->mem_type;
+ xe_bo_unlock(bo);
+ link->bo = bo;
+ list_add_tail(&link->link, &bos);
+
+ if (mem_type != XE_PL_TT) {
+ KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n",
+ bo->ttm.resource->mem_type);
+ failed = true;
+ }
+ cond_resched();
+ if (signal_pending(current))
+ break;
+ }
+
+ /*
+ * Read back and destroy bos. Reset the pseudo-random seed to get an
+ * identical pseudo-random number sequence for readback.
+ */
+ prandom_seed_state(&prng, rand_seed);
+ list_for_each_entry_safe(link, next, &bos, link) {
+ static struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct xe_bo *bo = link->bo;
+ struct xe_ttm_tt *xe_tt;
+ int ret;
+
+ count++;
+ if (!signal_pending(current) && !failed) {
+ bool purgeable, intr = false;
+
+ xe_bo_lock(bo, NULL);
+
+ /* xe_tt->purgeable is cleared on validate. */
+ xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
+ purgeable = xe_tt->purgeable;
+ do {
+ ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx);
+ if (ret == -EINTR)
+ intr = true;
+ } while (ret == -EINTR && !signal_pending(current));
+
+ if (!ret && !purgeable)
+ failed = shrink_test_verify(test, bo, count, &prng, link);
+
+ xe_bo_unlock(bo);
+ if (ret) {
+ KUNIT_FAIL(test, "Validation failed: %pe\n",
+ ERR_PTR(ret));
+ failed = true;
+ } else if (intr) {
+ interrupted++;
+ } else {
+ successful++;
+ }
+ }
+ xe_bo_put(link->bo);
+ list_del(&link->link);
+ kfree(link);
+ }
+ kunit_info(test, "Readbacks interrupted: %u successful: %u\n",
+ interrupted, successful);
+
+ return 0;
+}
+
+static void xe_bo_shrink_kunit(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+
+ shrink_test_run_device(xe);
+}
+
static struct kunit_case xe_bo_tests[] = {
KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
+ {.speed = KUNIT_SPEED_SLOW}),
{}
};
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 800119c8fc8d..51577ca62893 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -10,6 +10,7 @@
#include <drm/drm_drv.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_managed.h>
+#include <drm/ttm/ttm_backup.h>
#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_tt.h>
@@ -25,6 +26,7 @@
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_res_cursor.h"
+#include "xe_shrinker.h"
#include "xe_trace_bo.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_vm.h"
@@ -278,11 +280,15 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
}
}
+/* struct xe_ttm_tt - Subclassed ttm_tt for xe */
struct xe_ttm_tt {
struct ttm_tt ttm;
- struct device *dev;
+ /** @xe: The xe device */
+ struct xe_device *xe;
struct sg_table sgt;
struct sg_table *sg;
+ /** @purgeable: Whether the bo is purgeable (WONTNEED) */
+ bool purgeable;
};
static int xe_tt_map_sg(struct ttm_tt *tt)
@@ -291,7 +297,8 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
unsigned long num_pages = tt->num_pages;
int ret;
- XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
+ XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
+ !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));
if (xe_tt->sg)
return 0;
@@ -299,13 +306,13 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
num_pages, 0,
(u64)num_pages << PAGE_SHIFT,
- xe_sg_segment_size(xe_tt->dev),
+ xe_sg_segment_size(xe_tt->xe->drm.dev),
GFP_KERNEL);
if (ret)
return ret;
xe_tt->sg = &xe_tt->sgt;
- ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
+ ret = dma_map_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
if (ret) {
sg_free_table(xe_tt->sg);
@@ -321,7 +328,7 @@ static void xe_tt_unmap_sg(struct ttm_tt *tt)
struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
if (xe_tt->sg) {
- dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
+ dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg,
DMA_BIDIRECTIONAL, 0);
sg_free_table(xe_tt->sg);
xe_tt->sg = NULL;
@@ -336,21 +343,47 @@ struct sg_table *xe_bo_sg(struct xe_bo *bo)
return xe_tt->sg;
}
+/*
+ * Account ttm pages against the device shrinker's shrinkable and
+ * purgeable counts.
+ */
+static void xe_ttm_tt_account_add(struct ttm_tt *tt)
+{
+ struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+ if (xe_tt->purgeable)
+ xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, tt->num_pages);
+ else
+ xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, tt->num_pages, 0);
+}
+
+static void xe_ttm_tt_account_subtract(struct ttm_tt *tt)
+{
+ struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+
+ if (xe_tt->purgeable)
+ xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, -(long)tt->num_pages);
+ else
+ xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, -(long)tt->num_pages, 0);
+}
+
static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
u32 page_flags)
{
struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
struct xe_device *xe = xe_bo_device(bo);
- struct xe_ttm_tt *tt;
+ struct xe_ttm_tt *xe_tt;
+ struct ttm_tt *tt;
unsigned long extra_pages;
enum ttm_caching caching = ttm_cached;
int err;
- tt = kzalloc(sizeof(*tt), GFP_KERNEL);
- if (!tt)
+ xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
+ if (!xe_tt)
return NULL;
- tt->dev = xe->drm.dev;
+ tt = &xe_tt->ttm;
+ xe_tt->xe = xe;
extra_pages = 0;
if (xe_bo_needs_ccs_pages(bo))
@@ -396,42 +429,135 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
caching = ttm_uncached;
}
- err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
+ if (ttm_bo->type != ttm_bo_type_sg)
+ page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+
+ err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
if (err) {
- kfree(tt);
+ kfree(xe_tt);
return NULL;
}
- return &tt->ttm;
+ tt->backup = ttm_backup_shmem_create((loff_t)tt->num_pages << PAGE_SHIFT);
+ if (IS_ERR(tt->backup)) {
+ tt->backup = NULL;
+ ttm_tt_fini(tt);
+ kfree(xe_tt);
+ return NULL;
+ }
+
+ return tt;
}
static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
struct ttm_operation_ctx *ctx)
{
+ struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
int err;
/*
* dma-bufs are not populated with pages, and the dma-
* addresses are set up when moved to XE_PL_TT.
*/
- if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+ if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
+ !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
return 0;
err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
if (err)
return err;
- return err;
+ xe_tt->purgeable = false;
+ xe_ttm_tt_account_add(tt);
+
+ return 0;
}
static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
{
- if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
+ if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
+ !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
return;
xe_tt_unmap_sg(tt);
- return ttm_pool_free(&ttm_dev->pool, tt);
+ ttm_pool_free(&ttm_dev->pool, tt);
+ xe_ttm_tt_account_subtract(tt);
+}
+
+/**
+ * xe_bo_shrink() - Try to shrink an xe bo.
+ * @walk: The walk parameters.
+ * @bo: The TTM buffer object
+ * @flags: Flags governing the shrink behaviour.
+ *
+ * Try to shrink or purge a bo, and if successful, unmap dma.
+ * Note that we also need to be able to handle non-xe bos
+ * (ghost bos), but only if the struct ttm_tt is embedded in
+ * a struct xe_ttm_tt.
+ *
+ * Return: The number of pages shrunken or purged, or negative error
+ * code on failure.
+ */
+long xe_bo_shrink(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo,
+ const struct xe_bo_shrink_flags flags)
+{
+ struct ttm_tt *tt = bo->ttm;
+ struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+ struct ttm_place place = {.mem_type = bo->resource->mem_type};
+ struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
+ struct xe_device *xe = xe_tt->xe;
+ bool needs_rpm;
+ long lret = 0L;
+
+ if (!tt || !ttm_tt_is_populated(tt) ||
+ !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE) ||
+ (flags.purge && !xe_tt->purgeable))
+ return 0L;
+
+ if (!ttm_bo_eviction_valuable(bo, &place))
+ return 0L;
+
+ /* Beware of zombies (GEM object refcount == 0) and ghosts. */
+ if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo)) {
+ lret = ttm_bo_wait_ctx(bo, walk->ctx);
+ if (lret)
+ return lret;
+
+ /*
+ * We don't allow move from TT to SYSTEM for these objects,
+ * hence we need to unmap sg first.
+ */
+ xe_tt_unmap_sg(tt);
+ return ttm_bo_try_shrink(walk, bo, (struct ttm_bo_shrink_flags)
+ {.purge = true,
+ .writeback = false,
+ .allow_move = false});
+ }
+
+ /* System CCS needs gpu copy when moving PL_TT -> PL_SYSTEM */
+ needs_rpm = (!IS_DGFX(xe) && bo->resource->mem_type != XE_PL_SYSTEM &&
+ xe_bo_needs_ccs_pages(xe_bo) && !xe_tt->purgeable);
+ if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
+ goto out_unref;
+
+ lret = ttm_bo_try_shrink(walk, bo, (struct ttm_bo_shrink_flags)
+ {.purge = xe_tt->purgeable,
+ .writeback = flags.writeback,
+ .allow_move = true});
+ if (needs_rpm)
+ xe_pm_runtime_put(xe);
+
+ if (lret > 0) {
+ xe_assert(xe, !ttm_tt_is_populated(tt));
+
+ xe_ttm_tt_account_subtract(tt);
+ }
+
+out_unref:
+ xe_bo_put(xe_bo);
+
+ return lret;
}
static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
@@ -1699,6 +1825,8 @@ int xe_bo_pin_external(struct xe_bo *bo)
}
ttm_bo_pin(&bo->ttm);
+ if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
+ xe_ttm_tt_account_subtract(bo->ttm.ttm);
/*
* FIXME: If we always use the reserve / unreserve functions for locking
@@ -1757,6 +1885,8 @@ int xe_bo_pin(struct xe_bo *bo)
}
ttm_bo_pin(&bo->ttm);
+ if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
+ xe_ttm_tt_account_subtract(bo->ttm.ttm);
/*
* FIXME: If we always use the reserve / unreserve functions for locking
@@ -1791,6 +1921,8 @@ void xe_bo_unpin_external(struct xe_bo *bo)
spin_unlock(&xe->pinned.lock);
ttm_bo_unpin(&bo->ttm);
+ if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
+ xe_ttm_tt_account_add(bo->ttm.ttm);
/*
* FIXME: If we always use the reserve / unreserve functions for locking
@@ -1819,6 +1951,8 @@ void xe_bo_unpin(struct xe_bo *bo)
}
ttm_bo_unpin(&bo->ttm);
+ if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
+ xe_ttm_tt_account_add(bo->ttm.ttm);
}
/**
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 1c9dc8adaaa3..ecdb76ce5069 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -64,6 +64,7 @@
#define XE_BO_PROPS_INVALID (-1)
struct sg_table;
+struct ttm_lru_walk;
struct xe_bo *xe_bo_alloc(void);
void xe_bo_free(struct xe_bo *bo);
@@ -127,6 +128,28 @@ static inline struct xe_bo *xe_bo_get(struct xe_bo *bo)
return bo;
}
+/**
+ * xe_bo_get_unless_zero() - Conditionally obtain a GEM object refcount on an
+ * xe bo
+ * @bo: The bo for which we want to obtain a refcount.
+ *
+ * There is a short window between where the bo's GEM object refcount reaches
+ * zero and where we put the final ttm_bo reference. Code in the eviction and
+ * shrinking paths should therefore attempt to grab a gem object reference before
+ * trying to use members outside of the base class ttm object. This function is
+ * intended for that purpose. On successful return, this function must be paired
+ * with an xe_bo_put().
+ *
+ * Return: @bo on success, NULL on failure.
+ */
+static inline __must_check struct xe_bo *xe_bo_get_unless_zero(struct xe_bo *bo)
+{
+ if (!bo || !kref_get_unless_zero(&bo->ttm.base.refcount))
+ return NULL;
+
+ return bo;
+}
+
static inline void xe_bo_put(struct xe_bo *bo)
{
if (bo)
@@ -316,6 +339,19 @@ static inline unsigned int xe_sg_segment_size(struct device *dev)
#define i915_gem_object_flush_if_display(obj) ((void)(obj))
+/**
+ * struct xe_bo_shrink_flags - flags governing the shrink behaviour.
+ * @purge: Only purging allowed. Don't shrink if bo not purgeable.
+ * @writeback: Attempt to immediately move content to swap.
+ */
+struct xe_bo_shrink_flags {
+ u32 purge : 1;
+ u32 writeback : 1;
+};
+
+long xe_bo_shrink(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo,
+ const struct xe_bo_shrink_flags flags);
+
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
/**
* xe_bo_is_mem_type - Whether the bo currently resides in the given
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 206328387150..4dc273dbad85 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -47,6 +47,7 @@
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_query.h"
+#include "xe_shrinker.h"
#include "xe_sriov.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
@@ -294,6 +295,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
if (xe->unordered_wq)
destroy_workqueue(xe->unordered_wq);
+ if (!IS_ERR_OR_NULL(xe->mem.shrinker))
+ xe_shrinker_destroy(xe->mem.shrinker);
+
ttm_device_fini(&xe->ttm);
}
@@ -323,6 +327,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
if (err)
goto err;
+ xe->mem.shrinker = xe_shrinker_create(xe);
+ if (IS_ERR(xe->mem.shrinker))
+ return ERR_CAST(xe->mem.shrinker);
+
xe->info.devid = pdev->device;
xe->info.revid = pdev->revision;
xe->info.force_execlist = xe_modparam.force_execlist;
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 16a24eadd94b..62903c265f02 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -339,6 +339,8 @@ struct xe_device {
struct xe_mem_region vram;
/** @mem.sys_mgr: system TTM manager */
struct ttm_resource_manager sys_mgr;
+ /** @mem.shrinker: system memory shrinker. */
+ struct xe_shrinker *shrinker;
} mem;
/** @sriov: device level virtualization data */
diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
new file mode 100644
index 000000000000..4de98c1dd4a7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_shrinker.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <linux/shrinker.h>
+#include <linux/swap.h>
+
+#include <drm/ttm/ttm_bo.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "xe_bo.h"
+#include "xe_pm.h"
+#include "xe_shrinker.h"
+
+/**
+ * struct xe_shrinker - per-device shrinker
+ * @xe: Back pointer to the device.
+ * @lock: Lock protecting accounting.
+ * @shrinkable_pages: Number of pages that are currently shrinkable.
+ * @purgeable_pages: Number of pages that are currently purgeable.
+ * @shrink: Pointer to the mm shrinker.
+ * @pm_worker: Worker to wake up the device if required.
+ */
+struct xe_shrinker {
+ struct xe_device *xe;
+ rwlock_t lock;
+ long shrinkable_pages;
+ long purgeable_pages;
+ struct shrinker *shrink;
+ struct work_struct pm_worker;
+};
+
+/**
+ * struct xe_shrink_lru_walk - lru_walk subclass for shrinker
+ * @walk: The embedded base class.
+ * @xe: Pointer to the xe device.
+ * @purge: Purgeable-only request from the shrinker.
+ * @writeback: Try to write back to persistent storage.
+ */
+struct xe_shrink_lru_walk {
+ struct ttm_lru_walk walk;
+ struct xe_device *xe;
+ bool purge;
+ bool writeback;
+};
+
+static struct xe_shrinker *to_xe_shrinker(struct shrinker *shrink)
+{
+ return shrink->private_data;
+}
+
+static struct xe_shrink_lru_walk *
+to_xe_shrink_lru_walk(struct ttm_lru_walk *walk)
+{
+ return container_of(walk, struct xe_shrink_lru_walk, walk);
+}
+
+/**
+ * xe_shrinker_mod_pages() - Modify shrinker page accounting
+ * @shrinker: Pointer to the struct xe_shrinker.
+ * @shrinkable: Shrinkable pages delta. May be negative.
+ * @purgeable: Purgeable page delta. May be negative.
+ *
+ * Modifies the shrinkable and purgeable pages accounting.
+ */
+void
+xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgeable)
+{
+ write_lock(&shrinker->lock);
+ shrinker->shrinkable_pages += shrinkable;
+ shrinker->purgeable_pages += purgeable;
+ write_unlock(&shrinker->lock);
+}
+
+static s64 xe_shrinker_process_bo(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo)
+{
+ struct xe_shrink_lru_walk *shrink_walk = to_xe_shrink_lru_walk(walk);
+
+ return xe_bo_shrink(walk, bo, (struct xe_bo_shrink_flags)
+ {.purge = shrink_walk->purge,
+ .writeback = shrink_walk->writeback});
+}
+
+static s64 xe_shrinker_walk(struct xe_shrink_lru_walk *shrink_walk, s64 target)
+{
+ struct xe_device *xe = shrink_walk->xe;
+ struct ttm_resource_manager *man;
+ unsigned int mem_type;
+ s64 progress = 0;
+ s64 lret;
+
+ for (mem_type = XE_PL_SYSTEM; mem_type <= XE_PL_TT; ++mem_type) {
+ man = ttm_manager_type(&xe->ttm, mem_type);
+ if (!man || !man->use_tt)
+ continue;
+
+ lret = ttm_lru_walk_for_evict(&shrink_walk->walk, &xe->ttm, man, target);
+ if (lret < 0)
+ return lret;
+
+ progress += lret;
+ if (progress >= target)
+ break;
+ }
+
+ return progress;
+}
+
+static unsigned long
+xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct xe_shrinker *shrinker = to_xe_shrinker(shrink);
+ unsigned long num_pages;
+
+ num_pages = get_nr_swap_pages();
+ read_lock(&shrinker->lock);
+ num_pages = min_t(unsigned long, num_pages, shrinker->shrinkable_pages);
+ num_pages += shrinker->purgeable_pages;
+ read_unlock(&shrinker->lock);
+
+ return num_pages ? num_pages : SHRINK_EMPTY;
+}
+
+static const struct ttm_lru_walk_ops xe_shrink_ops = {
+ .process_bo = xe_shrinker_process_bo,
+};
+
+/*
+ * Check if we need runtime pm, and if so try to grab a reference if
+ * already active. If grabbing a reference fails, queue a worker that
+ * does it for us outside of reclaim, but don't wait for it to complete.
+ * If bo shrinking needs an rpm reference and we don't have it (yet),
+ * that bo will be skipped anyway.
+ */
+static bool xe_shrinker_runtime_pm_get(struct xe_shrinker *shrinker, bool force,
+ unsigned long nr_to_scan)
+{
+ struct xe_device *xe = shrinker->xe;
+
+ if (IS_DGFX(xe) || !xe_device_has_flat_ccs(xe) ||
+ !get_nr_swap_pages())
+ return false;
+
+ if (!force) {
+ read_lock(&shrinker->lock);
+ force = (nr_to_scan > shrinker->purgeable_pages);
+ read_unlock(&shrinker->lock);
+ if (!force)
+ return false;
+ }
+
+ if (!xe_pm_runtime_get_if_active(xe)) {
+ queue_work(xe->unordered_wq, &shrinker->pm_worker);
+ return false;
+ }
+
+ return true;
+}
+
+static void xe_shrinker_runtime_pm_put(struct xe_shrinker *shrinker, bool runtime_pm)
+{
+ if (runtime_pm)
+ xe_pm_runtime_put(shrinker->xe);
+}
+
+static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct xe_shrinker *shrinker = to_xe_shrinker(shrink);
+ bool is_kswapd = current_is_kswapd();
+ struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ .no_wait_gpu = !is_kswapd,
+ };
+ unsigned long nr_to_scan, freed = 0;
+ struct xe_shrink_lru_walk shrink_walk = {
+ .walk = {
+ .ops = &xe_shrink_ops,
+ .ctx = &ctx,
+ .trylock_only = true,
+ },
+ .xe = shrinker->xe,
+ .purge = true,
+ .writeback = is_kswapd,
+ };
+ bool runtime_pm;
+ bool purgeable;
+ s64 ret;
+
+ sc->nr_scanned = 0;
+ nr_to_scan = sc->nr_to_scan;
+
+ read_lock(&shrinker->lock);
+ purgeable = !!shrinker->purgeable_pages;
+ read_unlock(&shrinker->lock);
+
+ /* Might need runtime PM. Try to wake early if it looks like it. */
+ runtime_pm = xe_shrinker_runtime_pm_get(shrinker, false, nr_to_scan);
+
+ while (purgeable && freed < nr_to_scan) {
+ ret = xe_shrinker_walk(&shrink_walk, nr_to_scan);
+ if (ret <= 0)
+ break;
+
+ freed += ret;
+ }
+
+ sc->nr_scanned = freed;
+ if (freed < nr_to_scan)
+ nr_to_scan -= freed;
+ else
+ nr_to_scan = 0;
+ if (!nr_to_scan)
+ goto out;
+
+ /* If we didn't wake before, try to do it now if needed. */
+ if (!runtime_pm)
+ runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0);
+
+ shrink_walk.purge = false;
+ nr_to_scan = sc->nr_to_scan;
+ while (freed < nr_to_scan) {
+ ret = xe_shrinker_walk(&shrink_walk, nr_to_scan);
+ if (ret <= 0)
+ break;
+
+ freed += ret;
+ }
+
+ sc->nr_scanned = freed;
+
+out:
+ xe_shrinker_runtime_pm_put(shrinker, runtime_pm);
+ return freed ? freed : SHRINK_STOP;
+}
+
+/* Wake up the device for shrinking. */
+static void xe_shrinker_pm(struct work_struct *work)
+{
+ struct xe_shrinker *shrinker =
+ container_of(work, typeof(*shrinker), pm_worker);
+
+ xe_pm_runtime_get(shrinker->xe);
+ xe_pm_runtime_put(shrinker->xe);
+}
+
+/**
+ * xe_shrinker_create() - Create an xe per-device shrinker
+ * @xe: Pointer to the xe device.
+ *
+ * Return: A pointer to the created shrinker on success,
+ * or a negative error pointer on failure.
+ */
+struct xe_shrinker *xe_shrinker_create(struct xe_device *xe)
+{
+ struct xe_shrinker *shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL);
+
+ if (!shrinker)
+ return ERR_PTR(-ENOMEM);
+
+ shrinker->shrink = shrinker_alloc(0, "xe system shrinker");
+ if (!shrinker->shrink) {
+ kfree(shrinker);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ INIT_WORK(&shrinker->pm_worker, xe_shrinker_pm);
+ shrinker->xe = xe;
+ rwlock_init(&shrinker->lock);
+ shrinker->shrink->count_objects = xe_shrinker_count;
+ shrinker->shrink->scan_objects = xe_shrinker_scan;
+ shrinker->shrink->private_data = shrinker;
+ shrinker_register(shrinker->shrink);
+
+ return shrinker;
+}
+
+/**
+ * xe_shrinker_destroy() - Destroy an xe per-device shrinker
+ * @shrinker: Pointer to the shrinker to destroy.
+ */
+void xe_shrinker_destroy(struct xe_shrinker *shrinker)
+{
+ xe_assert(shrinker->xe, !shrinker->shrinkable_pages);
+ xe_assert(shrinker->xe, !shrinker->purgeable_pages);
+ shrinker_free(shrinker->shrink);
+ flush_work(&shrinker->pm_worker);
+ kfree(shrinker);
+}
diff --git a/drivers/gpu/drm/xe/xe_shrinker.h b/drivers/gpu/drm/xe/xe_shrinker.h
new file mode 100644
index 000000000000..28a038f4fcbf
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_shrinker.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_SHRINKER_H_
+#define _XE_SHRINKER_H_
+
+struct xe_shrinker;
+struct xe_device;
+
+void xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgeable);
+
+struct xe_shrinker *xe_shrinker_create(struct xe_device *xe);
+
+void xe_shrinker_destroy(struct xe_shrinker *shrinker);
+
+#endif
--
2.44.0
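A quick worked example of the xe_shrinker_count() logic above: shrinkable pages
can only be reclaimed via swap, so they are clamped to the remaining swap space,
while purgeable pages are always counted. With shrinkable_pages = 1000,
purgeable_pages = 200 and 300 free swap pages, the shrinker reports
min(1000, 300) + 200 = 500 reclaimable pages.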
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH v8 5/6] drm/xe: Add a shrinker for xe bos
2024-08-16 13:37 ` [PATCH v8 5/6] drm/xe: Add a shrinker for xe bos Thomas Hellström
@ 2024-08-16 19:48 ` kernel test robot
0 siblings, 0 replies; 17+ messages in thread
From: kernel test robot @ 2024-08-16 19:48 UTC (permalink / raw)
To: Thomas Hellström, intel-xe
Cc: llvm, oe-kbuild-all, Thomas Hellström, Christian König,
Somalapuram Amaranath, Matthew Brost, dri-devel, Paulo Zanoni
Hi Thomas,
kernel test robot noticed the following build warnings:
[auto build test WARNING on next-20240816]
[cannot apply to drm-xe/drm-xe-next linus/master v6.11-rc3 v6.11-rc2 v6.11-rc1 v6.11-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-ttm-Add-a-virtual-base-class-for-graphics-memory-backup/20240816-213947
base: next-20240816
patch link: https://lore.kernel.org/r/20240816133717.3102-6-thomas.hellstrom%40linux.intel.com
patch subject: [PATCH v8 5/6] drm/xe: Add a shrinker for xe bos
config: s390-allmodconfig (https://download.01.org/0day-ci/archive/20240817/202408170320.SsODtn1a-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 26670e7fa4f032a019d23d56c6a02926e854e8af)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240817/202408170320.SsODtn1a-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408170320.SsODtn1a-lkp@intel.com/
All warnings (new ones prefixed by >>):
In file included from drivers/gpu/drm/xe/xe_bo.c:6:
In file included from drivers/gpu/drm/xe/xe_bo.h:9:
In file included from include/drm/ttm/ttm_tt.h:30:
In file included from include/linux/pagemap.h:8:
In file included from include/linux/mm.h:2199:
include/linux/vmstat.h:504:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
504 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
| ~~~~~~~~~~~~~~~~~~~~~ ^
505 | item];
| ~~~~
include/linux/vmstat.h:511:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
511 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
| ~~~~~~~~~~~~~~~~~~~~~ ^
512 | NR_VM_NUMA_EVENT_ITEMS +
| ~~~~~~~~~~~~~~~~~~~~~~
include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
518 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
| ~~~~~~~~~~~ ^ ~~~
include/linux/vmstat.h:524:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
524 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
| ~~~~~~~~~~~~~~~~~~~~~ ^
525 | NR_VM_NUMA_EVENT_ITEMS +
| ~~~~~~~~~~~~~~~~~~~~~~
In file included from drivers/gpu/drm/xe/xe_bo.c:6:
In file included from drivers/gpu/drm/xe/xe_bo.h:11:
In file included from drivers/gpu/drm/xe/xe_bo_types.h:9:
In file included from include/linux/iosys-map.h:10:
In file included from include/linux/io.h:14:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
548 | val = __raw_readb(PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
561 | val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
| ^
include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
| ^
In file included from drivers/gpu/drm/xe/xe_bo.c:6:
In file included from drivers/gpu/drm/xe/xe_bo.h:11:
In file included from drivers/gpu/drm/xe/xe_bo_types.h:9:
In file included from include/linux/iosys-map.h:10:
In file included from include/linux/io.h:14:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
574 | val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
| ^
include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
| ^
In file included from drivers/gpu/drm/xe/xe_bo.c:6:
In file included from drivers/gpu/drm/xe/xe_bo.h:11:
In file included from drivers/gpu/drm/xe/xe_bo_types.h:9:
In file included from include/linux/iosys-map.h:10:
In file included from include/linux/io.h:14:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
585 | __raw_writeb(value, PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
595 | __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
605 | __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
693 | readsb(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
701 | readsw(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
709 | readsl(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
718 | writesb(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
727 | writesw(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
736 | writesl(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
In file included from drivers/gpu/drm/xe/xe_bo.c:2496:
>> drivers/gpu/drm/xe/tests/xe_bo.c:462:42: warning: variable 'bo' is uninitialized when used here [-Wuninitialized]
462 | struct xe_ttm_tt *xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
| ^~
include/linux/container_of.h:19:26: note: expanded from macro 'container_of'
19 | void *__mptr = (void *)(ptr); \
| ^~~
drivers/gpu/drm/xe/tests/xe_bo.c:460:19: note: initialize the variable 'bo' to silence this warning
460 | struct xe_bo *bo;
| ^
| = NULL
17 warnings generated.
vim +/bo +462 drivers/gpu/drm/xe/tests/xe_bo.c
430
431 /*
432 * Try to create system bos corresponding to twice the amount
433 * of available system memory to test shrinker functionality.
434 * If no swap space is available to accommodate the
435 * memory overcommit, mark bos purgeable.
436 */
437 static int shrink_test_run_device(struct xe_device *xe)
438 {
439 struct kunit *test = kunit_get_current_test();
440 LIST_HEAD(bos);
441 struct xe_bo_link *link, *next;
442 struct sysinfo si;
443 size_t total, alloced;
444 unsigned int interrupted = 0, successful = 0, count = 0;
445 struct rnd_state prng;
446 u64 rand_seed;
447 bool failed = false;
448
449 rand_seed = get_random_u64();
450 prandom_seed_state(&prng, rand_seed);
451
452 si_meminfo(&si);
453 total = si.freeram * si.mem_unit;
454
455 kunit_info(test, "Free ram is %lu bytes. Will allocate twice of that.\n",
456 (unsigned long)total);
457
458 total <<= 1;
459 for (alloced = 0; alloced < total ; alloced += XE_BO_SHRINK_SIZE) {
460 struct xe_bo *bo;
461 unsigned int mem_type;
> 462 struct xe_ttm_tt *xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
463
464 link = kzalloc(sizeof(*link), GFP_KERNEL);
465 if (!link) {
466 KUNIT_FAIL(test, "Unexpected link allocation failure\n");
467 failed = true;
468 break;
469 }
470
471 INIT_LIST_HEAD(&link->link);
472
473 /* We can create bos using WC caching here. But it is slower. */
474 bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
475 DRM_XE_GEM_CPU_CACHING_WB,
476 ttm_bo_type_device,
477 XE_BO_FLAG_SYSTEM);
478 if (IS_ERR(bo)) {
479 if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
480 bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
481 KUNIT_FAIL(test, "Error creating bo: %pe\n", bo);
482 kfree(link);
483 failed = true;
484 break;
485 }
486 xe_bo_lock(bo, false);
487 xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
488
489 /*
490 * If we're low on swap entries, we can't shrink unless the bo
491 * is marked purgeable.
492 */
493 if (get_nr_swap_pages() < (XE_BO_SHRINK_SIZE >> PAGE_SHIFT) * 128) {
494 long num_pages = xe_tt->ttm.num_pages;
495
496 xe_tt->purgeable = true;
497 xe_shrinker_mod_pages(xe->mem.shrinker, -num_pages,
498 num_pages);
499 } else {
500 int ret = shrink_test_fill_random(bo, &prng, link);
501
502 if (ret) {
503 xe_bo_unlock(bo);
504 xe_bo_put(bo);
505 KUNIT_FAIL(test, "Error filling bo with random data: %pe\n",
506 ERR_PTR(ret));
507 kfree(link);
508 failed = true;
509 break;
510 }
511 }
512
513 mem_type = bo->ttm.resource->mem_type;
514 xe_bo_unlock(bo);
515 link->bo = bo;
516 list_add_tail(&link->link, &bos);
517
518 if (mem_type != XE_PL_TT) {
519 KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n",
520 bo->ttm.resource->mem_type);
521 failed = true;
522 }
523 cond_resched();
524 if (signal_pending(current))
525 break;
526 }
527
528 /*
529 * Read back and destroy bos. Reset the pseudo-random seed to get an
530 * identical pseudo-random number sequence for readback.
531 */
532 prandom_seed_state(&prng, rand_seed);
533 list_for_each_entry_safe(link, next, &bos, link) {
534 static struct ttm_operation_ctx ctx = {.interruptible = true};
535 struct xe_bo *bo = link->bo;
536 struct xe_ttm_tt *xe_tt;
537 int ret;
538
539 count++;
540 if (!signal_pending(current) && !failed) {
541 bool purgeable, intr = false;
542
543 xe_bo_lock(bo, NULL);
544
545 /* xe_tt->purgeable is cleared on validate. */
546 xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
547 purgeable = xe_tt->purgeable;
548 do {
549 ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx);
550 if (ret == -EINTR)
551 intr = true;
552 } while (ret == -EINTR && !signal_pending(current));
553
554 if (!ret && !purgeable)
555 failed = shrink_test_verify(test, bo, count, &prng, link);
556
557 xe_bo_unlock(bo);
558 if (ret) {
559 KUNIT_FAIL(test, "Validation failed: %pe\n",
560 ERR_PTR(ret));
561 failed = true;
562 } else if (intr) {
563 interrupted++;
564 } else {
565 successful++;
566 }
567 }
568 xe_bo_put(link->bo);
569 list_del(&link->link);
570 kfree(link);
571 }
572 kunit_info(test, "Readbacks interrupted: %u successful: %u\n",
573 interrupted, successful);
574
575 return 0;
576 }
577
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
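The warning flags the declaration at line 462, which computes
container_of(bo->ttm.ttm, ...) before bo has been assigned; the value is dead
anyway, since xe_tt is recomputed right after xe_bo_lock(). The obvious fix
(my reading, not a posted revision) is to drop the initializer:

	struct xe_bo *bo;
	unsigned int mem_type;
	struct xe_ttm_tt *xe_tt;	/* assigned once bo exists below */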
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v8 6/6] drm/xe: Increase the XE_PL_TT watermark
2024-08-16 13:37 [PATCH v8 0/6] TTM shrinker helpers and xe buffer object shrinker Thomas Hellström
` (4 preceding siblings ...)
2024-08-16 13:37 ` [PATCH v8 5/6] drm/xe: Add a shrinker for xe bos Thomas Hellström
@ 2024-08-16 13:37 ` Thomas Hellström
2024-08-16 13:43 ` ✓ CI.Patch_applied: success for TTM shrinker helpers and xe buffer object shrinker (rev7) Patchwork
` (2 subsequent siblings)
8 siblings, 0 replies; 17+ messages in thread
From: Thomas Hellström @ 2024-08-16 13:37 UTC (permalink / raw)
To: intel-xe
Cc: Thomas Hellström, Matthew Brost, Somalapuram Amaranath,
Christian König, Paulo Zanoni, dri-devel
The XE_PL_TT watermark was set to 50% of system memory.
The idea behind that was unclear since the net effect is that
TT memory will be evicted to TTM_PL_SYSTEM memory if that
watermark is exceeded, requiring PPGTT rebinds and dma
remapping. But there is no similar watermark for TTM_PL_SYSTEM
memory.
The TTM functionality that tries to swap out system memory to
shmem objects if a 50% limit of total system memory is reached
is orthogonal to this, and with the shrinker added, it's no
longer in effect.
Replace the 50% TTM_PL_TT limit with a 100% limit, in effect
allowing all graphics memory to be bound to the device unless it
has been swapped out by the shrinker.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/xe_ttm_sys_mgr.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
index 9844a8edbfe1..d38b91872da3 100644
--- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -108,9 +108,8 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe)
u64 gtt_size;
si_meminfo(&si);
+ /* Potentially restrict amount of TT memory here. */
gtt_size = (u64)si.totalram * si.mem_unit;
- /* TTM limits allocation of all TTM devices by 50% of system memory */
- gtt_size /= 2;
man->use_tt = true;
man->func = &xe_ttm_sys_mgr_func;
--
2.44.0
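As a worked example of what that one-liner changes: on a 16 GiB machine
the old code capped TT allocations at 8 GiB, while the new code admits
the full 16 GiB and leaves reclaim to the shrinker. A hedged userspace
sketch of the before/after sizing; the totalram/mem_unit values are
stand-ins for what si_meminfo() would report:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Stand-ins for si.totalram and si.mem_unit on a 16 GiB box. */
	uint64_t totalram = 4194304;	/* pages */
	uint64_t mem_unit = 4096;	/* bytes per page */
	uint64_t gtt_size = totalram * mem_unit;

	/* New behaviour: the full 16 GiB may be bound to the device. */
	printf("new TT limit: %llu GiB\n",
	       (unsigned long long)(gtt_size >> 30));
	/* Old behaviour: gtt_size /= 2 capped this at 8 GiB. */
	printf("old TT limit: %llu GiB\n",
	       (unsigned long long)((gtt_size / 2) >> 30));
	return 0;
}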
* ✓ CI.Patch_applied: success for TTM shrinker helpers and xe buffer object shrinker (rev7)
2024-08-16 13:37 [PATCH v8 0/6] TTM shrinker helpers and xe buffer object shrinker Thomas Hellström
` (5 preceding siblings ...)
2024-08-16 13:37 ` [PATCH v8 6/6] drm/xe: Increase the XE_PL_TT watermark Thomas Hellström
@ 2024-08-16 13:43 ` Patchwork
2024-08-16 13:43 ` ✗ CI.checkpatch: warning " Patchwork
2024-08-16 13:43 ` ✗ CI.KUnit: failure " Patchwork
8 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2024-08-16 13:43 UTC (permalink / raw)
To: Thomas Hellström; +Cc: intel-xe
== Series Details ==
Series: TTM shrinker helpers and xe buffer object shrinker (rev7)
URL : https://patchwork.freedesktop.org/series/131815/
State : success
== Summary ==
=== Applying kernel patches on branch 'drm-tip' with base: ===
Base commit: cfdb0d68f7d0 drm-tip: 2024y-08m-16d-13h-11m-04s UTC integration manifest
=== git am output follows ===
Applying: drm/ttm: Add a virtual base class for graphics memory backup
Applying: drm/ttm/pool: Provide a helper to shrink pages
Applying: drm/ttm: Use fault-injection to test error paths
Applying: drm/ttm: Add a shrinker helper and export the LRU walker for driver use
Applying: drm/xe: Add a shrinker for xe bos
Applying: drm/xe: Increase the XE_PL_TT watermark
* ✗ CI.checkpatch: warning for TTM shrinker helpers and xe buffer object shrinker (rev7)
2024-08-16 13:37 [PATCH v8 0/6] TTM shrinker helpers and xe buffer object shrinker Thomas Hellström
` (6 preceding siblings ...)
2024-08-16 13:43 ` ✓ CI.Patch_applied: success for TTM shrinker helpers and xe buffer object shrinker (rev7) Patchwork
@ 2024-08-16 13:43 ` Patchwork
2024-08-16 13:43 ` ✗ CI.KUnit: failure " Patchwork
8 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2024-08-16 13:43 UTC (permalink / raw)
To: Thomas Hellström; +Cc: intel-xe
== Series Details ==
Series: TTM shrinker helpers and xe buffer object shrinker (rev7)
URL : https://patchwork.freedesktop.org/series/131815/
State : warning
== Summary ==
+ KERNEL=/kernel
+ git clone https://gitlab.freedesktop.org/drm/maintainer-tools mt
Cloning into 'mt'...
warning: redirecting to https://gitlab.freedesktop.org/drm/maintainer-tools.git/
+ git -C mt rev-list -n1 origin/master
9fe5037901cabbcdf27a6fe0dfb047ca1474d363
+ cd /kernel
+ git config --global --add safe.directory /kernel
+ git log -n1
commit 091dd2ed8641cb565fa9efb727cb607e1ba1562c
Author: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Date: Fri Aug 16 15:37:17 2024 +0200
drm/xe: Increase the XE_PL_TT watermark
The XE_PL_TT watermark was set to 50% of system memory.
The idea behind that was unclear since the net effect is that
TT memory will be evicted to TTM_PL_SYSTEM memory if that
watermark is exceeded, requiring PPGTT rebinds and dma
remapping. But there is no similar watermark for TTM_PL_SYSTEM
memory.
The TTM functionality that tries to swap out system memory to
shmem objects if a 50% limit of total system memory is reached
is orthogonal to this, and with the shrinker added, it's no
longer in effect.
Replace the 50% TTM_PL_TT limit with a 100% limit, in effect
allowing all graphics memory to be bound to the device unless it
has been swapped out by the shrinker.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
+ /mt/dim checkpatch cfdb0d68f7d07eecfafb5fda99e6dc313359d425 drm-intel
cb53cbb921d0 drm/ttm: Add a virtual base class for graphics memory backup
-:53: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#53:
new file mode 100644
total: 0 errors, 1 warnings, 0 checks, 284 lines checked
ef1b41da0162 drm/ttm/pool: Provide a helper to shrink pages
b13f481b7963 drm/ttm: Use fault-injection to test error paths
b89c49cf98a0 drm/ttm: Add a shrinker helper and export the LRU walker for driver use
-:12: WARNING:COMMIT_LOG_LONG_LINE: Prefer a maximum 75 chars per line (possible unwrapped commit description?)
#12:
https://lore.kernel.org/linux-mm/b7491378-defd-4f1c-31e2-29e4c77e2d67@amd.com/T/#ma918844aa8a6efe8768fdcda0c6590d5c93850c9
total: 0 errors, 1 warnings, 0 checks, 91 lines checked
ff241299d920 drm/xe: Add a shrinker for xe bos
-:717: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#717:
new file mode 100644
total: 0 errors, 1 warnings, 0 checks, 929 lines checked
091dd2ed8641 drm/xe: Increase the XE_PL_TT watermark
* ✗ CI.KUnit: failure for TTM shrinker helpers and xe buffer object shrinker (rev7)
2024-08-16 13:37 [PATCH v8 0/6] TTM shrinker helpers and xe buffer object shrinker Thomas Hellström
` (7 preceding siblings ...)
2024-08-16 13:43 ` ✗ CI.checkpatch: warning " Patchwork
@ 2024-08-16 13:43 ` Patchwork
8 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2024-08-16 13:43 UTC (permalink / raw)
To: Thomas Hellström; +Cc: intel-xe
== Series Details ==
Series: TTM shrinker helpers and xe buffer object shrinker (rev7)
URL : https://patchwork.freedesktop.org/series/131815/
State : failure
== Summary ==
+ trap cleanup EXIT
+ /kernel/tools/testing/kunit/kunit.py run --kunitconfig /kernel/drivers/gpu/drm/xe/.kunitconfig
ERROR:root:../drivers/gpu/drm/ttm/ttm_pool.c: In function ‘ttm_pool_restore_tt’:
../drivers/gpu/drm/ttm/ttm_pool.c:456:8: error: implicit declaration of function ‘should_fail’; did you mean ‘schedule_tail’? [-Werror=implicit-function-declaration]
456 | should_fail(&backup_fault_inject, 1)) {
| ^~~~~~~~~~~
| schedule_tail
../drivers/gpu/drm/ttm/ttm_pool.c:456:21: error: ‘backup_fault_inject’ undeclared (first use in this function)
456 | should_fail(&backup_fault_inject, 1)) {
| ^~~~~~~~~~~~~~~~~~~
../drivers/gpu/drm/ttm/ttm_pool.c:456:21: note: each undeclared identifier is reported only once for each function it appears in
../drivers/gpu/drm/ttm/ttm_pool.c: In function ‘ttm_pool_backup_tt’:
../drivers/gpu/drm/ttm/ttm_pool.c:908:57: error: ‘backup_fault_inject’ undeclared (first use in this function)
908 | if (IS_ENABLED(CONFIG_FAULT_INJECTION) && should_fail(&backup_fault_inject, 1))
| ^~~~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors
make[7]: *** [../scripts/Makefile.build:244: drivers/gpu/drm/ttm/ttm_pool.o] Error 1
make[7]: *** Waiting for unfinished jobs....
make[6]: *** [../scripts/Makefile.build:485: drivers/gpu/drm/ttm] Error 2
make[6]: *** Waiting for unfinished jobs....
../lib/iomap.c:156:5: warning: no previous prototype for ‘ioread64_lo_hi’ [-Wmissing-prototypes]
156 | u64 ioread64_lo_hi(const void __iomem *addr)
| ^~~~~~~~~~~~~~
../lib/iomap.c:163:5: warning: no previous prototype for ‘ioread64_hi_lo’ [-Wmissing-prototypes]
163 | u64 ioread64_hi_lo(const void __iomem *addr)
| ^~~~~~~~~~~~~~
../lib/iomap.c:170:5: warning: no previous prototype for ‘ioread64be_lo_hi’ [-Wmissing-prototypes]
170 | u64 ioread64be_lo_hi(const void __iomem *addr)
| ^~~~~~~~~~~~~~~~
../lib/iomap.c:178:5: warning: no previous prototype for ‘ioread64be_hi_lo’ [-Wmissing-prototypes]
178 | u64 ioread64be_hi_lo(const void __iomem *addr)
| ^~~~~~~~~~~~~~~~
../lib/iomap.c:264:6: warning: no previous prototype for ‘iowrite64_lo_hi’ [-Wmissing-prototypes]
264 | void iowrite64_lo_hi(u64 val, void __iomem *addr)
| ^~~~~~~~~~~~~~~
../lib/iomap.c:272:6: warning: no previous prototype for ‘iowrite64_hi_lo’ [-Wmissing-prototypes]
272 | void iowrite64_hi_lo(u64 val, void __iomem *addr)
| ^~~~~~~~~~~~~~~
../lib/iomap.c:280:6: warning: no previous prototype for ‘iowrite64be_lo_hi’ [-Wmissing-prototypes]
280 | void iowrite64be_lo_hi(u64 val, void __iomem *addr)
| ^~~~~~~~~~~~~~~~~
../lib/iomap.c:288:6: warning: no previous prototype for ‘iowrite64be_hi_lo’ [-Wmissing-prototypes]
288 | void iowrite64be_hi_lo(u64 val, void __iomem *addr)
| ^~~~~~~~~~~~~~~~~
make[5]: *** [../scripts/Makefile.build:485: drivers/gpu/drm] Error 2
make[4]: *** [../scripts/Makefile.build:485: drivers/gpu] Error 2
make[3]: *** [../scripts/Makefile.build:485: drivers] Error 2
make[2]: *** [/kernel/Makefile:1925: .] Error 2
make[1]: *** [/kernel/Makefile:224: __sub-make] Error 2
make: *** [Makefile:224: __sub-make] Error 2
[13:43:26] Configuring KUnit Kernel ...
Generating .config ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
[13:43:30] Building KUnit Kernel ...
Populating config with:
$ make ARCH=um O=.kunit olddefconfig
Building with:
$ make ARCH=um O=.kunit --jobs=48
+ cleanup
++ stat -c %u:%g /kernel
+ chown -R 1003:1003 /kernel
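The implicit-declaration and undeclared-identifier errors above are the
usual symptom of a should_fail() call site that is not compiled out when
CONFIG_FAULT_INJECTION is disabled. A hedged sketch of one common guard
pattern follows; the ttm_backup_should_fail() wrapper is hypothetical,
not the fix the series actually applied:

#include <linux/fault-inject.h>

#ifdef CONFIG_FAULT_INJECTION
/* Visible in debugfs once registered; trips the error path on demand. */
static DECLARE_FAULT_ATTR(backup_fault_inject);
#define ttm_backup_should_fail() should_fail(&backup_fault_inject, 1)
#else
/* Compiles away entirely on kernels without fault injection. */
#define ttm_backup_should_fail() false
#endif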