From: "Summers, Stuart" <stuart.summers@intel.com>
To: "intel-xe@lists.freedesktop.org" <intel-xe@lists.freedesktop.org>,
"Brost, Matthew" <matthew.brost@intel.com>
Cc: "simon.richter@hogyros.de" <simon.richter@hogyros.de>,
"Auld, Matthew" <matthew.auld@intel.com>
Subject: Re: [PATCH v5 2/2] drm/xe: Enable 2M pages in xe_migrate_vram
Date: Mon, 13 Oct 2025 17:08:03 +0000
Message-ID: <a435a2a974b8627c945471e75c4dc21b835e9d22.camel@intel.com>
In-Reply-To: <20251013034555.4121168-3-matthew.brost@intel.com>
On Sun, 2025-10-12 at 20:45 -0700, Matthew Brost wrote:
> Using 2M pages in xe_migrate_vram has two benefits: we issue fewer
> instructions per 2M copy (1 vs. 512), and the cache hit rate should be
> higher. This results in increased copy engine bandwidth, as shown by
> benchmark IGTs.
>
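(Sanity-checking the 1 vs. 512 figure, presumably the page-table writes,
assuming 4K leaf PTEs: 2M / 4K = 512, so mapping a 2M chunk at level 0
takes 512 PTE writes, while a single level-1 PDE covers the same range
with one write.)
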
> Enable 2M pages by reserving PDEs in the migrate VM and using 2M pages
> in xe_migrate_vram if the DMA address order matches 2M.
>
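In case it helps when reading the diff: my reading of the "DMA address
order matches 2M" condition is the sketch below, which just paraphrases
the xe_migrate_vram_use_pde() helper added further down (SZ_2M stands in
for 0x1 << xe_pt_shift(1), and the function name is made up):

  /* Sketch only: every PAGE_SIZE slot that starts a 2M chunk must be
   * backed by a DMA mapping whose order spans exactly 2M, otherwise we
   * fall back to 4K PTEs. */
  static bool chunks_are_2m(struct drm_pagemap_addr *addr,
                            unsigned long npages)
  {
          unsigned long i, step = SZ_2M / PAGE_SIZE;

          for (i = 0; i < npages; i += step)
                  if (PAGE_SIZE << addr[i].order != SZ_2M)
                          return false;

          return true;
  }

So if any 2M-aligned chunk is backed by smaller mappings, the whole copy
falls back to the existing 4K path.
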
> v2:
> - Reuse build_pt_update_batch_sram (Stuart)
> - Fix build_pt_update_batch_sram for PAGE_SIZE > 4K
> v3:
> - More fixes for PAGE_SIZE > 4K, align chunk, decrement chunk as needed
> - Use stack incr var in xe_migrate_vram_use_pde (Stuart)
> v4:
> - Split PAGE_SIZE > 4K fix out in different patch (Stuart)
>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/xe/xe_migrate.c | 53 ++++++++++++++++++++++++++++-----
> 1 file changed, 45 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 216fc0ec2bb7..4ca48dd1cfd8 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -57,6 +57,13 @@ struct xe_migrate {
> u64 usm_batch_base_ofs;
> /** @cleared_mem_ofs: VM offset of @cleared_bo. */
> u64 cleared_mem_ofs;
> + /** @large_page_copy_ofs: VM offset of 2M pages used for large copies */
> + u64 large_page_copy_ofs;
> + /**
> + * @large_page_copy_pdes: BO offset to writeout 2M pages (PDEs) used for
> + * large copies
> + */
> + u64 large_page_copy_pdes;
> /**
> * @fence: dma-fence representing the last migration job batch.
> * Protected by @job_mutex.
> @@ -288,6 +295,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
> (i + 1) * 8, u64, entry);
> }
>
> + /* Reserve 2M PDEs */
> + level = 1;
> + m->large_page_copy_ofs = NUM_PT_SLOTS << xe_pt_shift(level);
> + m->large_page_copy_pdes = map_ofs + XE_PAGE_SIZE * level +
> + NUM_PT_SLOTS * 8;
> +
> /* Set up a 1GiB NULL mapping at 255GiB offset. */
> level = 2;
> xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64,
> @@ -1778,10 +1791,10 @@ static u32 pte_update_cmd_size(u64 size)
> static void build_pt_update_batch_sram(struct xe_migrate *m,
> struct xe_bb *bb, u32 pt_offset,
> struct drm_pagemap_addr *sram_addr,
> - u32 size)
> + u32 size, int level)
> {
> u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
> - u64 gpu_page_size = 0x1ull << xe_pt_shift(0);
> + u64 gpu_page_size = 0x1ull << xe_pt_shift(level);
> u32 ptes;
> int i = 0;
>
> @@ -1808,7 +1821,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m,
> again:
> pte = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
> addr, pat_index,
> - 0, false, 0);
> + level, false, 0);
> bb->cs[bb->len++] = lower_32_bits(pte);
> bb->cs[bb->len++] = upper_32_bits(pte);
>
> @@ -1826,6 +1839,19 @@ static void build_pt_update_batch_sram(struct xe_migrate *m,
> }
> }
>
> +static bool xe_migrate_vram_use_pde(struct drm_pagemap_addr *sram_addr,
> + unsigned long size)
> +{
> + u32 large_size = (0x1 << xe_pt_shift(1));
> + unsigned long i, incr = large_size / PAGE_SIZE;
> +
> + for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE); i += incr)
> + if (PAGE_SIZE << sram_addr[i].order != large_size)
> + return false;
> +
> + return true;
> +}
> +
> enum xe_migrate_copy_dir {
> XE_MIGRATE_COPY_TO_VRAM,
> XE_MIGRATE_COPY_TO_SRAM,
> @@ -1855,6 +1881,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
> PAGE_SIZE : 4;
> int err;
> unsigned long i, j;
> + bool use_pde = xe_migrate_vram_use_pde(sram_addr, len + sram_offset);
>
> if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) ||
> (sram_offset | vram_addr) & XE_CACHELINE_MASK))
> @@ -1879,7 +1906,7 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
> * struct drm_pagemap_addr. Ensure this is the case even with higher
> * orders.
> */
> - for (i = 0; i < npages;) {
> + for (i = 0; !use_pde && i < npages;) {
What if the CPU page size is larger than 2M? Don't we still want this?
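
Just to make that case concrete (hypothetical numbers, assuming
xe_pt_shift(1) == 21, i.e. a 2M GPU large page; as far as I know no
config has a CPU page that big today): with PAGE_SIZE = 4M,
incr = large_size / PAGE_SIZE rounds down to 0, and
PAGE_SIZE << sram_addr[0].order is at least 4M, so it can never equal
large_size. If I'm reading the helper right, xe_migrate_vram_use_pde()
then returns false on its first iteration and this per-page loop still
runs for such a config.
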
Thanks,
Stuart
> unsigned int order = sram_addr[i].order;
>
> for (j = 1; j < NR_PAGES(order) && i + j < npages; j++)
> @@ -1889,16 +1916,26 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
> i += NR_PAGES(order);
> }
>
> - build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
> - sram_addr, len + sram_offset);
> + if (use_pde)
> + build_pt_update_batch_sram(m, bb, m->large_page_copy_pdes,
> + sram_addr, len + sram_offset, 1);
> + else
> + build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
> + sram_addr, len + sram_offset, 0);
>
> if (dir == XE_MIGRATE_COPY_TO_VRAM) {
> - src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
> + if (use_pde)
> + src_L0_ofs = m->large_page_copy_ofs + sram_offset;
> + else
> + src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
> dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
>
> } else {
> src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
> - dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
> + if (use_pde)
> + dst_L0_ofs = m->large_page_copy_ofs + sram_offset;
> + else
> + dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
> }
>
> bb->cs[bb->len++] = MI_BATCH_BUFFER_END;