From: "Souza, Jose" <jose.souza@intel.com>
To: "Das, Nirmoy" <nirmoy.das@intel.com>,
"intel-xe@lists.freedesktop.org" <intel-xe@lists.freedesktop.org>,
"nirmoy.das@linux.intel.com" <nirmoy.das@linux.intel.com>,
"Landwerlin, Lionel G" <lionel.g.landwerlin@intel.com>
Subject: Re: [PATCH 3/3] drm/xe/uapi: Introduce VMA bind flag for device atomics
Date: Thu, 11 Apr 2024 14:40:49 +0000 [thread overview]
Message-ID: <bad0ac3232dbc2aebfa1a5a6933167635705c63c.camel@intel.com> (raw)
In-Reply-To: <b357a7f5-0124-485a-ac80-529426d450bd@linux.intel.com>
On Thu, 2024-04-11 at 16:32 +0200, Nirmoy Das wrote:
> Hi Jose,
>
> On 4/11/2024 4:07 PM, Souza, Jose wrote:
> > On Thu, 2024-04-11 at 17:00 +0300, Lionel Landwerlin wrote:
> > > On 11/04/2024 16:42, Nirmoy Das wrote:
> > > > Hi Lionel,
> > > >
> > > > On 4/11/2024 3:14 PM, Lionel Landwerlin wrote:
> > > > > On 10/04/2024 20:03, Nirmoy Das wrote:
> > > > > > Adds a new VMA bind flag to enable device atomics on SMEM only buffers.
> > > > > >
> > > > > > Given that simultaneous usage of device atomics and CPU atomics on
> > > > > > the same SMEM buffer is not guaranteed to function without migration,
> > > > > > and UMD expects no migration for SMEM-only buffer objects, so this
> > > > > > provide
> > > > > > a way to set device atomics when UMD is certain to use the buffer only
> > > > > > for device atomics.
> > > > > >
> > > > > > Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> > > > > > ---
> > > > > > drivers/gpu/drm/xe/xe_vm.c | 27 +++++++++++++++++++++++++--
> > > > > > drivers/gpu/drm/xe/xe_vm_types.h | 2 ++
> > > > > > include/uapi/drm/xe_drm.h | 9 +++++----
> > > > > > 3 files changed, 32 insertions(+), 6 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > > > > index 8f3474c5f480..530b4bbc186c 100644
> > > > > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > > > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > > > > @@ -851,6 +851,7 @@ static void xe_vma_free(struct xe_vma *vma)
> > > > > > #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
> > > > > > #define VMA_CREATE_FLAG_IS_NULL BIT(1)
> > > > > > #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
> > > > > > +#define VMA_CREATE_FLAG_DEVICE_ATOMICS BIT(3)
> > > > > > static struct xe_vma *xe_vma_create(struct xe_vm *vm,
> > > > > > struct xe_bo *bo,
> > > > > > @@ -864,6 +865,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm
> > > > > > *vm,
> > > > > > bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
> > > > > > bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
> > > > > > bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
> > > > > > + bool enable_atomics = (flags & VMA_CREATE_FLAG_IS_NULL);
> > > > > > xe_assert(vm->xe, start < end);
> > > > > > xe_assert(vm->xe, end < vm->size);
> > > > > > @@ -912,7 +914,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm
> > > > > > *vm,
> > > > > > xe_bo_assert_held(bo);
> > > > > > if (GRAPHICS_VER(vm->xe) >= 20 || xe_bo_is_vram(bo) ||
> > > > > > - !IS_DGFX(vm->xe))
> > > > > > + !IS_DGFX(vm->xe) || enable_atomics)
> > > > > > vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
> > > > > > vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
> > > > > > @@ -2174,6 +2176,18 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm,
> > > > > > struct xe_bo *bo,
> > > > > > operation, (ULL)addr, (ULL)range,
> > > > > > (ULL)bo_offset_or_userptr);
> > > > > > + if (bo && (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
> > > > > > + (vm->xe->info.platform == XE_PVC && !xe_bo_is_vram(bo))) {
> > > > > > + drm_warn(&vm->xe->drm, "Setting device atomics on SMEM is
> > > > > > not supported for this platform");
> > > > > > + return ERR_PTR(-EINVAL);
> > > > > > + }
> > > > > > +
> > > > > > + if (bo && (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
> > > > > > + !xe_bo_has_single_placement(bo))
> > > > > > + drm_warn(&vm->xe->drm, "DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS
> > > > > > can be only set if the BO has single placement");
> > > > > > + return ERR_PTR(-EINVAL);
> > > > > > + }
> > > > > > +
> > > > > > switch (operation) {
> > > > > > case DRM_XE_VM_BIND_OP_MAP:
> > > > > > case DRM_XE_VM_BIND_OP_MAP_USERPTR:
> > > > > > @@ -2216,6 +2230,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm,
> > > > > > struct xe_bo *bo,
> > > > > > if (__op->op == DRM_GPUVA_OP_MAP) {
> > > > > > op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
> > > > > > op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
> > > > > > + op->map.enable_device_atomics = flags &
> > > > > > DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS;
> > > > > > op->map.pat_index = pat_index;
> > > > > > } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
> > > > > > op->prefetch.region = prefetch_region;
> > > > > > @@ -2412,6 +2427,8 @@ static int vm_bind_ioctl_ops_parse(struct
> > > > > > xe_vm *vm, struct xe_exec_queue *q,
> > > > > > VMA_CREATE_FLAG_IS_NULL : 0;
> > > > > > flags |= op->map.dumpable ?
> > > > > > VMA_CREATE_FLAG_DUMPABLE : 0;
> > > > > > + flags |= op->map.enable_device_atomics ?
> > > > > > + VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
> > > > > > vma = new_vma(vm, &op->base.map, op->map.pat_index,
> > > > > > flags);
> > > > > > @@ -2439,6 +2456,8 @@ static int vm_bind_ioctl_ops_parse(struct
> > > > > > xe_vm *vm, struct xe_exec_queue *q,
> > > > > > flags |= op->base.remap.unmap->va->flags &
> > > > > > XE_VMA_DUMPABLE ?
> > > > > > VMA_CREATE_FLAG_DUMPABLE : 0;
> > > > > > + flags |= op->base.remap.unmap->va->flags ?
> > > > > > + VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
> > > > > > vma = new_vma(vm, op->base.remap.prev,
> > > > > > old->pat_index, flags);
> > > > > > @@ -2476,6 +2495,9 @@ static int vm_bind_ioctl_ops_parse(struct
> > > > > > xe_vm *vm, struct xe_exec_queue *q,
> > > > > > flags |= op->base.remap.unmap->va->flags &
> > > > > > XE_VMA_DUMPABLE ?
> > > > > > VMA_CREATE_FLAG_DUMPABLE : 0;
> > > > > > + flags |= op->base.remap.unmap->va->flags ?
> > > > > > + VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
> > > > > > +
> > > > > > vma = new_vma(vm, op->base.remap.next,
> > > > > > old->pat_index, flags);
> > > > > > @@ -2831,7 +2853,8 @@ static int vm_bind_ioctl_ops_execute(struct
> > > > > > xe_vm *vm,
> > > > > > (DRM_XE_VM_BIND_FLAG_READONLY | \
> > > > > > DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
> > > > > > DRM_XE_VM_BIND_FLAG_NULL | \
> > > > > > - DRM_XE_VM_BIND_FLAG_DUMPABLE)
> > > > > > + DRM_XE_VM_BIND_FLAG_DUMPABLE | \
> > > > > > + DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS)
> > > > > > #define XE_64K_PAGE_MASK 0xffffull
> > > > > > #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
> > > > > > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > > b/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > > index badf3945083d..7b9c68909c78 100644
> > > > > > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > > > > > @@ -282,6 +282,8 @@ struct xe_vma_op_map {
> > > > > > bool dumpable;
> > > > > > /** @pat_index: The pat index to use for this operation. */
> > > > > > u16 pat_index;
> > > > > > + /** @enable_device_atomics: Whether the VMA will allow device
> > > > > > atomics */
> > > > > > + bool enable_device_atomics;
> > > > > > };
> > > > > > /** struct xe_vma_op_remap - VMA remap operation */
> > > > > > diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> > > > > > index 1446c3bae515..bffe8b1c040c 100644
> > > > > > --- a/include/uapi/drm/xe_drm.h
> > > > > > +++ b/include/uapi/drm/xe_drm.h
> > > > > > @@ -969,10 +969,11 @@ struct drm_xe_vm_bind_op {
> > > > > > /** @op: Bind operation to perform */
> > > > > > __u32 op;
> > > > > > -#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
> > > > > > -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
> > > > > > -#define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
> > > > > > -#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
> > > > > > +#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
> > > > > > +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
> > > > > > +#define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
> > > > > > +#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
> > > > >
> > > > > We should probably document that it's an error to add this flag if
> > > > > the BO has a single memory region.
> > > > It is an error if this flag is set on non single memory region as
> > > > atomics on SMEM+LMEM buffer will be handled with migration.
> > > >
> > > > Let me know if I got that wrong.
> > > >
> > > > > Are we supposed to the ability to set that flag or is there going to
> > > > > be a query?
> > > > >
> > > > > A query might make sense since it's going to be rejected for some
> > > > > platform.
> > > > Can we use the rejection as a query ? I am using it in a IGT test
> > > > https://patchwork.freedesktop.org/patch/588759/?series=132289&rev=1
> > > >
> > > >
> > That was not acceptable for DRM_XE_VM_BIND_FLAG_DUMPABLE. At the end we were able to land DRM_XE_VM_BIND_FLAG_DUMPABLE in Linux 6.8 so we did not
> > needed the query. But for this one will be needed.
>
> If the rejection can't work as query, I can add one. Would
> adding(DRM_XE_QUERY_CONFIG_SUPP_DEV_ATOMIC_ON_SMEM) a new flag into
> 'struct drm_xe_query_config' work ?
As another bit after DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM? That lgtm but even better would be add a new category like
DRM_XE_QUERY_CONFIG_SUPPORTED_FEATURES and then DRM_XE_QUERY_CONFIG_SUPPORTED_FEATURES_ATOMIC_ON_SMEM (1 << 0).
So we have a u64 just to check for features supported by different KMD versions.
But ask the opinion of other engineers in Xe chat.
>
>
> Regards,
>
> Nirmoy
>
> >
> >
> > > > Regards,
> > > >
> > > > Nirmoy
> > >
> > > Okay, sounds doable.
> > >
> > > Will try to put up an MR for Mesa soon.
> > >
> > >
> > > -Lionel
> > >
> > >
> > > > >
> > > > > -Lionel
> > > > >
> > > > >
> > > > > > +#define DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS (1 << 4)
> > > > > > /** @flags: Bind flags */
> > > > > > __u32 flags;
> > > > >
next prev parent reply other threads:[~2024-04-11 14:40 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-10 17:03 [PATCH 0/3] Enable device atomics with a VM bind flag Nirmoy Das
2024-04-10 17:03 ` [PATCH 1/3] drm/xe: Consolidate setting PTE_AE into one place Nirmoy Das
2024-04-11 23:22 ` Matt Roper
2024-04-12 9:34 ` Nirmoy Das
2024-04-12 11:33 ` Nirmoy Das
2024-04-10 17:03 ` [PATCH 2/3] drm/xe: Add function to check if BO has single placement Nirmoy Das
2024-04-10 17:03 ` [PATCH 3/3] drm/xe/uapi: Introduce VMA bind flag for device atomics Nirmoy Das
2024-04-10 17:35 ` Matthew Brost
2024-04-11 9:22 ` Nirmoy Das
2024-04-11 14:00 ` Nirmoy Das
2024-04-11 13:14 ` Lionel Landwerlin
2024-04-11 13:32 ` Lionel Landwerlin
2024-04-11 13:42 ` Nirmoy Das
2024-04-11 14:00 ` Lionel Landwerlin
2024-04-11 14:07 ` Souza, Jose
2024-04-11 14:32 ` Nirmoy Das
2024-04-11 14:40 ` Souza, Jose [this message]
2024-04-11 14:54 ` Nirmoy Das
2024-04-11 23:44 ` Matt Roper
2024-04-12 8:06 ` Nirmoy Das
2024-04-10 18:04 ` ✓ CI.Patch_applied: success for Enable device atomics with a VM bind flag Patchwork
2024-04-10 18:04 ` ✓ CI.checkpatch: " Patchwork
2024-04-10 18:04 ` ✗ CI.KUnit: failure " Patchwork
2024-04-11 16:22 ` [PATCH 0/3] " Zeng, Oak
2024-04-11 17:00 ` Nirmoy Das
2024-04-11 17:23 ` Zeng, Oak
2024-04-12 5:06 ` Mrozek, Michal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=bad0ac3232dbc2aebfa1a5a6933167635705c63c.camel@intel.com \
--to=jose.souza@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=lionel.g.landwerlin@intel.com \
--cc=nirmoy.das@intel.com \
--cc=nirmoy.das@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.