* [PATCH v2 01/13] drm/gem: Add a drm_gem_object_funcs::sync() and a drm_gem_sync() helper
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 10:11 ` [PATCH v2 02/13] drm/prime: Provide default ::{begin, end}_cpu_access() implementations Boris Brezillon
` (11 subsequent siblings)
12 siblings, 0 replies; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
Prepare things for standardizing synchronization around CPU accesses
of GEM buffers. This will be used to provide default
drm_gem_dmabuf_{begin,end}_cpu_access() implementations, and provide
a way for drivers to add their own ioctls to synchronize CPU
writes/reads when they can't do it directly from userland.
v2:
- New commit
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/drm_gem.c | 10 +++++++++
include/drm/drm_gem.h | 45 +++++++++++++++++++++++++++++++++++++++
2 files changed, 55 insertions(+)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index cbeb76b2124f..ec92ea81d9db 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1333,6 +1333,16 @@ void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
}
EXPORT_SYMBOL(drm_gem_vunmap);
+int drm_gem_sync(struct drm_gem_object *obj, size_t offset, size_t size,
+ enum drm_gem_object_access_flags access)
+{
+ if (obj->funcs->sync)
+ return obj->funcs->sync(obj, offset, size, access);
+
+ return 0;
+}
+EXPORT_SYMBOL(drm_gem_sync);
+
/**
* drm_gem_lock_reservations - Sets up the ww context and acquires
* the lock on an array of GEM objects.
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 8d48d2af2649..1c33e59ab305 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -66,6 +66,33 @@ enum drm_gem_object_status {
DRM_GEM_OBJECT_ACTIVE = BIT(2),
};
+/**
+ * enum drm_gem_object_access_flags - bitmask describing GEM access types to prepare for
+ */
+enum drm_gem_object_access_flags {
+ /** @DRM_GEM_OBJECT_CPU_ACCESS: Prepare for a CPU access. */
+ DRM_GEM_OBJECT_CPU_ACCESS = 0,
+
+ /** @DRM_GEM_OBJECT_DEV_ACCESS: Prepare for a device access. */
+ DRM_GEM_OBJECT_DEV_ACCESS = BIT(0),
+
+ /** @DRM_GEM_OBJECT_ACCESSOR_MASK: Mask used to check the entity doing the access. */
+ DRM_GEM_OBJECT_ACCESSOR_MASK = BIT(0),
+
+ /** @DRM_GEM_OBJECT_READ_ACCESS: Prepare for read-only accesses. */
+ DRM_GEM_OBJECT_READ_ACCESS = BIT(1),
+
+ /** @DRM_GEM_OBJECT_WRITE_ACCESS: Prepare for write-only accesses. */
+ DRM_GEM_OBJECT_WRITE_ACCESS = BIT(2),
+
+ /** @DRM_GEM_OBJECT_RW_ACCESS: Prepare for read/write accesses. */
+ DRM_GEM_OBJECT_RW_ACCESS = DRM_GEM_OBJECT_READ_ACCESS |
+ DRM_GEM_OBJECT_WRITE_ACCESS,
+
+ /** @DRM_GEM_OBJECT_ACCESS_TYPE_MASK: Mask used to check the access type. */
+ DRM_GEM_OBJECT_ACCESS_TYPE_MASK = DRM_GEM_OBJECT_RW_ACCESS,
+};
+
/**
* struct drm_gem_object_funcs - GEM object functions
*/
@@ -191,6 +218,21 @@ struct drm_gem_object_funcs {
*/
int (*mmap)(struct drm_gem_object *obj, struct vm_area_struct *vma);
+ /**
+ * @sync:
+ *
+ * Prepare for CPU/device access. This can involve migration of
+ * a buffer to the system-RAM/VRAM, or for UMA, flushing/invalidating
+ * the CPU caches. The range can be used to optimize the synchronization
+ * when possible.
+ *
+ * Returns 0 on success, -errno otherwise.
+ *
+ * This callback is optional.
+ */
+ int (*sync)(struct drm_gem_object *obj, size_t offset, size_t size,
+ enum drm_gem_object_access_flags access);
+
/**
* @evict:
*
@@ -559,6 +601,9 @@ void drm_gem_unlock(struct drm_gem_object *obj);
int drm_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map);
void drm_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map);
+int drm_gem_sync(struct drm_gem_object *obj, size_t offset, size_t size,
+ enum drm_gem_object_access_flags access);
+
int drm_gem_objects_lookup(struct drm_file *filp, void __user *bo_handles,
int count, struct drm_gem_object ***objs_out);
struct drm_gem_object *drm_gem_object_lookup(struct drm_file *filp, u32 handle);
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* [PATCH v2 02/13] drm/prime: Provide default ::{begin, end}_cpu_access() implementations
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
2025-10-10 10:11 ` [PATCH v2 01/13] drm/gem: Add a drm_gem_object_funcs::sync() and a drm_gem_sync() helper Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 14:11 ` [PATCH v2 02/13] drm/prime: Provide default ::{begin,end}_cpu_access() implementations Steven Price
2025-10-10 10:11 ` [PATCH v2 03/13] drm/shmem: Add a drm_gem_shmem_sync() helper Boris Brezillon
` (10 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
Hook-up drm_gem_dmabuf_{begin,end}_cpu_access() to drm_gem_sync() so
that drivers relying on the default prime_dmabuf_ops can still have
a way to prepare for CPU accesses from outside the UMD.
v2:
- New commit
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/drm_prime.c | 36 ++++++++++++++++++++++++++++++++++++
include/drm/drm_prime.h | 5 +++++
2 files changed, 41 insertions(+)
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 43a10b4af43a..03c09f9ab129 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -823,6 +823,40 @@ int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
}
EXPORT_SYMBOL(drm_gem_dmabuf_mmap);
+int drm_gem_dmabuf_begin_cpu_access(struct dma_buf *dma_buf,
+ enum dma_data_direction direction)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ enum drm_gem_object_access_flags access = DRM_GEM_OBJECT_CPU_ACCESS;
+
+ if (direction == DMA_FROM_DEVICE)
+ access |= DRM_GEM_OBJECT_READ_ACCESS;
+ else if (direction == DMA_BIDIRECTIONAL)
+ access |= DRM_GEM_OBJECT_RW_ACCESS;
+ else
+ return -EINVAL;
+
+ return drm_gem_sync(obj, 0, obj->size, access);
+}
+EXPORT_SYMBOL(drm_gem_dmabuf_begin_cpu_access);
+
+int drm_gem_dmabuf_end_cpu_access(struct dma_buf *dma_buf,
+ enum dma_data_direction direction)
+{
+ struct drm_gem_object *obj = dma_buf->priv;
+ enum drm_gem_object_access_flags access = DRM_GEM_OBJECT_DEV_ACCESS;
+
+ if (direction == DMA_TO_DEVICE)
+ access |= DRM_GEM_OBJECT_READ_ACCESS;
+ else if (direction == DMA_BIDIRECTIONAL)
+ access |= DRM_GEM_OBJECT_RW_ACCESS;
+ else
+ return -EINVAL;
+
+ return drm_gem_sync(obj, 0, obj->size, access);
+}
+EXPORT_SYMBOL(drm_gem_dmabuf_end_cpu_access);
+
static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = {
.attach = drm_gem_map_attach,
.detach = drm_gem_map_detach,
@@ -832,6 +866,8 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = {
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
+ .begin_cpu_access = drm_gem_dmabuf_begin_cpu_access,
+ .end_cpu_access = drm_gem_dmabuf_end_cpu_access,
};
/**
diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h
index f50f862f0d8b..052fba039bb6 100644
--- a/include/drm/drm_prime.h
+++ b/include/drm/drm_prime.h
@@ -92,6 +92,11 @@ void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map);
int drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma);
+int drm_gem_dmabuf_begin_cpu_access(struct dma_buf *dma_buf,
+ enum dma_data_direction direction);
+int drm_gem_dmabuf_end_cpu_access(struct dma_buf *dma_buf,
+ enum dma_data_direction direction);
+
struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
struct page **pages, unsigned int nr_pages);
struct dma_buf *drm_gem_prime_export(struct drm_gem_object *obj,
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH v2 02/13] drm/prime: Provide default ::{begin,end}_cpu_access() implementations
2025-10-10 10:11 ` [PATCH v2 02/13] drm/prime: Provide default ::{begin, end}_cpu_access() implementations Boris Brezillon
@ 2025-10-10 14:11 ` Steven Price
2025-10-10 15:03 ` Boris Brezillon
0 siblings, 1 reply; 33+ messages in thread
From: Steven Price @ 2025-10-10 14:11 UTC (permalink / raw)
To: Boris Brezillon, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
On 10/10/2025 11:11, Boris Brezillon wrote:
> Hook-up drm_gem_dmabuf_{begin,end}_cpu_access() to drm_gem_sync() so
> that drivers relying on the default prime_dmabuf_ops can still have
> a way to prepare for CPU accesses from outside the UMD.
>
> v2:
> - New commit
>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> ---
> drivers/gpu/drm/drm_prime.c | 36 ++++++++++++++++++++++++++++++++++++
> include/drm/drm_prime.h | 5 +++++
> 2 files changed, 41 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
> index 43a10b4af43a..03c09f9ab129 100644
> --- a/drivers/gpu/drm/drm_prime.c
> +++ b/drivers/gpu/drm/drm_prime.c
> @@ -823,6 +823,40 @@ int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
> }
> EXPORT_SYMBOL(drm_gem_dmabuf_mmap);
>
> +int drm_gem_dmabuf_begin_cpu_access(struct dma_buf *dma_buf,
> + enum dma_data_direction direction)
> +{
> + struct drm_gem_object *obj = dma_buf->priv;
> + enum drm_gem_object_access_flags access = DRM_GEM_OBJECT_CPU_ACCESS;
> +
> + if (direction == DMA_FROM_DEVICE)
> + access |= DRM_GEM_OBJECT_READ_ACCESS;
> + else if (direction == DMA_BIDIRECTIONAL)
> + access |= DRM_GEM_OBJECT_RW_ACCESS;
> + else
> + return -EINVAL;
> +
> + return drm_gem_sync(obj, 0, obj->size, access);
> +}
> +EXPORT_SYMBOL(drm_gem_dmabuf_begin_cpu_access);
> +
> +int drm_gem_dmabuf_end_cpu_access(struct dma_buf *dma_buf,
> + enum dma_data_direction direction)
> +{
> + struct drm_gem_object *obj = dma_buf->priv;
> + enum drm_gem_object_access_flags access = DRM_GEM_OBJECT_DEV_ACCESS;
> +
> + if (direction == DMA_TO_DEVICE)
> + access |= DRM_GEM_OBJECT_READ_ACCESS;
> + else if (direction == DMA_BIDIRECTIONAL)
> + access |= DRM_GEM_OBJECT_RW_ACCESS;
> + else
> + return -EINVAL;
> +
> + return drm_gem_sync(obj, 0, obj->size, access);
> +}
> +EXPORT_SYMBOL(drm_gem_dmabuf_end_cpu_access);
I feel I must be missing something, but why does
drm_gem_dmabuf_begin_cpu_access() reject DMA_TO_DEVICE and
drm_gem_dmabuf_end_cpu_access() reject DMA_FROM_DEVICE?
My understanding is that these begin/end calls should be bracketing the
operation and the same direction should be specified for each.
Thanks,
Steve
> +
> static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = {
> .attach = drm_gem_map_attach,
> .detach = drm_gem_map_detach,
> @@ -832,6 +866,8 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = {
> .mmap = drm_gem_dmabuf_mmap,
> .vmap = drm_gem_dmabuf_vmap,
> .vunmap = drm_gem_dmabuf_vunmap,
> + .begin_cpu_access = drm_gem_dmabuf_begin_cpu_access,
> + .end_cpu_access = drm_gem_dmabuf_end_cpu_access,
> };
>
> /**
> diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h
> index f50f862f0d8b..052fba039bb6 100644
> --- a/include/drm/drm_prime.h
> +++ b/include/drm/drm_prime.h
> @@ -92,6 +92,11 @@ void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map);
> int drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
> int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma);
>
> +int drm_gem_dmabuf_begin_cpu_access(struct dma_buf *dma_buf,
> + enum dma_data_direction direction);
> +int drm_gem_dmabuf_end_cpu_access(struct dma_buf *dma_buf,
> + enum dma_data_direction direction);
> +
> struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
> struct page **pages, unsigned int nr_pages);
> struct dma_buf *drm_gem_prime_export(struct drm_gem_object *obj,
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH v2 02/13] drm/prime: Provide default ::{begin,end}_cpu_access() implementations
2025-10-10 14:11 ` [PATCH v2 02/13] drm/prime: Provide default ::{begin,end}_cpu_access() implementations Steven Price
@ 2025-10-10 15:03 ` Boris Brezillon
2025-10-10 15:34 ` Steven Price
0 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 15:03 UTC (permalink / raw)
To: Steven Price
Cc: Liviu Dudau, Adrián Larumbe, dri-devel, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter,
Faith Ekstrand, kernel
On Fri, 10 Oct 2025 15:11:54 +0100
Steven Price <steven.price@arm.com> wrote:
> On 10/10/2025 11:11, Boris Brezillon wrote:
> > Hook-up drm_gem_dmabuf_{begin,end}_cpu_access() to drm_gem_sync() so
> > that drivers relying on the default prime_dmabuf_ops can still have
> > a way to prepare for CPU accesses from outside the UMD.
> >
> > v2:
> > - New commit
> >
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> > ---
> > drivers/gpu/drm/drm_prime.c | 36 ++++++++++++++++++++++++++++++++++++
> > include/drm/drm_prime.h | 5 +++++
> > 2 files changed, 41 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
> > index 43a10b4af43a..03c09f9ab129 100644
> > --- a/drivers/gpu/drm/drm_prime.c
> > +++ b/drivers/gpu/drm/drm_prime.c
> > @@ -823,6 +823,40 @@ int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
> > }
> > EXPORT_SYMBOL(drm_gem_dmabuf_mmap);
> >
> > +int drm_gem_dmabuf_begin_cpu_access(struct dma_buf *dma_buf,
> > + enum dma_data_direction direction)
> > +{
> > + struct drm_gem_object *obj = dma_buf->priv;
> > + enum drm_gem_object_access_flags access = DRM_GEM_OBJECT_CPU_ACCESS;
> > +
> > + if (direction == DMA_FROM_DEVICE)
> > + access |= DRM_GEM_OBJECT_READ_ACCESS;
> > + else if (direction == DMA_BIDIRECTIONAL)
> > + access |= DRM_GEM_OBJECT_RW_ACCESS;
> > + else
> > + return -EINVAL;
> > +
> > + return drm_gem_sync(obj, 0, obj->size, access);
> > +}
> > +EXPORT_SYMBOL(drm_gem_dmabuf_begin_cpu_access);
> > +
> > +int drm_gem_dmabuf_end_cpu_access(struct dma_buf *dma_buf,
> > + enum dma_data_direction direction)
> > +{
> > + struct drm_gem_object *obj = dma_buf->priv;
> > + enum drm_gem_object_access_flags access = DRM_GEM_OBJECT_DEV_ACCESS;
> > +
> > + if (direction == DMA_TO_DEVICE)
> > + access |= DRM_GEM_OBJECT_READ_ACCESS;
> > + else if (direction == DMA_BIDIRECTIONAL)
> > + access |= DRM_GEM_OBJECT_RW_ACCESS;
> > + else
> > + return -EINVAL;
> > +
> > + return drm_gem_sync(obj, 0, obj->size, access);
> > +}
> > +EXPORT_SYMBOL(drm_gem_dmabuf_end_cpu_access);
>
> I feel I must be missing something, but why does
> drm_gem_dmabuf_begin_cpu_access() reject DMA_TO_DEVICE and
> drm_gem_dmabuf_end_cpu_access() reject DMA_FROM_DEVICE?
Not really sure what it means to prepare for dev access and synchronize
with what the device might have changed in memory. Sounds like device
-> device synchronization, which is not what this API is for.
Similarly preparing for CPU access with TO_DEVICE (AKA forcing previous
CPU changes to be visible to the device) doesn't make sense either.
>
> My understanding is that these begin/end calls should be bracketing the
> operation and the same direction should be specified for each.
If [1] is correct and the begin/end_cpu_access() is based on the
dma_sync_ semantics, nope, that's not how it's supposed to work. The
way I see it, it just expresses the cache operations you want to take
place around your CPU access.
If you read data from the CPU, you want dir=FROM_DEVICE in your
begin_cpu_access(), so that the CPU caches are invalidated. If you
write from the CPU, you want dir=TO_DEVICE in your end_cpu_access. If
you know you will be reading again soon, you might want to pass
dir=BIDIR in your end_cpu_access(), though I'm not too sure what's the
benefit of that to be honest.
[1]https://elixir.bootlin.com/linux/v6.17.1/source/drivers/gpu/drm/tegra/gem.c#L684
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH v2 02/13] drm/prime: Provide default ::{begin,end}_cpu_access() implementations
2025-10-10 15:03 ` Boris Brezillon
@ 2025-10-10 15:34 ` Steven Price
2025-10-10 16:02 ` Boris Brezillon
0 siblings, 1 reply; 33+ messages in thread
From: Steven Price @ 2025-10-10 15:34 UTC (permalink / raw)
To: Boris Brezillon
Cc: Liviu Dudau, Adrián Larumbe, dri-devel, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter,
Faith Ekstrand, kernel
On 10/10/2025 16:03, Boris Brezillon wrote:
> On Fri, 10 Oct 2025 15:11:54 +0100
> Steven Price <steven.price@arm.com> wrote:
>
>> On 10/10/2025 11:11, Boris Brezillon wrote:
>>> Hook-up drm_gem_dmabuf_{begin,end}_cpu_access() to drm_gem_sync() so
>>> that drivers relying on the default prime_dmabuf_ops can still have
>>> a way to prepare for CPU accesses from outside the UMD.
>>>
>>> v2:
>>> - New commit
>>>
>>> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
>>> ---
>>> drivers/gpu/drm/drm_prime.c | 36 ++++++++++++++++++++++++++++++++++++
>>> include/drm/drm_prime.h | 5 +++++
>>> 2 files changed, 41 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
>>> index 43a10b4af43a..03c09f9ab129 100644
>>> --- a/drivers/gpu/drm/drm_prime.c
>>> +++ b/drivers/gpu/drm/drm_prime.c
>>> @@ -823,6 +823,40 @@ int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
>>> }
>>> EXPORT_SYMBOL(drm_gem_dmabuf_mmap);
>>>
>>> +int drm_gem_dmabuf_begin_cpu_access(struct dma_buf *dma_buf,
>>> + enum dma_data_direction direction)
>>> +{
>>> + struct drm_gem_object *obj = dma_buf->priv;
>>> + enum drm_gem_object_access_flags access = DRM_GEM_OBJECT_CPU_ACCESS;
>>> +
>>> + if (direction == DMA_FROM_DEVICE)
>>> + access |= DRM_GEM_OBJECT_READ_ACCESS;
>>> + else if (direction == DMA_BIDIRECTIONAL)
>>> + access |= DRM_GEM_OBJECT_RW_ACCESS;
>>> + else
>>> + return -EINVAL;
>>> +
>>> + return drm_gem_sync(obj, 0, obj->size, access);
>>> +}
>>> +EXPORT_SYMBOL(drm_gem_dmabuf_begin_cpu_access);
>>> +
>>> +int drm_gem_dmabuf_end_cpu_access(struct dma_buf *dma_buf,
>>> + enum dma_data_direction direction)
>>> +{
>>> + struct drm_gem_object *obj = dma_buf->priv;
>>> + enum drm_gem_object_access_flags access = DRM_GEM_OBJECT_DEV_ACCESS;
>>> +
>>> + if (direction == DMA_TO_DEVICE)
>>> + access |= DRM_GEM_OBJECT_READ_ACCESS;
>>> + else if (direction == DMA_BIDIRECTIONAL)
>>> + access |= DRM_GEM_OBJECT_RW_ACCESS;
>>> + else
>>> + return -EINVAL;
>>> +
>>> + return drm_gem_sync(obj, 0, obj->size, access);
>>> +}
>>> +EXPORT_SYMBOL(drm_gem_dmabuf_end_cpu_access);
>>
>> I feel I must be missing something, but why does
>> drm_gem_dmabuf_begin_cpu_access() reject DMA_TO_DEVICE and
>> drm_gem_dmabuf_end_cpu_access() reject DMA_FROM_DEVICE?
>
> Not really sure what it means to prepare for dev access and synchronize
> with what the device might have changed in memory. Sounds like device
> -> device synchronization, which is not what this API is for.
>
> Similarly preparing for CPU access with TO_DEVICE (AKA forcing previous
> CPU changes to be visible to the device) doesn't make sense either.
Well those operations might well be no-ops, but we shouldn't return an
error just because of that.
>>
>> My understanding is that these begin/end calls should be bracketing the
>> operation and the same direction should be specified for each.
>
> If [1] is correct and the begin/end_cpu_access() is based on the
> dma_sync_ semantics, nope, that's not how it's supposed to work. The
> way I see it, it just expresses the cache operations you want to take
> place around your CPU access.
Well that function completely ignored the direction flag. Looking at a
caller of the APIs it seems very common to bracket the work with the
same direction - admittedly they are all DMA_FROM_DEVICE examples I've
found. E.g. a random FB driver:
https://elixir.bootlin.com/linux/v6.17.1/source/drivers/gpu/drm/solomon/ssd130x.c#L1026
> drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
I have to admit I don't really know what the point of this is - and the
error return isn't used beyond a drm_err(). But we don't want to spam
the logs with errors!
> If you read data from the CPU, you want dir=FROM_DEVICE in your
> begin_cpu_access(), so that the CPU caches are invalidated. If you
> write from the CPU, you want dir=TO_DEVICE in your end_cpu_access. If
> you know you will be reading again soon, you might want to pass
> dir=BIDIR in your end_cpu_access(), though I'm not too sure what's the
> benefit of that to be honest.
>
> [1]https://elixir.bootlin.com/linux/v6.17.1/source/drivers/gpu/drm/tegra/gem.c#L684
The documentation[2] states:
> * - Fallback operations in the kernel, for example when a device is connected
> * over USB and the kernel needs to shuffle the data around first before
> * sending it away. Cache coherency is handled by bracketing any transactions
> * with calls to dma_buf_begin_cpu_access() and dma_buf_end_cpu_access()
> * access.
So I guess the purpose is where it's not "cache coherency" as such, but
actually having to shuffle the data between different memories.
Thanks,
Steve
[2]
https://elixir.bootlin.com/linux/v6.17.1/source/drivers/dma-buf/dma-buf.c#L1282
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH v2 02/13] drm/prime: Provide default ::{begin,end}_cpu_access() implementations
2025-10-10 15:34 ` Steven Price
@ 2025-10-10 16:02 ` Boris Brezillon
0 siblings, 0 replies; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 16:02 UTC (permalink / raw)
To: Steven Price
Cc: Liviu Dudau, Adrián Larumbe, dri-devel, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter,
Faith Ekstrand, kernel
On Fri, 10 Oct 2025 16:34:14 +0100
Steven Price <steven.price@arm.com> wrote:
> On 10/10/2025 16:03, Boris Brezillon wrote:
> > On Fri, 10 Oct 2025 15:11:54 +0100
> > Steven Price <steven.price@arm.com> wrote:
> >
> >> On 10/10/2025 11:11, Boris Brezillon wrote:
> >>> Hook-up drm_gem_dmabuf_{begin,end}_cpu_access() to drm_gem_sync() so
> >>> that drivers relying on the default prime_dmabuf_ops can still have
> >>> a way to prepare for CPU accesses from outside the UMD.
> >>>
> >>> v2:
> >>> - New commit
> >>>
> >>> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> >>> ---
> >>> drivers/gpu/drm/drm_prime.c | 36 ++++++++++++++++++++++++++++++++++++
> >>> include/drm/drm_prime.h | 5 +++++
> >>> 2 files changed, 41 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
> >>> index 43a10b4af43a..03c09f9ab129 100644
> >>> --- a/drivers/gpu/drm/drm_prime.c
> >>> +++ b/drivers/gpu/drm/drm_prime.c
> >>> @@ -823,6 +823,40 @@ int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
> >>> }
> >>> EXPORT_SYMBOL(drm_gem_dmabuf_mmap);
> >>>
> >>> +int drm_gem_dmabuf_begin_cpu_access(struct dma_buf *dma_buf,
> >>> + enum dma_data_direction direction)
> >>> +{
> >>> + struct drm_gem_object *obj = dma_buf->priv;
> >>> + enum drm_gem_object_access_flags access = DRM_GEM_OBJECT_CPU_ACCESS;
> >>> +
> >>> + if (direction == DMA_FROM_DEVICE)
> >>> + access |= DRM_GEM_OBJECT_READ_ACCESS;
> >>> + else if (direction == DMA_BIDIRECTIONAL)
> >>> + access |= DRM_GEM_OBJECT_RW_ACCESS;
> >>> + else
> >>> + return -EINVAL;
> >>> +
> >>> + return drm_gem_sync(obj, 0, obj->size, access);
> >>> +}
> >>> +EXPORT_SYMBOL(drm_gem_dmabuf_begin_cpu_access);
> >>> +
> >>> +int drm_gem_dmabuf_end_cpu_access(struct dma_buf *dma_buf,
> >>> + enum dma_data_direction direction)
> >>> +{
> >>> + struct drm_gem_object *obj = dma_buf->priv;
> >>> + enum drm_gem_object_access_flags access = DRM_GEM_OBJECT_DEV_ACCESS;
> >>> +
> >>> + if (direction == DMA_TO_DEVICE)
> >>> + access |= DRM_GEM_OBJECT_READ_ACCESS;
> >>> + else if (direction == DMA_BIDIRECTIONAL)
> >>> + access |= DRM_GEM_OBJECT_RW_ACCESS;
> >>> + else
> >>> + return -EINVAL;
> >>> +
> >>> + return drm_gem_sync(obj, 0, obj->size, access);
> >>> +}
> >>> +EXPORT_SYMBOL(drm_gem_dmabuf_end_cpu_access);
> >>
> >> I feel I must be missing something, but why does
> >> drm_gem_dmabuf_begin_cpu_access() reject DMA_TO_DEVICE and
> >> drm_gem_dmabuf_end_cpu_access() reject DMA_FROM_DEVICE?
> >
> > Not really sure what it means to prepare for dev access and synchronize
> > with what the device might have changed in memory. Sounds like device
> > -> device synchronization, which is not what this API is for.
> >
> > Similarly preparing for CPU access with TO_DEVICE (AKA forcing previous
> > CPU changes to be visible to the device) doesn't make sense either.
>
> Well those operations might well be no-ops, but we shouldn't return an
> error just because of that.
Fair enough.
>
> >>
> >> My understanding is that these begin/end calls should be bracketing the
> >> operation and the same direction should be specified for each.
> >
> > If [1] is correct and the begin/end_cpu_access() is based on the
> > dma_sync_ semantics, nope, that's not how it's supposed to work. The
> > way I see it, it just expresses the cache operations you want to take
> > place around your CPU access.
>
> Well that function completely ignored the direction flag. Looking at a
> caller of the APIs it seems very common to bracket the work with the
> same direction - admittedly they are all DMA_FROM_DEVICE examples I've
> found. E.g. a random FB driver:
>
> https://elixir.bootlin.com/linux/v6.17.1/source/drivers/gpu/drm/solomon/ssd130x.c#L1026
>
> > drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
>
> I have to admit I don't really know what the point of this is - and the
> error return isn't used beyond a drm_err(). But we don't want to spam
> the logs with errors!
Sure, I can return zero instead of an error.
>
> > If you read data from the CPU, you want dir=FROM_DEVICE in your
> > begin_cpu_access(), so that the CPU caches are invalidated. If you
> > write from the CPU, you want dir=TO_DEVICE in your end_cpu_access. If
> > you know you will be reading again soon, you might want to pass
> > dir=BIDIR in your end_cpu_access(), though I'm not too sure what's the
> > benefit of that to be honest.
> >
> > [1]https://elixir.bootlin.com/linux/v6.17.1/source/drivers/gpu/drm/tegra/gem.c#L684
>
> The documentation[2] states:
>
> > * - Fallback operations in the kernel, for example when a device is connected
> > * over USB and the kernel needs to shuffle the data around first before
> > * sending it away. Cache coherency is handled by bracketing any transactions
> > * with calls to dma_buf_begin_cpu_access() and dma_buf_end_cpu_access()
> > * access.
>
> So I guess the purpose is where it's not "cache coherency" as such, but
> actually having to shuffle the data between different memories.
The fact it's using the same flags that are used for dma_sync_ ops
makes it so confusing :face_palm:. I wish these were separate R/W
flags instead encoding what the CPU intends to do, and that the API
would make it clear that the same flags should be passed on both begin
and end. Anyway, I guess returning 0 instead of an error fixes the
problem, so I'll do that.
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH v2 03/13] drm/shmem: Add a drm_gem_shmem_sync() helper
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
2025-10-10 10:11 ` [PATCH v2 01/13] drm/gem: Add a drm_gem_object_funcs::sync() and a drm_gem_sync() helper Boris Brezillon
2025-10-10 10:11 ` [PATCH v2 02/13] drm/prime: Provide default ::{begin, end}_cpu_access() implementations Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 10:11 ` [PATCH v2 04/13] drm/panthor: Expose the selected coherency protocol to the UMD Boris Brezillon
` (9 subsequent siblings)
12 siblings, 0 replies; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
From: Faith Ekstrand <faith.ekstrand@collabora.com>
This enables syncing mapped GEM objects between the CPU and GPU via calls
to dma_sync_*(). It's a bit annoying as it requires walking the sg_table
so it's best if every driver doesn't hand-roll it.
When we're dealing with a dma-buf, the synchronization requests are
forwarded to the exporter through dma_buf_{begin,end}_cpu_access().
We provide a drm_gem_shmem_object_sync() for drivers that want to
have this default implementation as their drm_gem_object_funcs::sync,
but we don't set drm_gem_shmem_funcs::sync to
drm_gem_shmem_object_sync() to avoid changing the behavior of drivers
currently relying on the default gem_shmem vtable.
v2:
- s/drm_gem_shmem_sync_mmap/drm_gem_shmem_sync/
- Change the prototype to match drm_gem_object_funcs::sync()
- Add a wrapper for drm_gem_object_funcs::sync()
Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/drm_gem_shmem_helper.c | 89 ++++++++++++++++++++++++++
include/drm/drm_gem_shmem_helper.h | 11 ++++
2 files changed, 100 insertions(+)
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index dc94a27710e5..4094bd243cc8 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -690,6 +690,95 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct
}
EXPORT_SYMBOL_GPL(drm_gem_shmem_mmap);
+/**
+ * drm_gem_shmem_sync - Sync CPU-mapped data to/from the device
+ * @shmem: shmem GEM object
+ * @offset: Offset into the GEM object
+ * @size: Size of the area to sync
+ * @access: Flags describing the access to sync for
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+int drm_gem_shmem_sync(struct drm_gem_shmem_object *shmem, size_t offset,
+ size_t size, enum drm_gem_object_access_flags access)
+{
+ bool for_dev = (access & DRM_GEM_OBJECT_ACCESSOR_MASK) == DRM_GEM_OBJECT_DEV_ACCESS;
+ u32 access_type = access & DRM_GEM_OBJECT_ACCESS_TYPE_MASK;
+ struct drm_device *dev = shmem->base.dev;
+ enum dma_data_direction dir;
+ struct sg_table *sgt;
+ struct scatterlist *sgl;
+ unsigned int count;
+
+ if (access_type == DRM_GEM_OBJECT_RW_ACCESS)
+ dir = DMA_BIDIRECTIONAL;
+ else if (access_type == DRM_GEM_OBJECT_READ_ACCESS)
+ dir = for_dev ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+ else if (access_type == DRM_GEM_OBJECT_WRITE_ACCESS)
+ dir = for_dev ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ else
+ return 0;
+
+ /* Don't bother if it's WC-mapped */
+ if (shmem->map_wc)
+ return 0;
+
+ if (size == 0)
+ return 0;
+
+ if (offset + size < offset || offset + size > shmem->base.size)
+ return -EINVAL;
+
+ if (drm_gem_is_imported(&shmem->base)) {
+ /* We can't do fine-grained syncs with dma_buf and there's no
+ * easy way to guarantee that CPU caches/memory won't get
+ * impacted by the buffer-wide synchronization, so let's fail
+ * instead of pretending we can cope with that.
+ */
+ if (offset != 0 || size != shmem->base.size)
+ return -EINVAL;
+
+ struct dma_buf *dma_buf = shmem->base.import_attach->dmabuf;
+
+ if (for_dev)
+ return dma_buf_end_cpu_access(dma_buf, dir);
+ else
+ return dma_buf_begin_cpu_access(dma_buf, dir);
+ }
+
+ sgt = drm_gem_shmem_get_pages_sgt(shmem);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ for_each_sgtable_dma_sg(sgt, sgl, count) {
+ if (size == 0)
+ break;
+
+ dma_addr_t paddr = sg_dma_address(sgl);
+ size_t len = sg_dma_len(sgl);
+
+ if (len <= offset) {
+ offset -= len;
+ continue;
+ }
+
+ paddr += offset;
+ len -= offset;
+ len = min_t(size_t, len, size);
+ size -= len;
+ offset = 0;
+
+ if (for_dev)
+ dma_sync_single_for_device(dev->dev, paddr, len, dir);
+ else
+ dma_sync_single_for_cpu(dev->dev, paddr, len, dir);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(drm_gem_shmem_sync);
+
/**
* drm_gem_shmem_print_info() - Print &drm_gem_shmem_object info for debugfs
* @shmem: shmem GEM object
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index 589f7bfe7506..6363e4ac9163 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -123,6 +123,8 @@ int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem,
void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem,
struct iosys_map *map);
int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma);
+int drm_gem_shmem_sync(struct drm_gem_shmem_object *shmem, size_t offset,
+ size_t size, enum drm_gem_object_access_flags access);
int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem);
void drm_gem_shmem_unpin_locked(struct drm_gem_shmem_object *shmem);
@@ -279,6 +281,15 @@ static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct v
return drm_gem_shmem_mmap(shmem, vma);
}
+static inline int
+drm_gem_shmem_object_sync(struct drm_gem_object *obj, size_t offset,
+ size_t size, enum drm_gem_object_access_flags access)
+{
+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+
+ return drm_gem_shmem_sync(shmem, offset, size, access);
+}
+
/*
* Driver ops
*/
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread

* [PATCH v2 04/13] drm/panthor: Expose the selected coherency protocol to the UMD
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
` (2 preceding siblings ...)
2025-10-10 10:11 ` [PATCH v2 03/13] drm/shmem: Add a drm_gem_shmem_sync() helper Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 14:22 ` Steven Price
2025-10-10 10:11 ` [PATCH v2 05/13] drm/panthor: Add a PANTHOR_BO_SYNC ioctl Boris Brezillon
` (8 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
If we want to be able to skip CPU cache maintenance operations on
CPU-cached mappings, the UMD needs to know the kind of coherency
in place. Add a field to drm_panthor_gpu_info to do that. We can re-use
a padding field for that since this object is write-only from the
KMD perspective, and the UMD should just ignore it.
v2:
- New commit
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_device.c | 6 +++-
drivers/gpu/drm/panthor/panthor_gpu.c | 2 +-
include/uapi/drm/panthor_drm.h | 39 ++++++++++++++++++++++--
3 files changed, 42 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
index c7033d82cef5..afe24a03a71c 100644
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -25,6 +25,8 @@
static int panthor_gpu_coherency_init(struct panthor_device *ptdev)
{
+ /* Start with no coherency, and update it if the device is flagged coherent. */
+ ptdev->gpu_info.selected_coherency = GPU_COHERENCY_NONE;
ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
if (!ptdev->coherent)
@@ -34,8 +36,10 @@ static int panthor_gpu_coherency_init(struct panthor_device *ptdev)
* ACE protocol has never been supported for command stream frontend GPUs.
*/
if ((gpu_read(ptdev, GPU_COHERENCY_FEATURES) &
- GPU_COHERENCY_PROT_BIT(ACE_LITE)))
+ GPU_COHERENCY_PROT_BIT(ACE_LITE))) {
+ ptdev->gpu_info.selected_coherency = GPU_COHERENCY_PROT_BIT(ACE_LITE);
return 0;
+ }
drm_err(&ptdev->base, "Coherency not supported by the device");
return -ENOTSUPP;
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
index 9d98720ce03f..a95c0b94ef58 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -49,7 +49,7 @@ struct panthor_gpu {
static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
{
gpu_write(ptdev, GPU_COHERENCY_PROTOCOL,
- ptdev->coherent ? GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE);
+ ptdev->gpu_info.selected_coherency);
}
static void panthor_gpu_l2_config_set(struct panthor_device *ptdev)
diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index 467d365ed7ba..b9e483ff5e21 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -245,6 +245,26 @@ enum drm_panthor_dev_query_type {
DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO,
};
+/**
+ * enum drm_panthor_gpu_coherency: Type of GPU coherency
+ */
+enum drm_panthor_gpu_coherency {
+ /**
+ * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE Lite coherency.
+ */
+ DRM_PANTHOR_GPU_COHERENCY_ACE_LITE = 1 << 0,
+
+ /**
+ * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE coherency.
+ */
+ DRM_PANTHOR_GPU_COHERENCY_ACE = 1 << 1,
+
+ /**
+ * @DRM_PANTHOR_GPU_COHERENCY_NONE: No coherency.
+ */
+ DRM_PANTHOR_GPU_COHERENCY_NONE = 31,
+};
+
/**
* struct drm_panthor_gpu_info - GPU information
*
@@ -301,7 +321,16 @@ struct drm_panthor_gpu_info {
*/
__u32 thread_max_barrier_size;
- /** @coherency_features: Coherency features. */
+ /**
+ * @coherency_features: Coherency features.
+ *
+ * Combination of drm_panthor_gpu_coherency flags.
+ *
+ * Note that this is just what the coherency protocols supported by the
+ * GPU, but the actual coherency in place depends on the SoC
+ * integration and is reflected by
+ * drm_panthor_gpu_info::selected_coherency.
+ */
__u32 coherency_features;
/** @texture_features: Texture features. */
@@ -310,8 +339,12 @@ struct drm_panthor_gpu_info {
/** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */
__u32 as_present;
- /** @pad0: MBZ. */
- __u32 pad0;
+ /**
+ * @select_coherency: Coherency selected for this device.
+ *
+ * One of drm_panthor_gpu_coherency.
+ */
+ __u32 selected_coherency;
/** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */
__u64 shader_present;
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread

* Re: [PATCH v2 04/13] drm/panthor: Expose the selected coherency protocol to the UMD
2025-10-10 10:11 ` [PATCH v2 04/13] drm/panthor: Expose the selected coherency protocol to the UMD Boris Brezillon
@ 2025-10-10 14:22 ` Steven Price
2025-10-10 15:08 ` Boris Brezillon
0 siblings, 1 reply; 33+ messages in thread
From: Steven Price @ 2025-10-10 14:22 UTC (permalink / raw)
To: Boris Brezillon, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
On 10/10/2025 11:11, Boris Brezillon wrote:
> If we want to be able to skip CPU cache maintenance operations on
> CPU-cached mappings, the UMD needs to know the kind of coherency
> in place. Add a field to drm_panthor_gpu_info to do that. We can re-use
> a padding field for that since this object is write-only from the
> KMD perspective, and the UMD should just ignore it.
>
> v2:
> - New commit
>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> ---
> drivers/gpu/drm/panthor/panthor_device.c | 6 +++-
> drivers/gpu/drm/panthor/panthor_gpu.c | 2 +-
> include/uapi/drm/panthor_drm.h | 39 ++++++++++++++++++++++--
> 3 files changed, 42 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
> index c7033d82cef5..afe24a03a71c 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.c
> +++ b/drivers/gpu/drm/panthor/panthor_device.c
> @@ -25,6 +25,8 @@
>
> static int panthor_gpu_coherency_init(struct panthor_device *ptdev)
> {
> + /* Start with no coherency, and update it if the device is flagged coherent. */
> + ptdev->gpu_info.selected_coherency = GPU_COHERENCY_NONE;
> ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
>
> if (!ptdev->coherent)
> @@ -34,8 +36,10 @@ static int panthor_gpu_coherency_init(struct panthor_device *ptdev)
> * ACE protocol has never been supported for command stream frontend GPUs.
> */
> if ((gpu_read(ptdev, GPU_COHERENCY_FEATURES) &
> - GPU_COHERENCY_PROT_BIT(ACE_LITE)))
> + GPU_COHERENCY_PROT_BIT(ACE_LITE))) {
> + ptdev->gpu_info.selected_coherency = GPU_COHERENCY_PROT_BIT(ACE_LITE);
> return 0;
> + }
>
> drm_err(&ptdev->base, "Coherency not supported by the device");
> return -ENOTSUPP;
> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> index 9d98720ce03f..a95c0b94ef58 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> @@ -49,7 +49,7 @@ struct panthor_gpu {
> static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
> {
> gpu_write(ptdev, GPU_COHERENCY_PROTOCOL,
> - ptdev->coherent ? GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE);
> + ptdev->gpu_info.selected_coherency);
It looks like an existing bug, but GPU_COHERENCY_PROTOCOL doesn't take a
bit mask. So we should be just writing GPU_COHERENCY_ACE_LITE not
GPU_COHERENCY_PROT_BIT(ACE_LITE).
> }
>
> static void panthor_gpu_l2_config_set(struct panthor_device *ptdev)
> diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
> index 467d365ed7ba..b9e483ff5e21 100644
> --- a/include/uapi/drm/panthor_drm.h
> +++ b/include/uapi/drm/panthor_drm.h
> @@ -245,6 +245,26 @@ enum drm_panthor_dev_query_type {
> DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO,
> };
>
> +/**
> + * enum drm_panthor_gpu_coherency: Type of GPU coherency
> + */
> +enum drm_panthor_gpu_coherency {
> + /**
> + * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE Lite coherency.
> + */
> + DRM_PANTHOR_GPU_COHERENCY_ACE_LITE = 1 << 0,
> +
> + /**
> + * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE coherency.
Copy/paste mistake ^^^^^
> + */
> + DRM_PANTHOR_GPU_COHERENCY_ACE = 1 << 1,
> +
> + /**
> + * @DRM_PANTHOR_GPU_COHERENCY_NONE: No coherency.
> + */
> + DRM_PANTHOR_GPU_COHERENCY_NONE = 31,
> +};
This is a mix of bit mask and non-bit mask. I'm assuming this was
intended for the newly added selected_coherency field, in which case it
shouldn't be shifting - the values are 0 and 1 for ace_lite and ace.
Thanks,
Steve
> +
> /**
> * struct drm_panthor_gpu_info - GPU information
> *
> @@ -301,7 +321,16 @@ struct drm_panthor_gpu_info {
> */
> __u32 thread_max_barrier_size;
>
> - /** @coherency_features: Coherency features. */
> + /**
> + * @coherency_features: Coherency features.
> + *
> + * Combination of drm_panthor_gpu_coherency flags.
> + *
> + * Note that this is just what the coherency protocols supported by the
> + * GPU, but the actual coherency in place depends on the SoC
> + * integration and is reflected by
> + * drm_panthor_gpu_info::selected_coherency.
> + */
> __u32 coherency_features;
>
> /** @texture_features: Texture features. */
> @@ -310,8 +339,12 @@ struct drm_panthor_gpu_info {
> /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */
> __u32 as_present;
>
> - /** @pad0: MBZ. */
> - __u32 pad0;
> + /**
> + * @select_coherency: Coherency selected for this device.
> + *
> + * One of drm_panthor_gpu_coherency.
> + */
> + __u32 selected_coherency;
>
> /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */
> __u64 shader_present;
^ permalink raw reply [flat|nested] 33+ messages in thread

* Re: [PATCH v2 04/13] drm/panthor: Expose the selected coherency protocol to the UMD
2025-10-10 14:22 ` Steven Price
@ 2025-10-10 15:08 ` Boris Brezillon
2025-10-10 15:36 ` Steven Price
0 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 15:08 UTC (permalink / raw)
To: Steven Price
Cc: Liviu Dudau, Adrián Larumbe, dri-devel, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter,
Faith Ekstrand, kernel
On Fri, 10 Oct 2025 15:22:50 +0100
Steven Price <steven.price@arm.com> wrote:
> On 10/10/2025 11:11, Boris Brezillon wrote:
> > If we want to be able to skip CPU cache maintenance operations on
> > CPU-cached mappings, the UMD needs to know the kind of coherency
> > in place. Add a field to drm_panthor_gpu_info to do that. We can re-use
> > a padding field for that since this object is write-only from the
> > KMD perspective, and the UMD should just ignore it.
> >
> > v2:
> > - New commit
> >
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> > ---
> > drivers/gpu/drm/panthor/panthor_device.c | 6 +++-
> > drivers/gpu/drm/panthor/panthor_gpu.c | 2 +-
> > include/uapi/drm/panthor_drm.h | 39 ++++++++++++++++++++++--
> > 3 files changed, 42 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
> > index c7033d82cef5..afe24a03a71c 100644
> > --- a/drivers/gpu/drm/panthor/panthor_device.c
> > +++ b/drivers/gpu/drm/panthor/panthor_device.c
> > @@ -25,6 +25,8 @@
> >
> > static int panthor_gpu_coherency_init(struct panthor_device *ptdev)
> > {
> > + /* Start with no coherency, and update it if the device is flagged coherent. */
> > + ptdev->gpu_info.selected_coherency = GPU_COHERENCY_NONE;
> > ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
> >
> > if (!ptdev->coherent)
> > @@ -34,8 +36,10 @@ static int panthor_gpu_coherency_init(struct panthor_device *ptdev)
> > * ACE protocol has never been supported for command stream frontend GPUs.
> > */
> > if ((gpu_read(ptdev, GPU_COHERENCY_FEATURES) &
> > - GPU_COHERENCY_PROT_BIT(ACE_LITE)))
> > + GPU_COHERENCY_PROT_BIT(ACE_LITE))) {
> > + ptdev->gpu_info.selected_coherency = GPU_COHERENCY_PROT_BIT(ACE_LITE);
> > return 0;
> > + }
> >
> > drm_err(&ptdev->base, "Coherency not supported by the device");
> > return -ENOTSUPP;
> > diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> > index 9d98720ce03f..a95c0b94ef58 100644
> > --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> > +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> > @@ -49,7 +49,7 @@ struct panthor_gpu {
> > static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
> > {
> > gpu_write(ptdev, GPU_COHERENCY_PROTOCOL,
> > - ptdev->coherent ? GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE);
> > + ptdev->gpu_info.selected_coherency);
>
> It looks like an existing bug, but GPU_COHERENCY_PROTOCOL doesn't take a
> bit mask. So we should be just writing GPU_COHERENCY_ACE_LITE not
> GPU_COHERENCY_PROT_BIT(ACE_LITE).
Oops. Should I prepare a fix, or does someone at Arm intend to send a
fix for this one?
>
> > }
> >
> > static void panthor_gpu_l2_config_set(struct panthor_device *ptdev)
> > diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
> > index 467d365ed7ba..b9e483ff5e21 100644
> > --- a/include/uapi/drm/panthor_drm.h
> > +++ b/include/uapi/drm/panthor_drm.h
> > @@ -245,6 +245,26 @@ enum drm_panthor_dev_query_type {
> > DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO,
> > };
> >
> > +/**
> > + * enum drm_panthor_gpu_coherency: Type of GPU coherency
> > + */
> > +enum drm_panthor_gpu_coherency {
> > + /**
> > + * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE Lite coherency.
> > + */
> > + DRM_PANTHOR_GPU_COHERENCY_ACE_LITE = 1 << 0,
> > +
> > + /**
> > + * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE coherency.
>
> Copy/paste mistake ^^^^^
Will fix.
>
> > + */
> > + DRM_PANTHOR_GPU_COHERENCY_ACE = 1 << 1,
> > +
> > + /**
> > + * @DRM_PANTHOR_GPU_COHERENCY_NONE: No coherency.
> > + */
> > + DRM_PANTHOR_GPU_COHERENCY_NONE = 31,
> > +};
>
> This is a mix of bit mask and non-bit mask. I'm assuming this was
> intended for the newly added selected_coherency field, in which case it
> shouldn't be shifting - the values are 0 and 1 for ace_lite and ace.
Yeah, I think I went back and forth on this, and just picked the worst
option in the end. I'll make it a real enum with the mapping you
suggested.
^ permalink raw reply [flat|nested] 33+ messages in thread

* Re: [PATCH v2 04/13] drm/panthor: Expose the selected coherency protocol to the UMD
2025-10-10 15:08 ` Boris Brezillon
@ 2025-10-10 15:36 ` Steven Price
0 siblings, 0 replies; 33+ messages in thread
From: Steven Price @ 2025-10-10 15:36 UTC (permalink / raw)
To: Boris Brezillon
Cc: Liviu Dudau, Adrián Larumbe, dri-devel, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter,
Faith Ekstrand, kernel
On 10/10/2025 16:08, Boris Brezillon wrote:
> On Fri, 10 Oct 2025 15:22:50 +0100
> Steven Price <steven.price@arm.com> wrote:
>
>> On 10/10/2025 11:11, Boris Brezillon wrote:
>>> If we want to be able to skip CPU cache maintenance operations on
>>> CPU-cached mappings, the UMD needs to know the kind of coherency
>>> in place. Add a field to drm_panthor_gpu_info to do that. We can re-use
>>> a padding field for that since this object is write-only from the
>>> KMD perspective, and the UMD should just ignore it.
>>>
>>> v2:
>>> - New commit
>>>
>>> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
>>> ---
>>> drivers/gpu/drm/panthor/panthor_device.c | 6 +++-
>>> drivers/gpu/drm/panthor/panthor_gpu.c | 2 +-
>>> include/uapi/drm/panthor_drm.h | 39 ++++++++++++++++++++++--
>>> 3 files changed, 42 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
>>> index c7033d82cef5..afe24a03a71c 100644
>>> --- a/drivers/gpu/drm/panthor/panthor_device.c
>>> +++ b/drivers/gpu/drm/panthor/panthor_device.c
>>> @@ -25,6 +25,8 @@
>>>
>>> static int panthor_gpu_coherency_init(struct panthor_device *ptdev)
>>> {
>>> + /* Start with no coherency, and update it if the device is flagged coherent. */
>>> + ptdev->gpu_info.selected_coherency = GPU_COHERENCY_NONE;
>>> ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
>>>
>>> if (!ptdev->coherent)
>>> @@ -34,8 +36,10 @@ static int panthor_gpu_coherency_init(struct panthor_device *ptdev)
>>> * ACE protocol has never been supported for command stream frontend GPUs.
>>> */
>>> if ((gpu_read(ptdev, GPU_COHERENCY_FEATURES) &
>>> - GPU_COHERENCY_PROT_BIT(ACE_LITE)))
>>> + GPU_COHERENCY_PROT_BIT(ACE_LITE))) {
>>> + ptdev->gpu_info.selected_coherency = GPU_COHERENCY_PROT_BIT(ACE_LITE);
>>> return 0;
>>> + }
>>>
>>> drm_err(&ptdev->base, "Coherency not supported by the device");
>>> return -ENOTSUPP;
>>> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
>>> index 9d98720ce03f..a95c0b94ef58 100644
>>> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
>>> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
>>> @@ -49,7 +49,7 @@ struct panthor_gpu {
>>> static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
>>> {
>>> gpu_write(ptdev, GPU_COHERENCY_PROTOCOL,
>>> - ptdev->coherent ? GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE);
>>> + ptdev->gpu_info.selected_coherency);
>>
>> It looks like an existing bug, but GPU_COHERENCY_PROTOCOL doesn't take a
>> bit mask. So we should be just writing GPU_COHERENCY_ACE_LITE not
>> GPU_COHERENCY_PROT_BIT(ACE_LITE).
>
> Oops. Should I prepare a fix, or does someone at Arm intend to send a
> fix for this one?
If you're happy to write up a fix then please do!
Thanks,
Steve
>>
>>> }
>>>
>>> static void panthor_gpu_l2_config_set(struct panthor_device *ptdev)
>>> diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
>>> index 467d365ed7ba..b9e483ff5e21 100644
>>> --- a/include/uapi/drm/panthor_drm.h
>>> +++ b/include/uapi/drm/panthor_drm.h
>>> @@ -245,6 +245,26 @@ enum drm_panthor_dev_query_type {
>>> DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO,
>>> };
>>>
>>> +/**
>>> + * enum drm_panthor_gpu_coherency: Type of GPU coherency
>>> + */
>>> +enum drm_panthor_gpu_coherency {
>>> + /**
>>> + * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE Lite coherency.
>>> + */
>>> + DRM_PANTHOR_GPU_COHERENCY_ACE_LITE = 1 << 0,
>>> +
>>> + /**
>>> + * @DRM_PANTHOR_GPU_COHERENCY_ACE_LITE: ACE coherency.
>>
>> Copy/paste mistake ^^^^^
>
> Will fix.
>
>>
>>> + */
>>> + DRM_PANTHOR_GPU_COHERENCY_ACE = 1 << 1,
>>> +
>>> + /**
>>> + * @DRM_PANTHOR_GPU_COHERENCY_NONE: No coherency.
>>> + */
>>> + DRM_PANTHOR_GPU_COHERENCY_NONE = 31,
>>> +};
>>
>> This is a mix of bit mask and non-bit mask. I'm assuming this was
>> intended for the newly added selected_coherency field, in which case it
>> shouldn't be shifting - the values are 0 and 1 for ace_lite and ace.
>
> Yeah, I think I went back and forth on this, and just picked the worst
> option in the end. I'll make it a real enum with the mapping you
> suggested.
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH v2 05/13] drm/panthor: Add a PANTHOR_BO_SYNC ioctl
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
` (3 preceding siblings ...)
2025-10-10 10:11 ` [PATCH v2 04/13] drm/panthor: Expose the selected coherency protocol to the UMD Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 14:31 ` Steven Price
2025-10-10 10:11 ` [PATCH v2 06/13] drm/panthor: Add an ioctl to query BO flags Boris Brezillon
` (7 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
From: Faith Ekstrand <faith.ekstrand@collabora.com>
This will be used by the UMD to synchronize CPU-cached mappings when
the UMD can't do it directly (no usermode cache maintenance instruction
on Arm32).
v2:
- Change the flags so they better match the drm_gem_shmem_sync()
semantics
Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_drv.c | 47 ++++++++++++++++++-
drivers/gpu/drm/panthor/panthor_gem.c | 21 +++++++++
drivers/gpu/drm/panthor/panthor_gem.h | 3 ++
include/uapi/drm/panthor_drm.h | 66 +++++++++++++++++++++++++++
4 files changed, 136 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index fb4b293f17f0..857954d2ac7b 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -175,7 +175,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \
PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \
PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \
- PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs))
+ PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs), \
+ PANTHOR_UOBJ_DECL(struct drm_panthor_bo_sync_op, size))
/**
* PANTHOR_UOBJ_SET() - Copy a kernel object to a user object.
@@ -1394,6 +1395,49 @@ static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev,
return 0;
}
+#define PANTHOR_BO_SYNC_OP_FLAGS \
+ (DRM_PANTHOR_BO_SYNC_FOR_DEV | DRM_PANTHOR_BO_SYNC_FOR_READ | \
+ DRM_PANTHOR_BO_SYNC_FOR_WRITE)
+
+static int panthor_ioctl_bo_sync(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_panthor_bo_sync *args = data;
+ struct drm_panthor_bo_sync_op *ops;
+ struct drm_gem_object *obj;
+ int ret = 0;
+
+ ret = PANTHOR_UOBJ_GET_ARRAY(ops, &args->ops);
+ if (ret)
+ return ret;
+
+ for (u32 i = 0; i < args->ops.count; i++) {
+ if (ops[i].flags & ~PANTHOR_BO_SYNC_OP_FLAGS) {
+ ret = -EINVAL;
+ goto err_ops;
+ }
+
+ obj = drm_gem_object_lookup(file, ops[i].handle);
+ if (!obj) {
+ ret = -ENOENT;
+ goto err_ops;
+ }
+
+ ret = panthor_gem_bo_sync(obj, ops[i].flags,
+ ops[i].offset, ops[i].size);
+
+ drm_gem_object_put(obj);
+
+ if (ret)
+ goto err_ops;
+ }
+
+err_ops:
+ kvfree(ops);
+
+ return ret;
+}
+
static int
panthor_open(struct drm_device *ddev, struct drm_file *file)
{
@@ -1468,6 +1512,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW),
PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW),
PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW),
+ PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW),
};
static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
index 156c7a0b62a2..617e04134d30 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.c
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -338,6 +338,27 @@ panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label)
panthor_gem_bo_set_label(bo->obj, str);
}
+int
+panthor_gem_bo_sync(struct drm_gem_object *obj, u32 flags,
+ u64 offset, u64 size)
+{
+ enum drm_gem_object_access_flags gem_access_flags = 0;
+ struct panthor_gem_object *bo = to_panthor_bo(obj);
+
+ if (flags & DRM_PANTHOR_BO_SYNC_FOR_DEV)
+ gem_access_flags |= DRM_GEM_OBJECT_DEV_ACCESS;
+ else
+ gem_access_flags |= DRM_GEM_OBJECT_CPU_ACCESS;
+
+ if (flags & DRM_PANTHOR_BO_SYNC_FOR_READ)
+ gem_access_flags |= DRM_GEM_OBJECT_READ_ACCESS;
+
+ if (flags & DRM_PANTHOR_BO_SYNC_FOR_WRITE)
+ gem_access_flags |= DRM_GEM_OBJECT_WRITE_ACCESS;
+
+ return drm_gem_shmem_sync(&bo->base, offset, size, gem_access_flags);
+}
+
#ifdef CONFIG_DEBUG_FS
struct gem_size_totals {
size_t size;
diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h
index 80c6e24112d0..c08c7c066840 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.h
+++ b/drivers/gpu/drm/panthor/panthor_gem.h
@@ -147,6 +147,9 @@ panthor_gem_create_with_handle(struct drm_file *file,
void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label);
void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label);
+int panthor_gem_bo_sync(struct drm_gem_object *obj, u32 flags,
+ u64 offset, u64 size);
+
static inline u64
panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo)
{
diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index b9e483ff5e21..4b10b3c289e9 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -144,6 +144,9 @@ enum drm_panthor_ioctl_id {
* pgoff_t size.
*/
DRM_PANTHOR_SET_USER_MMIO_OFFSET,
+
+ /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */
+ DRM_PANTHOR_BO_SYNC,
};
/**
@@ -1073,6 +1076,67 @@ struct drm_panthor_set_user_mmio_offset {
__u64 offset;
};
+/**
+ * enum drm_panthor_bo_sync_op_flags - BO sync flags
+ */
+enum drm_panthor_bo_sync_op_flags {
+ /**
+ * @DRM_PANTHOR_BO_SYNC_FOR_CPU: Sync data for CPU accesses.
+ */
+ DRM_PANTHOR_BO_SYNC_FOR_CPU = (0 << 0),
+
+ /**
+ * @DRM_PANTHOR_BO_SYNC_FOR_DEV: Sync data for device accesses.
+ */
+ DRM_PANTHOR_BO_SYNC_FOR_DEV = (1 << 0),
+
+ /**
+ * @DRM_PANTHOR_BO_SYNC_FOR_READ: Sync for reads.
+ */
+ DRM_PANTHOR_BO_SYNC_FOR_READ = (1 << 1),
+
+ /**
+ * @DRM_PANTHOR_BO_SYNC_FOR_WRITE: Sync for writes.
+ */
+ DRM_PANTHOR_BO_SYNC_FOR_WRITE = (1 << 2),
+};
+
+/**
+ * struct drm_panthor_bo_sync_op - BO map sync op
+ */
+struct drm_panthor_bo_sync_op {
+ /** @handle: Handle of the buffer object to sync. */
+ __u32 handle;
+
+ /** @flags: Flags controlling the sync operation. */
+ __u32 flags;
+
+ /**
+ * @offset: Offset into the BO at which the sync range starts.
+ *
+ * This will be rounded down to the nearest cache line as needed.
+ */
+ __u64 offset;
+
+ /**
+ * @size: Size of the range to sync
+ *
+ * @size + @offset will be rounded up to the nearest cache line as
+ * needed.
+ */
+ __u64 size;
+};
+
+/**
+ * struct drm_panthor_bo_sync - BO map sync request
+ */
+struct drm_panthor_bo_sync {
+ /**
+ * @ops: Array of struct drm_panthor_bo_sync_op sync operations.
+ */
+ struct drm_panthor_obj_array ops;
+};
+
/**
* DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number
* @__access: Access type. Must be R, W or RW.
@@ -1119,6 +1183,8 @@ enum {
DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label),
DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET =
DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset),
+ DRM_IOCTL_PANTHOR_BO_SYNC =
+ DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync),
};
#if defined(__cplusplus)
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread

* Re: [PATCH v2 05/13] drm/panthor: Add a PANTHOR_BO_SYNC ioctl
2025-10-10 10:11 ` [PATCH v2 05/13] drm/panthor: Add a PANTHOR_BO_SYNC ioctl Boris Brezillon
@ 2025-10-10 14:31 ` Steven Price
0 siblings, 0 replies; 33+ messages in thread
From: Steven Price @ 2025-10-10 14:31 UTC (permalink / raw)
To: Boris Brezillon, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
On 10/10/2025 11:11, Boris Brezillon wrote:
> From: Faith Ekstrand <faith.ekstrand@collabora.com>
>
> This will be used by the UMD to synchronize CPU-cached mappings when
> the UMD can't do it directly (no usermode cache maintenance instruction
> on Arm32).
>
> v2:
> - Change the flags so they better match the drm_gem_shmem_sync()
> semantics
>
> Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
> ---
> drivers/gpu/drm/panthor/panthor_drv.c | 47 ++++++++++++++++++-
> drivers/gpu/drm/panthor/panthor_gem.c | 21 +++++++++
> drivers/gpu/drm/panthor/panthor_gem.h | 3 ++
> include/uapi/drm/panthor_drm.h | 66 +++++++++++++++++++++++++++
> 4 files changed, 136 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
> index fb4b293f17f0..857954d2ac7b 100644
> --- a/drivers/gpu/drm/panthor/panthor_drv.c
> +++ b/drivers/gpu/drm/panthor/panthor_drv.c
> @@ -175,7 +175,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
> PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \
> PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \
> PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \
> - PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs))
> + PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs), \
> + PANTHOR_UOBJ_DECL(struct drm_panthor_bo_sync_op, size))
>
> /**
> * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object.
> @@ -1394,6 +1395,49 @@ static int panthor_ioctl_set_user_mmio_offset(struct drm_device *ddev,
> return 0;
> }
>
> +#define PANTHOR_BO_SYNC_OP_FLAGS \
> + (DRM_PANTHOR_BO_SYNC_FOR_DEV | DRM_PANTHOR_BO_SYNC_FOR_READ | \
> + DRM_PANTHOR_BO_SYNC_FOR_WRITE)
> +
> +static int panthor_ioctl_bo_sync(struct drm_device *ddev, void *data,
> + struct drm_file *file)
> +{
> + struct drm_panthor_bo_sync *args = data;
> + struct drm_panthor_bo_sync_op *ops;
> + struct drm_gem_object *obj;
> + int ret = 0;
> +
> + ret = PANTHOR_UOBJ_GET_ARRAY(ops, &args->ops);
> + if (ret)
> + return ret;
> +
> + for (u32 i = 0; i < args->ops.count; i++) {
> + if (ops[i].flags & ~PANTHOR_BO_SYNC_OP_FLAGS) {
> + ret = -EINVAL;
> + goto err_ops;
> + }
> +
> + obj = drm_gem_object_lookup(file, ops[i].handle);
> + if (!obj) {
> + ret = -ENOENT;
> + goto err_ops;
> + }
> +
> + ret = panthor_gem_bo_sync(obj, ops[i].flags,
> + ops[i].offset, ops[i].size);
> +
> + drm_gem_object_put(obj);
> +
> + if (ret)
> + goto err_ops;
> + }
> +
> +err_ops:
> + kvfree(ops);
> +
> + return ret;
> +}
> +
> static int
> panthor_open(struct drm_device *ddev, struct drm_file *file)
> {
> @@ -1468,6 +1512,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
> PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW),
> PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW),
> PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW),
> + PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW),
> };
>
> static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
> diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
> index 156c7a0b62a2..617e04134d30 100644
> --- a/drivers/gpu/drm/panthor/panthor_gem.c
> +++ b/drivers/gpu/drm/panthor/panthor_gem.c
> @@ -338,6 +338,27 @@ panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label)
> panthor_gem_bo_set_label(bo->obj, str);
> }
>
> +int
> +panthor_gem_bo_sync(struct drm_gem_object *obj, u32 flags,
> + u64 offset, u64 size)
> +{
> + enum drm_gem_object_access_flags gem_access_flags = 0;
> + struct panthor_gem_object *bo = to_panthor_bo(obj);
> +
> + if (flags & DRM_PANTHOR_BO_SYNC_FOR_DEV)
> + gem_access_flags |= DRM_GEM_OBJECT_DEV_ACCESS;
> + else
> + gem_access_flags |= DRM_GEM_OBJECT_CPU_ACCESS;
> +
> + if (flags & DRM_PANTHOR_BO_SYNC_FOR_READ)
> + gem_access_flags |= DRM_GEM_OBJECT_READ_ACCESS;
> +
> + if (flags & DRM_PANTHOR_BO_SYNC_FOR_WRITE)
> + gem_access_flags |= DRM_GEM_OBJECT_WRITE_ACCESS;
> +
> + return drm_gem_shmem_sync(&bo->base, offset, size, gem_access_flags);
> +}
> +
> #ifdef CONFIG_DEBUG_FS
> struct gem_size_totals {
> size_t size;
> diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h
> index 80c6e24112d0..c08c7c066840 100644
> --- a/drivers/gpu/drm/panthor/panthor_gem.h
> +++ b/drivers/gpu/drm/panthor/panthor_gem.h
> @@ -147,6 +147,9 @@ panthor_gem_create_with_handle(struct drm_file *file,
> void panthor_gem_bo_set_label(struct drm_gem_object *obj, const char *label);
> void panthor_gem_kernel_bo_set_label(struct panthor_kernel_bo *bo, const char *label);
>
> +int panthor_gem_bo_sync(struct drm_gem_object *obj, u32 flags,
> + u64 offset, u64 size);
> +
> static inline u64
> panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo)
> {
> diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
> index b9e483ff5e21..4b10b3c289e9 100644
> --- a/include/uapi/drm/panthor_drm.h
> +++ b/include/uapi/drm/panthor_drm.h
> @@ -144,6 +144,9 @@ enum drm_panthor_ioctl_id {
> * pgoff_t size.
> */
> DRM_PANTHOR_SET_USER_MMIO_OFFSET,
> +
> + /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */
> + DRM_PANTHOR_BO_SYNC,
> };
>
> /**
> @@ -1073,6 +1076,67 @@ struct drm_panthor_set_user_mmio_offset {
> __u64 offset;
> };
>
> +/**
> + * enum drm_panthor_bo_sync_op_flags - BO sync flags
> + */
> +enum drm_panthor_bo_sync_op_flags {
> + /**
> + * @DRM_PANTHOR_BO_SYNC_FOR_CPU: Sync data for CPU accesses.
> + */
> + DRM_PANTHOR_BO_SYNC_FOR_CPU = (0 << 0),
> +
> + /**
> + * @DRM_PANTHOR_BO_SYNC_FOR_DEV: Sync data for device accesses.
> + */
> + DRM_PANTHOR_BO_SYNC_FOR_DEV = (1 << 0),
> +
> + /**
> + * @DRM_PANTHOR_BO_SYNC_FOR_READ: Sync for reads.
> + */
> + DRM_PANTHOR_BO_SYNC_FOR_READ = (1 << 1),
> +
> + /**
> + * @DRM_PANTHOR_BO_SYNC_FOR_WRITE: Sync for writes.
> + */
> + DRM_PANTHOR_BO_SYNC_FOR_WRITE = (1 << 2),
> +};
> +
> +/**
> + * struct drm_panthor_bo_sync_op - BO map sync op
> + */
> +struct drm_panthor_bo_sync_op {
> + /** @handle: Handle of the buffer object to sync. */
> + __u32 handle;
> +
> + /** @flags: Flags controlling the sync operation. */
> + __u32 flags;
> +
> + /**
> + * @offset: Offset into the BO at which the sync range starts.
> + *
> + * This will be rounded down to the nearest cache line as needed.
> + */
> + __u64 offset;
> +
> + /**
> + * @size: Size of the range to sync
> + *
> + * @size + @offset will be rounded up to the nearest cache line as
> + * needed.
> + */
> + __u64 size;
> +};
> +
> +/**
> + * struct drm_panthor_bo_sync - BO map sync request
> + */
> +struct drm_panthor_bo_sync {
> + /**
> + * @ops: Array of struct drm_panthor_bo_sync_op sync operations.
> + */
> + struct drm_panthor_obj_array ops;
> +};
> +
> /**
> * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number
> * @__access: Access type. Must be R, W or RW.
> @@ -1119,6 +1183,8 @@ enum {
> DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label),
> DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET =
> DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset),
> + DRM_IOCTL_PANTHOR_BO_SYNC =
> + DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync),
> };
>
> #if defined(__cplusplus)
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH v2 06/13] drm/panthor: Add an ioctl to query BO flags
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
` (4 preceding siblings ...)
2025-10-10 10:11 ` [PATCH v2 05/13] drm/panthor: Add a PANTHOR_BO_SYNC ioctl Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 14:31 ` Steven Price
2025-10-10 10:11 ` [PATCH v2 07/13] drm/panthor: Add flag to map GEM object Write-Back Cacheable Boris Brezillon
` (6 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
This is useful when importing BOs, so we can know about cacheability
and flush the caches when needed.
We can also know when the buffer comes from a different subsystem and
take proper actions (avoid CPU mappings, or do kernel-based syncs
instead of userland cache flushes).
v2:
- New commit
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_drv.c | 24 +++++++++++
include/uapi/drm/panthor_drm.h | 57 +++++++++++++++++++++++++++
2 files changed, 81 insertions(+)
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 857954d2ac7b..9004d0ba0e45 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1438,6 +1438,29 @@ static int panthor_ioctl_bo_sync(struct drm_device *ddev, void *data,
return ret;
}
+static int panthor_ioctl_bo_query_info(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_panthor_bo_query_info *args = data;
+ struct panthor_gem_object *bo;
+ struct drm_gem_object *obj;
+
+ obj = drm_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ bo = to_panthor_bo(obj);
+ args->pad = 0;
+ args->create_flags = bo->flags;
+
+ args->extra_flags = 0;
+ if (drm_gem_is_imported(&bo->base.base))
+ args->extra_flags |= DRM_PANTHOR_BO_IS_IMPORTED;
+
+ drm_gem_object_put(obj);
+ return 0;
+}
+
static int
panthor_open(struct drm_device *ddev, struct drm_file *file)
{
@@ -1513,6 +1536,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW),
PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW),
PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW),
+ PANTHOR_IOCTL(BO_QUERY_INFO, bo_query_info, DRM_RENDER_ALLOW),
};
static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index 4b10b3c289e9..54502286c8b1 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -147,6 +147,13 @@ enum drm_panthor_ioctl_id {
/** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */
DRM_PANTHOR_BO_SYNC,
+
+ /**
+ * @DRM_PANTHOR_BO_QUERY_INFO: Query information about a BO.
+ *
+ * This is useful for imported BOs.
+ */
+ DRM_PANTHOR_BO_QUERY_INFO,
};
/**
@@ -1137,6 +1144,54 @@ struct drm_panthor_bo_sync {
struct drm_panthor_obj_array ops;
};
+/**
+ * enum drm_panthor_bo_extra_flags - Set of flags returned on a BO_QUERY_INFO request
+ *
+ * Those are flags reflecting BO properties that are not directly coming from the flags
+ * passed at creation time, or information on BOs that were imported from other drivers.
+ */
+enum drm_panthor_bo_extra_flags {
+ /**
+ * @DRM_PANTHOR_BO_IS_IMPORTED: BO has been imported from an external driver.
+ *
+ * Note that imported dma-buf handles are not flagged as imported if they
+ * were exported by panthor. Only buffers that are coming from other drivers
+ * (dma heaps, other GPUs, display controllers, V4L, ...).
+ *
+ * It's also important to note that all imported BOs are mapped cached and can't
+ * be considered IO-coherent even if the GPU is. This means they require explicit
+ * syncs that must go through the DRM_PANTHOR_BO_SYNC ioctl (userland cache
+ * maintenance is not allowed in that case, because extra operations might be
+ * needed to make changes visible to the CPU/device, like buffer migration when the
+ * exporter is a GPU with its own VRAM).
+ */
+ DRM_PANTHOR_BO_IS_IMPORTED = (1 << 0),
+};
+
+/**
+ * struct drm_panthor_bo_query_info - Query BO info
+ */
+struct drm_panthor_bo_query_info {
+ /** @handle: Handle of the buffer object to query flags on. */
+ __u32 handle;
+
+ /**
+ * @extra_flags: Combination of enum drm_panthor_bo_extra_flags flags.
+ */
+ __u32 extra_flags;
+
+ /**
+ * @create_flags: Flags passed at creation time.
+ *
+ * Combination of enum drm_panthor_bo_flags flags.
+ * Will be zero if the buffer comes from a different driver.
+ */
+ __u32 create_flags;
+
+ /** @pad: Will be zero on return. */
+ __u32 pad;
+};
+
/**
* DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number
* @__access: Access type. Must be R, W or RW.
@@ -1185,6 +1240,8 @@ enum {
DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset),
DRM_IOCTL_PANTHOR_BO_SYNC =
DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync),
+ DRM_IOCTL_PANTHOR_BO_QUERY_INFO =
+ DRM_IOCTL_PANTHOR(WR, BO_QUERY_INFO, bo_query_info),
};
#if defined(__cplusplus)
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH v2 06/13] drm/panthor: Add an ioctl to query BO flags
2025-10-10 10:11 ` [PATCH v2 06/13] drm/panthor: Add an ioctl to query BO flags Boris Brezillon
@ 2025-10-10 14:31 ` Steven Price
0 siblings, 0 replies; 33+ messages in thread
From: Steven Price @ 2025-10-10 14:31 UTC (permalink / raw)
To: Boris Brezillon, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
On 10/10/2025 11:11, Boris Brezillon wrote:
> This is useful when importing BOs, so we can know about cacheability
> and flush the caches when needed.
>
> We can also know when the buffer comes from a different subsystem and
> take proper actions (avoid CPU mappings, or do kernel-based syncs
> instead of userland cache flushes).
>
> v2:
> - New commit
>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
> ---
> drivers/gpu/drm/panthor/panthor_drv.c | 24 +++++++++++
> include/uapi/drm/panthor_drm.h | 57 +++++++++++++++++++++++++++
> 2 files changed, 81 insertions(+)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
> index 857954d2ac7b..9004d0ba0e45 100644
> --- a/drivers/gpu/drm/panthor/panthor_drv.c
> +++ b/drivers/gpu/drm/panthor/panthor_drv.c
> @@ -1438,6 +1438,29 @@ static int panthor_ioctl_bo_sync(struct drm_device *ddev, void *data,
> return ret;
> }
>
> +static int panthor_ioctl_bo_query_info(struct drm_device *ddev, void *data,
> + struct drm_file *file)
> +{
> + struct drm_panthor_bo_query_info *args = data;
> + struct panthor_gem_object *bo;
> + struct drm_gem_object *obj;
> +
> + obj = drm_gem_object_lookup(file, args->handle);
> + if (!obj)
> + return -ENOENT;
> +
> + bo = to_panthor_bo(obj);
> + args->pad = 0;
> + args->create_flags = bo->flags;
> +
> + args->extra_flags = 0;
> + if (drm_gem_is_imported(&bo->base.base))
> + args->extra_flags |= DRM_PANTHOR_BO_IS_IMPORTED;
> +
> + drm_gem_object_put(obj);
> + return 0;
> +}
> +
> static int
> panthor_open(struct drm_device *ddev, struct drm_file *file)
> {
> @@ -1513,6 +1536,7 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
> PANTHOR_IOCTL(BO_SET_LABEL, bo_set_label, DRM_RENDER_ALLOW),
> PANTHOR_IOCTL(SET_USER_MMIO_OFFSET, set_user_mmio_offset, DRM_RENDER_ALLOW),
> PANTHOR_IOCTL(BO_SYNC, bo_sync, DRM_RENDER_ALLOW),
> + PANTHOR_IOCTL(BO_QUERY_INFO, bo_query_info, DRM_RENDER_ALLOW),
> };
>
> static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
> diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
> index 4b10b3c289e9..54502286c8b1 100644
> --- a/include/uapi/drm/panthor_drm.h
> +++ b/include/uapi/drm/panthor_drm.h
> @@ -147,6 +147,13 @@ enum drm_panthor_ioctl_id {
>
> /** @DRM_PANTHOR_BO_SYNC: Sync BO data to/from the device */
> DRM_PANTHOR_BO_SYNC,
> +
> + /**
> + * @DRM_PANTHOR_BO_QUERY_INFO: Query information about a BO.
> + *
> + * This is useful for imported BOs.
> + */
> + DRM_PANTHOR_BO_QUERY_INFO,
> };
>
> /**
> @@ -1137,6 +1144,54 @@ struct drm_panthor_bo_sync {
> struct drm_panthor_obj_array ops;
> };
>
> +/**
> + * enum drm_panthor_bo_extra_flags - Set of flags returned on a BO_QUERY_INFO request
> + *
> + * Those are flags reflecting BO properties that are not directly coming from the flags
> + * passed are creation time, or information on BOs that were imported from other drivers.
> + */
> +enum drm_panthor_bo_extra_flags {
> + /**
> + * @DRM_PANTHOR_BO_IS_IMPORTED: BO has been imported from an external driver.
> + *
> + * Note that imported dma-buf handles are not flagged as imported if they
> + * where exported by panthor. Only buffers that are coming from other drivers
> + * (dma heaps, other GPUs, display controllers, V4L, ...).
> + *
> + * It's also important to note that all imported BOs are mapped cached and can't
> + * be considered IO-coherent even if the GPU is. This means they require explicit
> + * syncs that must go through the DRM_PANTHOR_BO_SYNC ioctl (userland cache
> + * maintenance is not allowed in that case, because extra operations might be
> + * needed to make changes visible to the CPU/device, like buffer migration when the
> + * exporter is a GPU with its own VRAM).
> + */
> + DRM_PANTHOR_BO_IS_IMPORTED = (1 << 0),
> +};
> +
> +/**
> + * struct drm_panthor_bo_query_info - Query BO info
> + */
> +struct drm_panthor_bo_query_info {
> + /** @handle: Handle of the buffer object to query flags on. */
> + __u32 handle;
> +
> + /**
> + * @extra_flags: Combination of enum drm_panthor_bo_extra_flags flags.
> + */
> + __u32 extra_flags;
> +
> + /**
> + * @create_flags: Flags passed at creation time.
> + *
> + * Combination of enum drm_panthor_bo_flags flags.
> + * Will be zero if the buffer comes from a different driver.
> + */
> + __u32 create_flags;
> +
> + /** @pad: Will be zero on return. */
> + __u32 pad;
> +};
> +
> /**
> * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number
> * @__access: Access type. Must be R, W or RW.
> @@ -1185,6 +1240,8 @@ enum {
> DRM_IOCTL_PANTHOR(WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset),
> DRM_IOCTL_PANTHOR_BO_SYNC =
> DRM_IOCTL_PANTHOR(WR, BO_SYNC, bo_sync),
> + DRM_IOCTL_PANTHOR_BO_QUERY_INFO =
> + DRM_IOCTL_PANTHOR(WR, BO_QUERY_INFO, bo_query_info),
> };
>
> #if defined(__cplusplus)
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH v2 07/13] drm/panthor: Add flag to map GEM object Write-Back Cacheable
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
` (5 preceding siblings ...)
2025-10-10 10:11 ` [PATCH v2 06/13] drm/panthor: Add an ioctl to query BO flags Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 14:41 ` Steven Price
2025-10-10 10:11 ` [PATCH v2 08/13] drm/panthor: Bump the driver version to 1.6 Boris Brezillon
` (5 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, Loïc Molinari,
kernel
From: Loïc Molinari <loic.molinari@collabora.com>
Will be used by the UMD to optimize CPU accesses to buffers
that are frequently read by the CPU, or on which the access
pattern makes non-cacheable mappings inefficient.
Mapping buffers CPU-cached implies taking care of the CPU
cache maintenance in the UMD, unless the GPU is IO coherent.
v2:
- Add more to the commit message
- Tweak the doc
- Make sure we sync the section of the BO pointing to the CS
syncobj before we read its seqno
Signed-off-by: Loïc Molinari <loic.molinari@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_drv.c | 8 +++++++-
drivers/gpu/drm/panthor/panthor_gem.c | 3 +++
drivers/gpu/drm/panthor/panthor_sched.c | 18 ++++++++++++++++--
include/uapi/drm/panthor_drm.h | 9 +++++++++
4 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 9004d0ba0e45..af9b431255a4 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -900,7 +900,8 @@ static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data,
return panthor_vm_pool_destroy_vm(pfile->vms, args->id);
}
-#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP
+#define PANTHOR_BO_FLAGS (DRM_PANTHOR_BO_NO_MMAP | \
+ DRM_PANTHOR_BO_WB_MMAP)
static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data,
struct drm_file *file)
@@ -919,6 +920,10 @@ static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data,
goto out_dev_exit;
}
+ if ((args->flags & DRM_PANTHOR_BO_NO_MMAP) &&
+ (args->flags & DRM_PANTHOR_BO_WB_MMAP))
+ return -EINVAL;
+
if (args->exclusive_vm_id) {
vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id);
if (!vm) {
@@ -1450,6 +1455,7 @@ static int panthor_ioctl_bo_query_info(struct drm_device *ddev, void *data,
return -ENOENT;
bo = to_panthor_bo(obj);
+
args->pad = 0;
args->create_flags = bo->flags;
diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
index 617e04134d30..a0ccc316e375 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.c
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -280,6 +280,9 @@ panthor_gem_create_with_handle(struct drm_file *file,
bo = to_panthor_bo(&shmem->base);
bo->flags = flags;
+ if (flags & DRM_PANTHOR_BO_WB_MMAP)
+ shmem->map_wc = false;
+
if (exclusive_vm) {
bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm);
drm_gem_object_get(bo->exclusive_vm_root_gem);
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 0cc9055f4ee5..5f9dba3a9d65 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -868,8 +868,11 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue
struct iosys_map map;
int ret;
- if (queue->syncwait.kmap)
- return queue->syncwait.kmap + queue->syncwait.offset;
+ if (queue->syncwait.kmap) {
+ bo = container_of(queue->syncwait.obj,
+ struct panthor_gem_object, base.base);
+ goto out_sync;
+ }
bo = panthor_vm_get_bo_for_va(group->vm,
queue->syncwait.gpu_va,
@@ -886,6 +889,17 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue
if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap))
goto err_put_syncwait_obj;
+out_sync:
+ /* Make sure the CPU caches are invalidated before the seqno is read.
+ * drm_gem_shmem_sync() is a NOP if wap_wc=false, so no need to check
+ * it here.
+ */
+ drm_gem_shmem_sync(&bo->base, queue->syncwait.offset,
+ queue->syncwait.sync64 ?
+ sizeof(struct panthor_syncobj_64b) :
+ sizeof(struct panthor_syncobj_32b),
+ DRM_GEM_OBJECT_CPU_ACCESS | DRM_GEM_OBJECT_READ_ACCESS);
+
return queue->syncwait.kmap + queue->syncwait.offset;
err_put_syncwait_obj:
diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
index 54502286c8b1..e77d65e51c64 100644
--- a/include/uapi/drm/panthor_drm.h
+++ b/include/uapi/drm/panthor_drm.h
@@ -681,6 +681,15 @@ struct drm_panthor_vm_get_state {
enum drm_panthor_bo_flags {
/** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */
DRM_PANTHOR_BO_NO_MMAP = (1 << 0),
+
+ /**
+ * @DRM_PANTHOR_BO_WB_MMAP: Force "Write-Back Cacheable" CPU mapping.
+ *
+ * CPU map the buffer object in userspace by forcing the "Write-Back
+ * Cacheable" cacheability attribute. The mapping otherwise uses the
+ * "Non-Cacheable" attribute if the GPU is not IO coherent.
+ */
+ DRM_PANTHOR_BO_WB_MMAP = (1 << 1),
};
/**
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH v2 07/13] drm/panthor: Add flag to map GEM object Write-Back Cacheable
2025-10-10 10:11 ` [PATCH v2 07/13] drm/panthor: Add flag to map GEM object Write-Back Cacheable Boris Brezillon
@ 2025-10-10 14:41 ` Steven Price
0 siblings, 0 replies; 33+ messages in thread
From: Steven Price @ 2025-10-10 14:41 UTC (permalink / raw)
To: Boris Brezillon, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, Loïc Molinari,
kernel
On 10/10/2025 11:11, Boris Brezillon wrote:
> From: Loïc Molinari <loic.molinari@collabora.com>
>
> Will be used by the UMD to optimize CPU accesses to buffers
> that are frequently read by the CPU, or on which the access
> pattern makes non-cacheable mappings inefficient.
>
> Mapping buffers CPU-cached implies taking care of the CPU
> cache maintenance in the UMD, unless the GPU is IO coherent.
>
> v2:
> - Add more to the commit message
> - Tweak the doc
> - Make sure we sync the section of the BO pointing to the CS
> syncobj before we read its seqno
>
> Signed-off-by: Loïc Molinari <loic.molinari@collabora.com>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> ---
> drivers/gpu/drm/panthor/panthor_drv.c | 8 +++++++-
> drivers/gpu/drm/panthor/panthor_gem.c | 3 +++
> drivers/gpu/drm/panthor/panthor_sched.c | 18 ++++++++++++++++--
> include/uapi/drm/panthor_drm.h | 9 +++++++++
> 4 files changed, 35 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
> index 9004d0ba0e45..af9b431255a4 100644
> --- a/drivers/gpu/drm/panthor/panthor_drv.c
> +++ b/drivers/gpu/drm/panthor/panthor_drv.c
> @@ -900,7 +900,8 @@ static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data,
> return panthor_vm_pool_destroy_vm(pfile->vms, args->id);
> }
>
> -#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP
> +#define PANTHOR_BO_FLAGS (DRM_PANTHOR_BO_NO_MMAP | \
> + DRM_PANTHOR_BO_WB_MMAP)
>
> static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data,
> struct drm_file *file)
> @@ -919,6 +920,10 @@ static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data,
> goto out_dev_exit;
> }
>
> + if ((args->flags & DRM_PANTHOR_BO_NO_MMAP) &&
> + (args->flags & DRM_PANTHOR_BO_WB_MMAP))
> + return -EINVAL;
> +
> if (args->exclusive_vm_id) {
> vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id);
> if (!vm) {
> @@ -1450,6 +1455,7 @@ static int panthor_ioctl_bo_query_info(struct drm_device *ddev, void *data,
> return -ENOENT;
>
> bo = to_panthor_bo(obj);
> +
NIT: Stray newline added.
> args->pad = 0;
> args->create_flags = bo->flags;
>
> diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
> index 617e04134d30..a0ccc316e375 100644
> --- a/drivers/gpu/drm/panthor/panthor_gem.c
> +++ b/drivers/gpu/drm/panthor/panthor_gem.c
> @@ -280,6 +280,9 @@ panthor_gem_create_with_handle(struct drm_file *file,
> bo = to_panthor_bo(&shmem->base);
> bo->flags = flags;
>
> + if (flags & DRM_PANTHOR_BO_WB_MMAP)
> + shmem->map_wc = false;
> +
> if (exclusive_vm) {
> bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm);
> drm_gem_object_get(bo->exclusive_vm_root_gem);
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index 0cc9055f4ee5..5f9dba3a9d65 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -868,8 +868,11 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue
> struct iosys_map map;
> int ret;
>
> - if (queue->syncwait.kmap)
> - return queue->syncwait.kmap + queue->syncwait.offset;
> + if (queue->syncwait.kmap) {
> + bo = container_of(queue->syncwait.obj,
> + struct panthor_gem_object, base.base);
> + goto out_sync;
> + }
>
> bo = panthor_vm_get_bo_for_va(group->vm,
> queue->syncwait.gpu_va,
> @@ -886,6 +889,17 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue
> if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap))
> goto err_put_syncwait_obj;
>
> +out_sync:
> + /* Make sure the CPU caches are invalidated before the seqno is read.
> + * drm_gem_shmem_sync() is a NOP if wap_wc=false, so no need to check
NIT: s/wap_wc/map_wc
> + * it here.
> + */
> + drm_gem_shmem_sync(&bo->base, queue->syncwait.offset,
> + queue->syncwait.sync64 ?
> + sizeof(struct panthor_syncobj_64b) :
> + sizeof(struct panthor_syncobj_32b),
> + DRM_GEM_OBJECT_CPU_ACCESS | DRM_GEM_OBJECT_READ_ACCESS);
> +
> return queue->syncwait.kmap + queue->syncwait.offset;
>
> err_put_syncwait_obj:
> diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
> index 54502286c8b1..e77d65e51c64 100644
> --- a/include/uapi/drm/panthor_drm.h
> +++ b/include/uapi/drm/panthor_drm.h
> @@ -681,6 +681,15 @@ struct drm_panthor_vm_get_state {
> enum drm_panthor_bo_flags {
> /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */
> DRM_PANTHOR_BO_NO_MMAP = (1 << 0),
> +
> + /**
> + * @DRM_PANTHOR_BO_WB_MMAP: Force "Write-Back Cacheable" CPU mapping.
> + *
> + * CPU map the buffer object in userspace by forcing the "Write-Back
> + * Cacheable" cacheability attribute. The mapping otherwise uses the
> + * "Non-Cacheable" attribute if the GPU is not IO coherent.
> + */
> + DRM_PANTHOR_BO_WB_MMAP = (1 << 1),
> };
>
> /**
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH v2 08/13] drm/panthor: Bump the driver version to 1.6
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
` (6 preceding siblings ...)
2025-10-10 10:11 ` [PATCH v2 07/13] drm/panthor: Add flag to map GEM object Write-Back Cacheable Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 14:41 ` Steven Price
2025-10-10 10:11 ` [PATCH v2 09/13] drm/panfrost: Expose the selected coherency protocol to the UMD Boris Brezillon
` (4 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Bump the driver version to reflect the new cached-CPU mapping
capability.
v2:
- Quickly describe what the new version exposes in the commit message
Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panthor/panthor_drv.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index af9b431255a4..f65c50e14294 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1676,6 +1676,10 @@ static void panthor_debugfs_init(struct drm_minor *minor)
* - 1.3 - adds DRM_PANTHOR_GROUP_STATE_INNOCENT flag
* - 1.4 - adds DRM_IOCTL_PANTHOR_BO_SET_LABEL ioctl
* - 1.5 - adds DRM_PANTHOR_SET_USER_MMIO_OFFSET ioctl
+ * - 1.6 - adds DRM_PANTHOR_BO_WB_MMAP flag
+ * - adds DRM_IOCTL_PANTHOR_BO_SYNC ioctl
+ * - adds DRM_IOCTL_PANTHOR_BO_QUERY_INFO ioctl
+ * - adds drm_panthor_gpu_info::selected_coherency
*/
static const struct drm_driver panthor_drm_driver = {
.driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ |
@@ -1689,7 +1693,7 @@ static const struct drm_driver panthor_drm_driver = {
.name = "panthor",
.desc = "Panthor DRM driver",
.major = 1,
- .minor = 5,
+ .minor = 6,
.gem_create_object = panthor_gem_create_object,
.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH v2 08/13] drm/panthor: Bump the driver version to 1.6
2025-10-10 10:11 ` [PATCH v2 08/13] drm/panthor: Bump the driver version to 1.6 Boris Brezillon
@ 2025-10-10 14:41 ` Steven Price
0 siblings, 0 replies; 33+ messages in thread
From: Steven Price @ 2025-10-10 14:41 UTC (permalink / raw)
To: Boris Brezillon, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
On 10/10/2025 11:11, Boris Brezillon wrote:
> From: Faith Ekstrand <faith.ekstrand@collabora.com>
>
> Bump the driver version to reflect the new cached-CPU mapping
> capability.
>
> v2:
> - Quickly describe what the new version exposes in the commit message
>
> Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
> ---
> drivers/gpu/drm/panthor/panthor_drv.c | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
> index af9b431255a4..f65c50e14294 100644
> --- a/drivers/gpu/drm/panthor/panthor_drv.c
> +++ b/drivers/gpu/drm/panthor/panthor_drv.c
> @@ -1676,6 +1676,10 @@ static void panthor_debugfs_init(struct drm_minor *minor)
> * - 1.3 - adds DRM_PANTHOR_GROUP_STATE_INNOCENT flag
> * - 1.4 - adds DRM_IOCTL_PANTHOR_BO_SET_LABEL ioctl
> * - 1.5 - adds DRM_PANTHOR_SET_USER_MMIO_OFFSET ioctl
> + * - 1.6 - adds DRM_PANTHOR_BO_WB_MMAP flag
> + * - adds DRM_IOCTL_PANTHOR_BO_SYNC ioctl
> + * - adds DRM_IOCTL_PANTHOR_BO_QUERY_INFO ioctl
> + * - adds drm_panthor_gpu_info::selected_coherency
> */
> static const struct drm_driver panthor_drm_driver = {
> .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ |
> @@ -1689,7 +1693,7 @@ static const struct drm_driver panthor_drm_driver = {
> .name = "panthor",
> .desc = "Panthor DRM driver",
> .major = 1,
> - .minor = 5,
> + .minor = 6,
>
> .gem_create_object = panthor_gem_create_object,
> .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH v2 09/13] drm/panfrost: Expose the selected coherency protocol to the UMD
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
` (7 preceding siblings ...)
2025-10-10 10:11 ` [PATCH v2 08/13] drm/panthor: Bump the driver version to 1.6 Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 14:50 ` Steven Price
2025-10-10 10:11 ` [PATCH v2 10/13] drm/panfrost: Add a PANFROST_SYNC_BO ioctl Boris Brezillon
` (3 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
Will be needed if we want to skip CPU cache maintenance operations when
the GPU can snoop CPU caches.
v2:
- New commit
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panfrost/panfrost_device.h | 1 +
drivers/gpu/drm/panfrost/panfrost_drv.c | 1 +
drivers/gpu/drm/panfrost/panfrost_gpu.c | 18 +++++++++++++++++-
drivers/gpu/drm/panfrost/panfrost_regs.h | 2 ++
include/uapi/drm/panfrost_drm.h | 7 +++++++
5 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index 1e73efad02a8..bd38b7ae169e 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -75,6 +75,7 @@ struct panfrost_features {
u32 thread_max_workgroup_sz;
u32 thread_max_barrier_sz;
u32 coherency_features;
+ u32 selected_coherency;
u32 afbc_features;
u32 texture_features[4];
u32 js_features[16];
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 607a5b8448d0..3ffcd08f7745 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -94,6 +94,7 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct
PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15);
PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups);
PANFROST_FEATURE(THREAD_TLS_ALLOC, thread_tls_alloc);
+ PANFROST_FEATURE(SELECTED_COHERENCY, selected_coherency);
case DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP:
ret = panfrost_ioctl_query_timestamp(pfdev, &param->value);
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index 174e190ba40f..fed323e6a307 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -260,7 +260,23 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS);
pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE);
pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE);
- pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
+
+ if (panfrost_has_hw_feature(pfdev, HW_FEATURE_COHERENCY_REG))
+ pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
+ else
+ pfdev->features.coherency_features = COHERENCY_ACE_LITE;
+
+ if (!pfdev->coherent) {
+ pfdev->features.selected_coherency = COHERENCY_NONE;
+ } else if (pfdev->features.coherency_features & COHERENCY_ACE) {
+ pfdev->features.selected_coherency = COHERENCY_ACE;
+ } else if (pfdev->features.coherency_features & COHERENCY_ACE_LITE) {
+ pfdev->features.selected_coherency = COHERENCY_ACE_LITE;
+ } else {
+ drm_WARN(pfdev->ddev, true, "No known coherency protocol supported");
+ pfdev->features.selected_coherency = COHERENCY_NONE;
+ }
+
pfdev->features.afbc_features = gpu_read(pfdev, GPU_AFBC_FEATURES);
for (i = 0; i < 4; i++)
pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i));
diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
index 2b8f1617b836..775ad88f7a86 100644
--- a/drivers/gpu/drm/panfrost/panfrost_regs.h
+++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
@@ -103,8 +103,10 @@
#define GPU_L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */
#define GPU_COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */
+#define GPU_COHERENCY_ENABLE 0x304 /* (RW) Coherency protocol selection */
#define COHERENCY_ACE_LITE BIT(0)
#define COHERENCY_ACE BIT(1)
+#define COHERENCY_NONE 31
#define GPU_STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */
#define GPU_STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index e8b47c9f6976..d3f6c005463c 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -188,6 +188,13 @@ enum drm_panfrost_param {
DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP,
DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY,
DRM_PANFROST_PARAM_ALLOWED_JM_CTX_PRIORITIES,
+ DRM_PANFROST_PARAM_SELECTED_COHERENCY,
+};
+
+enum drm_panfrost_gpu_coherency {
+ DRM_PANFROST_GPU_COHERENCY_ACE_LITE = (1 << 0),
+ DRM_PANFROST_GPU_COHERENCY_ACE = (1 << 1),
+ DRM_PANFROST_GPU_COHERENCY_NONE = 31,
};
struct drm_panfrost_get_param {
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH v2 09/13] drm/panfrost: Expose the selected coherency protocol to the UMD
2025-10-10 10:11 ` [PATCH v2 09/13] drm/panfrost: Expose the selected coherency protocol to the UMD Boris Brezillon
@ 2025-10-10 14:50 ` Steven Price
2025-10-15 11:41 ` Boris Brezillon
0 siblings, 1 reply; 33+ messages in thread
From: Steven Price @ 2025-10-10 14:50 UTC (permalink / raw)
To: Boris Brezillon, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
On 10/10/2025 11:11, Boris Brezillon wrote:
> Will be needed if we want to skip CPU cache maintenance operations when
> the GPU can snoop CPU caches.
>
> v2:
> - New commit
>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> ---
> drivers/gpu/drm/panfrost/panfrost_device.h | 1 +
> drivers/gpu/drm/panfrost/panfrost_drv.c | 1 +
> drivers/gpu/drm/panfrost/panfrost_gpu.c | 18 +++++++++++++++++-
> drivers/gpu/drm/panfrost/panfrost_regs.h | 2 ++
> include/uapi/drm/panfrost_drm.h | 7 +++++++
> 5 files changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> index 1e73efad02a8..bd38b7ae169e 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> @@ -75,6 +75,7 @@ struct panfrost_features {
> u32 thread_max_workgroup_sz;
> u32 thread_max_barrier_sz;
> u32 coherency_features;
> + u32 selected_coherency;
> u32 afbc_features;
> u32 texture_features[4];
> u32 js_features[16];
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 607a5b8448d0..3ffcd08f7745 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -94,6 +94,7 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct
> PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15);
> PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups);
> PANFROST_FEATURE(THREAD_TLS_ALLOC, thread_tls_alloc);
> + PANFROST_FEATURE(SELECTED_COHERENCY, selected_coherency);
>
> case DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP:
> ret = panfrost_ioctl_query_timestamp(pfdev, &param->value);
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> index 174e190ba40f..fed323e6a307 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> @@ -260,7 +260,23 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
> pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS);
> pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE);
> pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE);
> - pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
> +
> + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_COHERENCY_REG))
> + pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
> + else
> + pfdev->features.coherency_features = COHERENCY_ACE_LITE;
> +
> + if (!pfdev->coherent) {
> + pfdev->features.selected_coherency = COHERENCY_NONE;
> + } else if (pfdev->features.coherency_features & COHERENCY_ACE) {
> + pfdev->features.selected_coherency = COHERENCY_ACE;
> + } else if (pfdev->features.coherency_features & COHERENCY_ACE_LITE) {
> + pfdev->features.selected_coherency = COHERENCY_ACE_LITE;
> + } else {
> + drm_WARN(pfdev->ddev, true, "No known coherency protocol supported");
> + pfdev->features.selected_coherency = COHERENCY_NONE;
> + }
Same comment as for panthor about not using bits when we can't have more
than one. But also here because selected_coherency is only a UAPI
concept, it would make sense to use the UAPI values, i.e.
DRM_PANFROST_GPU_COHERENCY_ACE_LITE etc rather than the private
COHERENCY_ACE_LITE defines.
Although there is actually a COHERENCY_ENABLE register on some GPUs
(BASE_HW_FEATURE_COHERENCY_REG in the kbase driver). Looks like it was
probably introduced for Bifrost. But AFAIK the above checks should be ok.
Thanks,
Steve
> +
> pfdev->features.afbc_features = gpu_read(pfdev, GPU_AFBC_FEATURES);
> for (i = 0; i < 4; i++)
> pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i));
> diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
> index 2b8f1617b836..775ad88f7a86 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_regs.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
> @@ -103,8 +103,10 @@
> #define GPU_L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */
>
> #define GPU_COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */
> +#define GPU_COHERENCY_ENABLE 0x304 /* (RW) Coherency protocol selection */
> #define COHERENCY_ACE_LITE BIT(0)
> #define COHERENCY_ACE BIT(1)
> +#define COHERENCY_NONE 31
>
> #define GPU_STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */
> #define GPU_STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index e8b47c9f6976..d3f6c005463c 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -188,6 +188,13 @@ enum drm_panfrost_param {
> DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP,
> DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY,
> DRM_PANFROST_PARAM_ALLOWED_JM_CTX_PRIORITIES,
> + DRM_PANFROST_PARAM_SELECTED_COHERENCY,
> +};
> +
> +enum drm_panfrost_gpu_coherency {
> + DRM_PANFROST_GPU_COHERENCY_ACE_LITE = (1 << 0),
> + DRM_PANFROST_GPU_COHERENCY_ACE = (1 << 1),
> + DRM_PANFROST_GPU_COHERENCY_NONE = 31,
> };
>
> struct drm_panfrost_get_param {
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH v2 09/13] drm/panfrost: Expose the selected coherency protocol to the UMD
2025-10-10 14:50 ` Steven Price
@ 2025-10-15 11:41 ` Boris Brezillon
2025-10-15 13:18 ` Steven Price
0 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-15 11:41 UTC (permalink / raw)
To: Steven Price
Cc: Liviu Dudau, Adrián Larumbe, dri-devel, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter,
Faith Ekstrand, kernel
On Fri, 10 Oct 2025 15:50:58 +0100
Steven Price <steven.price@arm.com> wrote:
> On 10/10/2025 11:11, Boris Brezillon wrote:
> > Will be needed if we want to skip CPU cache maintenance operations when
> > the GPU can snoop CPU caches.
> >
> > v2:
> > - New commit
> >
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
> > ---
> > drivers/gpu/drm/panfrost/panfrost_device.h | 1 +
> > drivers/gpu/drm/panfrost/panfrost_drv.c | 1 +
> > drivers/gpu/drm/panfrost/panfrost_gpu.c | 18 +++++++++++++++++-
> > drivers/gpu/drm/panfrost/panfrost_regs.h | 2 ++
> > include/uapi/drm/panfrost_drm.h | 7 +++++++
> > 5 files changed, 28 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> > index 1e73efad02a8..bd38b7ae169e 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> > @@ -75,6 +75,7 @@ struct panfrost_features {
> > u32 thread_max_workgroup_sz;
> > u32 thread_max_barrier_sz;
> > u32 coherency_features;
> > + u32 selected_coherency;
> > u32 afbc_features;
> > u32 texture_features[4];
> > u32 js_features[16];
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index 607a5b8448d0..3ffcd08f7745 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -94,6 +94,7 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct
> > PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15);
> > PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups);
> > PANFROST_FEATURE(THREAD_TLS_ALLOC, thread_tls_alloc);
> > + PANFROST_FEATURE(SELECTED_COHERENCY, selected_coherency);
> >
> > case DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP:
> > ret = panfrost_ioctl_query_timestamp(pfdev, &param->value);
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> > index 174e190ba40f..fed323e6a307 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> > @@ -260,7 +260,23 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
> > pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS);
> > pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE);
> > pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE);
> > - pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
> > +
> > + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_COHERENCY_REG))
> > + pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
> > + else
> > + pfdev->features.coherency_features = COHERENCY_ACE_LITE;
> > +
> > + if (!pfdev->coherent) {
> > + pfdev->features.selected_coherency = COHERENCY_NONE;
> > + } else if (pfdev->features.coherency_features & COHERENCY_ACE) {
> > + pfdev->features.selected_coherency = COHERENCY_ACE;
> > + } else if (pfdev->features.coherency_features & COHERENCY_ACE_LITE) {
> > + pfdev->features.selected_coherency = COHERENCY_ACE_LITE;
> > + } else {
> > + drm_WARN(pfdev->ddev, true, "No known coherency protocol supported");
> > + pfdev->features.selected_coherency = COHERENCY_NONE;
> > + }
>
> Same comment as for panthor about not using bits when we can't have more
> than one. But also here because selected_coherency is only a UAPI
> concept, it would make sense to use the UAPI values, i.e.
> DRM_PANFROST_GPU_COHERENCY_ACE_LITE etc rather than the private
> COHERENCY_ACE_LITE defines.
For simplicity (we simply copy the coherency_features from the GPU reg
at the moment), I want the HW/uAPI values to match, so I've added
BUILD_BUG_ON()s. And I think I'd prefer to stick to the defs in
panfrost_regs.h, such that if we ever end up writing that back to
COHERENCY_ENABLE on newer HW, it's obvious we based the initialization
on those HW values.
>
> Although there is actually a COHERENCY_ENABLE register on some GPUs
> (BASE_HW_FEATURE_COHERENCY_REG in the kbase driver). Looks like it was
> probably introduced for Bifrost. But AFAIK the above checks should be ok.
Yep. I didn't dare writing it back, because it's working as-is on all
supported HW, and I don't want to regress things. Note that I've played
with this COHERENCY_ENABLE reg on my amlogic board, which is coherent,
to fake a non-coherent setup, and it works like a charm :-).
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH v2 09/13] drm/panfrost: Expose the selected coherency protocol to the UMD
2025-10-15 11:41 ` Boris Brezillon
@ 2025-10-15 13:18 ` Steven Price
0 siblings, 0 replies; 33+ messages in thread
From: Steven Price @ 2025-10-15 13:18 UTC (permalink / raw)
To: Boris Brezillon
Cc: Liviu Dudau, Adrián Larumbe, dri-devel, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter,
Faith Ekstrand, kernel
On 15/10/2025 12:41, Boris Brezillon wrote:
> On Fri, 10 Oct 2025 15:50:58 +0100
> Steven Price <steven.price@arm.com> wrote:
>
>> On 10/10/2025 11:11, Boris Brezillon wrote:
>>> Will be needed if we want to skip CPU cache maintenance operations when
>>> the GPU can snoop CPU caches.
>>>
>>> v2:
>>> - New commit
>>>
>>> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
>>> ---
>>> drivers/gpu/drm/panfrost/panfrost_device.h | 1 +
>>> drivers/gpu/drm/panfrost/panfrost_drv.c | 1 +
>>> drivers/gpu/drm/panfrost/panfrost_gpu.c | 18 +++++++++++++++++-
>>> drivers/gpu/drm/panfrost/panfrost_regs.h | 2 ++
>>> include/uapi/drm/panfrost_drm.h | 7 +++++++
>>> 5 files changed, 28 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
>>> index 1e73efad02a8..bd38b7ae169e 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
>>> @@ -75,6 +75,7 @@ struct panfrost_features {
>>> u32 thread_max_workgroup_sz;
>>> u32 thread_max_barrier_sz;
>>> u32 coherency_features;
>>> + u32 selected_coherency;
>>> u32 afbc_features;
>>> u32 texture_features[4];
>>> u32 js_features[16];
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
>>> index 607a5b8448d0..3ffcd08f7745 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
>>> @@ -94,6 +94,7 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct
>>> PANFROST_FEATURE_ARRAY(JS_FEATURES, js_features, 15);
>>> PANFROST_FEATURE(NR_CORE_GROUPS, nr_core_groups);
>>> PANFROST_FEATURE(THREAD_TLS_ALLOC, thread_tls_alloc);
>>> + PANFROST_FEATURE(SELECTED_COHERENCY, selected_coherency);
>>>
>>> case DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP:
>>> ret = panfrost_ioctl_query_timestamp(pfdev, &param->value);
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
>>> index 174e190ba40f..fed323e6a307 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
>>> @@ -260,7 +260,23 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
>>> pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS);
>>> pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE);
>>> pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE);
>>> - pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
>>> +
>>> + if (panfrost_has_hw_feature(pfdev, HW_FEATURE_COHERENCY_REG))
>>> + pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
>>> + else
>>> + pfdev->features.coherency_features = COHERENCY_ACE_LITE;
>>> +
>>> + if (!pfdev->coherent) {
>>> + pfdev->features.selected_coherency = COHERENCY_NONE;
>>> + } else if (pfdev->features.coherency_features & COHERENCY_ACE) {
>>> + pfdev->features.selected_coherency = COHERENCY_ACE;
>>> + } else if (pfdev->features.coherency_features & COHERENCY_ACE_LITE) {
>>> + pfdev->features.selected_coherency = COHERENCY_ACE_LITE;
>>> + } else {
>>> + drm_WARN(pfdev->ddev, true, "No known coherency protocol supported");
>>> + pfdev->features.selected_coherency = COHERENCY_NONE;
>>> + }
>>
>> Same comment as for panthor about not using bits when we can't have more
>> than one. But also here because selected_coherency is only a UAPI
>> concept, it would make sense to use the UAPI values, i.e.
>> DRM_PANFROST_GPU_COHERENCY_ACE_LITE etc rather than the private
>> COHERENCY_ACE_LITE defines.
>
> For simplicity (we simply copy the coherency_features from the GPU reg
> at the moment), I want the HW/uAPI values to match, so I've added
> BUILD_BUG_ON()s. And I think I'd prefer to stick to the defs in
> panfrost_regs.h, such that if we ever end up writing that back to
> COHERENCY_ENABLE on newer HW, it's obvious we based the initialization
> on those HW values.
Yeah, BUILD_BUG_ON works as well. It just seemed a little fragile to be
using the 'wrong' symbol - and we've messed these symbols up before ;)
>>
>> Although there is actually a COHERENCY_ENABLE register on some GPUs
>> (BASE_HW_FEATURE_COHERENCY_REG in the kbase driver). Looks like it was
>> probably introduced for Bifrost. But AFAIK the above checks should be ok.
>
> Yep. I didn't dare writing it back, because it's working as-is on all
> supported HW, and I don't want to regress things. Not that I've played
> with this COHERENCY_ENABLE reg on my amlogic board, which is coherent,
> to fake a non-coherent setup, and it works like a charm :-).
>
Yeah, I don't know if it's actually useful beyond testing the
non-coherent mode, so probably not worth changing things.
Thanks,
Steve
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH v2 10/13] drm/panfrost: Add a PANFROST_SYNC_BO ioctl
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
` (8 preceding siblings ...)
2025-10-10 10:11 ` [PATCH v2 09/13] drm/panfrost: Expose the selected coherency protocol to the UMD Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 14:57 ` Steven Price
2025-10-10 10:11 ` [PATCH v2 11/13] drm/panfrost: Add an ioctl to query BO flags Boris Brezillon
` (2 subsequent siblings)
12 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
From: Faith Ekstrand <faith.ekstrand@collabora.com>
This will be used by the UMD to synchronize CPU-cached mappings when
the UMD can't do it directly (no usermode cache maintenance instruction
on Arm32).
v2:
- Add more to the commit message
- Change the flags to better match the drm_gem_shmem_sync semantics
Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panfrost/panfrost_drv.c | 57 +++++++++++++++++++++++++
drivers/gpu/drm/panfrost/panfrost_gem.c | 20 +++++++++
drivers/gpu/drm/panfrost/panfrost_gem.h | 2 +
include/uapi/drm/panfrost_drm.h | 47 ++++++++++++++++++++
4 files changed, 126 insertions(+)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 3ffcd08f7745..cabf544f0437 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -579,6 +579,62 @@ static int panfrost_ioctl_jm_ctx_destroy(struct drm_device *dev, void *data,
return panfrost_jm_ctx_destroy(file, args->handle);
}
+#define PANFROST_BO_SYNC_OP_FLAGS \
+ (PANFROST_BO_SYNC_FOR_DEV | PANFROST_BO_SYNC_FOR_READ | \
+ PANFROST_BO_SYNC_FOR_WRITE)
+
+static int panfrost_ioctl_sync_bo(struct drm_device *ddev, void *data,
+ struct drm_file *file)
+{
+ struct drm_panfrost_sync_bo *args = data;
+ struct drm_panfrost_bo_sync_op *ops;
+ struct drm_gem_object *obj;
+ int ret;
+ u32 i;
+
+ if (args->pad)
+ return -EINVAL;
+
+ ops = kvmalloc_array(args->op_count, sizeof(*ops), GFP_KERNEL);
+ if (!ops) {
+ DRM_DEBUG("Failed to allocate incoming BO sync ops array\n");
+ return -ENOMEM;
+ }
+
+ if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops,
+ args->op_count * sizeof(*ops))) {
+ DRM_DEBUG("Failed to copy in BO sync ops\n");
+ ret = -EFAULT;
+ goto err_ops;
+ }
+
+ for (i = 0; i < args->op_count; i++) {
+ if (ops[i].flags & ~PANFROST_BO_SYNC_OP_FLAGS) {
+ ret = -EINVAL;
+ goto err_ops;
+ }
+
+ obj = drm_gem_object_lookup(file, ops[i].handle);
+ if (!obj) {
+ ret = -ENOENT;
+ goto err_ops;
+ }
+
+ ret = panfrost_gem_sync(obj, ops[i].flags,
+ ops[i].offset, ops[i].size);
+
+ drm_gem_object_put(obj);
+
+ if (ret)
+ goto err_ops;
+ }
+
+err_ops:
+ kvfree(ops);
+
+ return ret;
+}
+
int panfrost_unstable_ioctl_check(void)
{
if (!unstable_ioctls)
@@ -648,6 +704,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
PANFROST_IOCTL(SET_LABEL_BO, set_label_bo, DRM_RENDER_ALLOW),
PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW),
PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW),
+ PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW),
};
static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index 85d6289a6eda..da0362202d94 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -365,6 +365,26 @@ panfrost_gem_set_label(struct drm_gem_object *obj, const char *label)
kfree_const(old_label);
}
+int
+panfrost_gem_sync(struct drm_gem_object *obj, u32 flags,
+ u32 offset, u32 size)
+{
+ struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+ enum drm_gem_object_access_flags gem_access_flags = 0;
+
+ if (flags & PANFROST_BO_SYNC_FOR_DEV)
+ gem_access_flags |= DRM_GEM_OBJECT_DEV_ACCESS;
+ else
+ gem_access_flags |= DRM_GEM_OBJECT_CPU_ACCESS;
+
+ if (flags & PANFROST_BO_SYNC_FOR_READ)
+ gem_access_flags |= DRM_GEM_OBJECT_READ_ACCESS;
+ if (flags & PANFROST_BO_SYNC_FOR_WRITE)
+ gem_access_flags |= DRM_GEM_OBJECT_WRITE_ACCESS;
+
+ return drm_gem_shmem_sync(&bo->base, offset, size, gem_access_flags);
+}
+
void
panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label)
{
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
index 8de3e76f2717..6a0e090aa59b 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -148,6 +148,8 @@ int panfrost_gem_shrinker_init(struct drm_device *dev);
void panfrost_gem_shrinker_cleanup(struct drm_device *dev);
void panfrost_gem_set_label(struct drm_gem_object *obj, const char *label);
+int panfrost_gem_sync(struct drm_gem_object *obj, u32 flags,
+ u32 offset, u32 size);
void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label);
#ifdef CONFIG_DEBUG_FS
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index d3f6c005463c..fcc9f930df5f 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -24,6 +24,7 @@ extern "C" {
#define DRM_PANFROST_SET_LABEL_BO 0x09
#define DRM_PANFROST_JM_CTX_CREATE 0x0a
#define DRM_PANFROST_JM_CTX_DESTROY 0x0b
+#define DRM_PANFROST_SYNC_BO 0x0c
#define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit)
#define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo)
@@ -35,6 +36,7 @@ extern "C" {
#define DRM_IOCTL_PANFROST_SET_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, struct drm_panfrost_set_label_bo)
#define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create)
#define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy)
+#define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo)
/*
* Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module
@@ -266,6 +268,51 @@ struct drm_panfrost_set_label_bo {
__u64 label;
};
+/* Valid flags to pass to drm_panfrost_bo_sync_op */
+#define PANFROST_BO_SYNC_FOR_CPU (0 << 0)
+#define PANFROST_BO_SYNC_FOR_DEV (1 << 0)
+#define PANFROST_BO_SYNC_FOR_READ (1 << 1)
+#define PANFROST_BO_SYNC_FOR_WRITE (1 << 2)
+
+/**
+ * struct drm_panthor_bo_flush_map_op - BO map sync op
+ */
+struct drm_panfrost_bo_sync_op {
+ /** @handle: Handle of the buffer object to sync. */
+ __u32 handle;
+
+ /** @flags: Flags controlling the sync operation. */
+ __u32 flags;
+
+ /**
+ * @offset: Offset into the BO at which the sync range starts.
+ *
+ * This will be rounded down to the nearest cache line as needed.
+ */
+ __u32 offset;
+
+ /**
+ * @size: Size of the range to sync
+ *
+ * @size + @offset will be rounded up to the nearest cache line as
+ * needed.
+ */
+ __u32 size;
+};
+
+/**
+ * struct drm_panfrost_sync_bo - ioctl argument for syncing BO maps
+ */
+struct drm_panfrost_sync_bo {
+ /** Array of struct drm_panfrost_bo_sync_op */
+ __u64 ops;
+
+ /** Number of BO sync ops */
+ __u32 op_count;
+
+ __u32 pad;
+};
+
/* Definitions for coredump decoding in user space */
#define PANFROSTDUMP_MAJOR 1
#define PANFROSTDUMP_MINOR 0
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH v2 10/13] drm/panfrost: Add a PANFROST_SYNC_BO ioctl
2025-10-10 10:11 ` [PATCH v2 10/13] drm/panfrost: Add a PANFROST_SYNC_BO ioctl Boris Brezillon
@ 2025-10-10 14:57 ` Steven Price
0 siblings, 0 replies; 33+ messages in thread
From: Steven Price @ 2025-10-10 14:57 UTC (permalink / raw)
To: Boris Brezillon, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
On 10/10/2025 11:11, Boris Brezillon wrote:
> From: Faith Ekstrand <faith.ekstrand@collabora.com>
>
> This will be used by the UMD to synchronize CPU-cached mappings when
> the UMD can't do it directly (no usermode cache maintenance instruction
> on Arm32).
>
> v2:
> - Add more to the commit message
> - Change the flags to better match the drm_gem_shmem_sync semantics
>
> Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
> ---
> drivers/gpu/drm/panfrost/panfrost_drv.c | 57 +++++++++++++++++++++++++
> drivers/gpu/drm/panfrost/panfrost_gem.c | 20 +++++++++
> drivers/gpu/drm/panfrost/panfrost_gem.h | 2 +
> include/uapi/drm/panfrost_drm.h | 47 ++++++++++++++++++++
> 4 files changed, 126 insertions(+)
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 3ffcd08f7745..cabf544f0437 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -579,6 +579,62 @@ static int panfrost_ioctl_jm_ctx_destroy(struct drm_device *dev, void *data,
> return panfrost_jm_ctx_destroy(file, args->handle);
> }
>
> +#define PANFROST_BO_SYNC_OP_FLAGS \
> + (PANFROST_BO_SYNC_FOR_DEV | PANFROST_BO_SYNC_FOR_READ | \
> + PANFROST_BO_SYNC_FOR_WRITE)
> +
> +static int panfrost_ioctl_sync_bo(struct drm_device *ddev, void *data,
> + struct drm_file *file)
> +{
> + struct drm_panfrost_sync_bo *args = data;
> + struct drm_panfrost_bo_sync_op *ops;
> + struct drm_gem_object *obj;
> + int ret;
> + u32 i;
> +
> + if (args->pad)
> + return -EINVAL;
> +
> + ops = kvmalloc_array(args->op_count, sizeof(*ops), GFP_KERNEL);
> + if (!ops) {
> + DRM_DEBUG("Failed to allocate incoming BO sync ops array\n");
> + return -ENOMEM;
> + }
> +
> + if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops,
> + args->op_count * sizeof(*ops))) {
> + DRM_DEBUG("Failed to copy in BO sync ops\n");
> + ret = -EFAULT;
> + goto err_ops;
> + }
> +
> + for (i = 0; i < args->op_count; i++) {
> + if (ops[i].flags & ~PANFROST_BO_SYNC_OP_FLAGS) {
> + ret = -EINVAL;
> + goto err_ops;
> + }
> +
> + obj = drm_gem_object_lookup(file, ops[i].handle);
> + if (!obj) {
> + ret = -ENOENT;
> + goto err_ops;
> + }
> +
> + ret = panfrost_gem_sync(obj, ops[i].flags,
> + ops[i].offset, ops[i].size);
> +
> + drm_gem_object_put(obj);
> +
> + if (ret)
> + goto err_ops;
> + }
> +
> +err_ops:
> + kvfree(ops);
> +
> + return ret;
> +}
> +
> int panfrost_unstable_ioctl_check(void)
> {
> if (!unstable_ioctls)
> @@ -648,6 +704,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
> PANFROST_IOCTL(SET_LABEL_BO, set_label_bo, DRM_RENDER_ALLOW),
> PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW),
> PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW),
> + PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW),
> };
>
> static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
> index 85d6289a6eda..da0362202d94 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
> @@ -365,6 +365,26 @@ panfrost_gem_set_label(struct drm_gem_object *obj, const char *label)
> kfree_const(old_label);
> }
>
> +int
> +panfrost_gem_sync(struct drm_gem_object *obj, u32 flags,
> + u32 offset, u32 size)
> +{
> + struct panfrost_gem_object *bo = to_panfrost_bo(obj);
> + enum drm_gem_object_access_flags gem_access_flags = 0;
> +
> + if (flags & PANFROST_BO_SYNC_FOR_DEV)
> + gem_access_flags |= DRM_GEM_OBJECT_DEV_ACCESS;
> + else
> + gem_access_flags |= DRM_GEM_OBJECT_CPU_ACCESS;
> +
> + if (flags & PANFROST_BO_SYNC_FOR_READ)
> + gem_access_flags |= DRM_GEM_OBJECT_READ_ACCESS;
> + if (flags & PANFROST_BO_SYNC_FOR_WRITE)
> + gem_access_flags |= DRM_GEM_OBJECT_WRITE_ACCESS;
> +
> + return drm_gem_shmem_sync(&bo->base, offset, size, gem_access_flags);
> +}
> +
> void
> panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label)
> {
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
> index 8de3e76f2717..6a0e090aa59b 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
> @@ -148,6 +148,8 @@ int panfrost_gem_shrinker_init(struct drm_device *dev);
> void panfrost_gem_shrinker_cleanup(struct drm_device *dev);
>
> void panfrost_gem_set_label(struct drm_gem_object *obj, const char *label);
> +int panfrost_gem_sync(struct drm_gem_object *obj, u32 flags,
> + u32 offset, u32 size);
> void panfrost_gem_internal_set_label(struct drm_gem_object *obj, const char *label);
>
> #ifdef CONFIG_DEBUG_FS
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index d3f6c005463c..fcc9f930df5f 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -24,6 +24,7 @@ extern "C" {
> #define DRM_PANFROST_SET_LABEL_BO 0x09
> #define DRM_PANFROST_JM_CTX_CREATE 0x0a
> #define DRM_PANFROST_JM_CTX_DESTROY 0x0b
> +#define DRM_PANFROST_SYNC_BO 0x0c
>
> #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit)
> #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo)
> @@ -35,6 +36,7 @@ extern "C" {
> #define DRM_IOCTL_PANFROST_SET_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, struct drm_panfrost_set_label_bo)
> #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create)
> #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy)
> +#define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo)
>
> /*
> * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module
> @@ -266,6 +268,51 @@ struct drm_panfrost_set_label_bo {
> __u64 label;
> };
>
> +/* Valid flags to pass to drm_panfrost_bo_sync_op */
> +#define PANFROST_BO_SYNC_FOR_CPU (0 << 0)
> +#define PANFROST_BO_SYNC_FOR_DEV (1 << 0)
> +#define PANFROST_BO_SYNC_FOR_READ (1 << 1)
> +#define PANFROST_BO_SYNC_FOR_WRITE (1 << 2)
> +
> +/**
> + * struct drm_panthor_bo_flush_map_op - BO map sync op
> + */
> +struct drm_panfrost_bo_sync_op {
> + /** @handle: Handle of the buffer object to sync. */
> + __u32 handle;
> +
> + /** @flags: Flags controlling the sync operation. */
> + __u32 flags;
> +
> + /**
> + * @offset: Offset into the BO at which the sync range starts.
> + *
> + * This will be rounded down to the nearest cache line as needed.
> + */
> + __u32 offset;
> +
> + /**
> + * @size: Size of the range to sync
> + *
> + * @size + @offset will be rounded up to the nearest cache line as
> + * needed.
> + */
> + __u32 size;
> +};
> +
> +/**
> + * struct drm_panfrost_sync_bo - ioctl argument for syncing BO maps
> + */
> +struct drm_panfrost_sync_bo {
> + /** Array of struct drm_panfrost_bo_sync_op */
> + __u64 ops;
> +
> + /** Number of BO sync ops */
> + __u32 op_count;
> +
> + __u32 pad;
> +};
> +
> /* Definitions for coredump decoding in user space */
> #define PANFROSTDUMP_MAJOR 1
> #define PANFROSTDUMP_MINOR 0
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH v2 11/13] drm/panfrost: Add an ioctl to query BO flags
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
` (9 preceding siblings ...)
2025-10-10 10:11 ` [PATCH v2 10/13] drm/panfrost: Add a PANFROST_SYNC_BO ioctl Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 15:00 ` Steven Price
2025-10-10 10:11 ` [PATCH v2 12/13] drm/panfrost: Add flag to map GEM object Write-Back Cacheable Boris Brezillon
2025-10-10 10:11 ` [PATCH v2 13/13] drm/panfrost: Bump the driver version to 1.6 Boris Brezillon
12 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
This is useful when importing BOs, so we can know about cacheability
and flush the caches when needed.
v2:
- New commit
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panfrost/panfrost_drv.c | 33 +++++++++++++++++++++++++
include/uapi/drm/panfrost_drm.h | 19 ++++++++++++++
2 files changed, 52 insertions(+)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index cabf544f0437..00c0881fa2f0 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -635,6 +635,38 @@ static int panfrost_ioctl_sync_bo(struct drm_device *ddev, void *data,
return ret;
}
+static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_panfrost_query_bo_info *args = data;
+ struct drm_gem_object *gem_obj;
+ struct panfrost_gem_object *bo;
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -ENOENT;
+ }
+
+ bo = to_panfrost_bo(gem_obj);
+ args->pad = 0;
+ args->create_flags = 0;
+ args->extra_flags = 0;
+
+ if (drm_gem_is_imported(gem_obj)) {
+ args->extra_flags |= DRM_PANFROST_BO_IS_IMPORTED;
+ } else {
+ if (bo->noexec)
+ args->create_flags |= PANFROST_BO_NOEXEC;
+
+ if (bo->is_heap)
+ args->create_flags |= PANFROST_BO_HEAP;
+ }
+
+ drm_gem_object_put(gem_obj);
+ return 0;
+}
+
int panfrost_unstable_ioctl_check(void)
{
if (!unstable_ioctls)
@@ -705,6 +737,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW),
PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW),
PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW),
+ PANFROST_IOCTL(QUERY_BO_INFO, query_bo_info, DRM_RENDER_ALLOW),
};
static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index fcc9f930df5f..5832a291adc4 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -25,6 +25,7 @@ extern "C" {
#define DRM_PANFROST_JM_CTX_CREATE 0x0a
#define DRM_PANFROST_JM_CTX_DESTROY 0x0b
#define DRM_PANFROST_SYNC_BO 0x0c
+#define DRM_PANFROST_QUERY_BO_INFO 0x0d
#define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit)
#define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo)
@@ -37,6 +38,7 @@ extern "C" {
#define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create)
#define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy)
#define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo)
+#define DRM_IOCTL_PANFROST_QUERY_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_QUERY_BO_INFO, struct drm_panfrost_query_bo_info)
/*
* Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module
@@ -313,6 +315,23 @@ struct drm_panfrost_sync_bo {
__u32 pad;
};
+/** BO comes from a different subsystem. */
+#define DRM_PANFROST_BO_IS_IMPORTED (1 << 0)
+
+/**
+ * struct drm_panfrost_query_bo_info - ioctl argument for querying BO flags
+ */
+struct drm_panfrost_query_bo_info {
+	/** @handle: Handle of the object being queried. */
+	__u32 handle;
+
+	/** @extra_flags: Extra flags that are not coming from the BO_CREATE ioctl(). */
+	__u32 extra_flags;
+
+	/** @create_flags: Flags passed at creation time. */
+	__u32 create_flags;
+
+	/** @pad: Will be zero on return. */
+	__u32 pad;
+};
+
/* Definitions for coredump decoding in user space */
#define PANFROSTDUMP_MAJOR 1
#define PANFROSTDUMP_MINOR 0
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH v2 11/13] drm/panfrost: Add an ioctl to query BO flags
2025-10-10 10:11 ` [PATCH v2 11/13] drm/panfrost: Add an ioctl to query BO flags Boris Brezillon
@ 2025-10-10 15:00 ` Steven Price
0 siblings, 0 replies; 33+ messages in thread
From: Steven Price @ 2025-10-10 15:00 UTC (permalink / raw)
To: Boris Brezillon, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
On 10/10/2025 11:11, Boris Brezillon wrote:
> This is useful when importing BOs, so we can know about cacheability
> and flush the caches when needed.
>
> v2:
> - New commit
>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Although see comment on the next patch...
Thanks,
Steve
> ---
> drivers/gpu/drm/panfrost/panfrost_drv.c | 33 +++++++++++++++++++++++++
> include/uapi/drm/panfrost_drm.h | 19 ++++++++++++++
> 2 files changed, 52 insertions(+)
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index cabf544f0437..00c0881fa2f0 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -635,6 +635,38 @@ static int panfrost_ioctl_sync_bo(struct drm_device *ddev, void *data,
> return ret;
> }
>
> +static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data,
> + struct drm_file *file_priv)
> +{
> + struct drm_panfrost_query_bo_info *args = data;
> + struct drm_gem_object *gem_obj;
> + struct panfrost_gem_object *bo;
> +
> + gem_obj = drm_gem_object_lookup(file_priv, args->handle);
> + if (!gem_obj) {
> + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
> + return -ENOENT;
> + }
> +
> + bo = to_panfrost_bo(gem_obj);
> + args->pad = 0;
> + args->create_flags = 0;
> + args->extra_flags = 0;
> +
> + if (drm_gem_is_imported(gem_obj)) {
> + args->extra_flags |= DRM_PANFROST_BO_IS_IMPORTED;
> + } else {
> + if (bo->noexec)
> + args->create_flags |= PANFROST_BO_NOEXEC;
> +
> + if (bo->is_heap)
> + args->create_flags |= PANFROST_BO_HEAP;
> + }
> +
> + drm_gem_object_put(gem_obj);
> + return 0;
> +}
> +
> int panfrost_unstable_ioctl_check(void)
> {
> if (!unstable_ioctls)
> @@ -705,6 +737,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
> PANFROST_IOCTL(JM_CTX_CREATE, jm_ctx_create, DRM_RENDER_ALLOW),
> PANFROST_IOCTL(JM_CTX_DESTROY, jm_ctx_destroy, DRM_RENDER_ALLOW),
> PANFROST_IOCTL(SYNC_BO, sync_bo, DRM_RENDER_ALLOW),
> + PANFROST_IOCTL(QUERY_BO_INFO, query_bo_info, DRM_RENDER_ALLOW),
> };
>
> static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index fcc9f930df5f..5832a291adc4 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -25,6 +25,7 @@ extern "C" {
> #define DRM_PANFROST_JM_CTX_CREATE 0x0a
> #define DRM_PANFROST_JM_CTX_DESTROY 0x0b
> #define DRM_PANFROST_SYNC_BO 0x0c
> +#define DRM_PANFROST_QUERY_BO_INFO 0x0d
>
> #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit)
> #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo)
> @@ -37,6 +38,7 @@ extern "C" {
> #define DRM_IOCTL_PANFROST_JM_CTX_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_CREATE, struct drm_panfrost_jm_ctx_create)
> #define DRM_IOCTL_PANFROST_JM_CTX_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_JM_CTX_DESTROY, struct drm_panfrost_jm_ctx_destroy)
> #define DRM_IOCTL_PANFROST_SYNC_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SYNC_BO, struct drm_panfrost_sync_bo)
> +#define DRM_IOCTL_PANFROST_QUERY_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_QUERY_BO_INFO, struct drm_panfrost_query_bo_info)
>
> /*
> * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module
> @@ -313,6 +315,23 @@ struct drm_panfrost_sync_bo {
> __u32 pad;
> };
>
> +/** BO comes from a different subsystem. */
> +#define DRM_PANFROST_BO_IS_IMPORTED (1 << 0)
> +
> +struct drm_panfrost_query_bo_info {
> + /** Handle of the object being queried. */
> + __u32 handle;
> +
> + /** Extra flags that are not coming from the BO_CREATE ioctl(). */
> + __u32 extra_flags;
> +
> + /** Flags passed at creation time. */
> + __u32 create_flags;
> +
> + /** Will be zero on return. */
> + __u32 pad;
> +};
> +
> /* Definitions for coredump decoding in user space */
> #define PANFROSTDUMP_MAJOR 1
> #define PANFROSTDUMP_MINOR 0
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH v2 12/13] drm/panfrost: Add flag to map GEM object Write-Back Cacheable
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
` (10 preceding siblings ...)
2025-10-10 10:11 ` [PATCH v2 11/13] drm/panfrost: Add an ioctl to query BO flags Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 15:00 ` Steven Price
2025-10-10 10:11 ` [PATCH v2 13/13] drm/panfrost: Bump the driver version to 1.6 Boris Brezillon
12 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Will be used by the UMD to optimize CPU accesses to buffers
that are frequently read by the CPU, or on which the access
pattern makes non-cacheable mappings inefficient.
Mapping buffers CPU-cached implies taking care of the CPU
cache maintenance in the UMD, unless the GPU is IO coherent.
v2:
- Add more to the commit message
Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panfrost/panfrost_drv.c | 10 ++++++++--
drivers/gpu/drm/panfrost/panfrost_gem.c | 3 +++
include/uapi/drm/panfrost_drm.h | 1 +
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 00c0881fa2f0..0f51b1dc1abc 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -125,6 +125,10 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct
return 0;
}
+#define PANFROST_BO_FLAGS (PANFROST_BO_NOEXEC | \
+ PANFROST_BO_HEAP | \
+ PANFROST_BO_WB_MMAP)
+
static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
struct drm_file *file)
{
@@ -134,8 +138,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
struct panfrost_gem_mapping *mapping;
int ret;
- if (!args->size || args->pad ||
- (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP)))
+ if (!args->size || args->pad || (args->flags & ~PANFROST_BO_FLAGS))
return -EINVAL;
/* Heaps should never be executable */
@@ -661,6 +664,9 @@ static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data,
if (bo->is_heap)
args->create_flags |= PANFROST_BO_HEAP;
+
+ if (bo->base.map_wc)
+ args->create_flags |= PANFROST_BO_WB_MMAP;
}
drm_gem_object_put(gem_obj);
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index da0362202d94..0e8028ee9d1f 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -320,6 +320,9 @@ panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags)
bo->noexec = !!(flags & PANFROST_BO_NOEXEC);
bo->is_heap = !!(flags & PANFROST_BO_HEAP);
+ if (flags & PANFROST_BO_WB_MMAP)
+ bo->base.map_wc = false;
+
return bo;
}
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index 5832a291adc4..2c1e28f9dd07 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -104,6 +104,7 @@ struct drm_panfrost_wait_bo {
/* Valid flags to pass to drm_panfrost_create_bo */
#define PANFROST_BO_NOEXEC 1
#define PANFROST_BO_HEAP 2
+#define PANFROST_BO_WB_MMAP 4
/**
* struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs.
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH v2 12/13] drm/panfrost: Add flag to map GEM object Write-Back Cacheable
2025-10-10 10:11 ` [PATCH v2 12/13] drm/panfrost: Add flag to map GEM object Write-Back Cacheable Boris Brezillon
@ 2025-10-10 15:00 ` Steven Price
2025-10-10 16:31 ` Boris Brezillon
0 siblings, 1 reply; 33+ messages in thread
From: Steven Price @ 2025-10-10 15:00 UTC (permalink / raw)
To: Boris Brezillon, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
On 10/10/2025 11:11, Boris Brezillon wrote:
> From: Faith Ekstrand <faith.ekstrand@collabora.com>
>
> Will be used by the UMD to optimize CPU accesses to buffers
> that are frequently read by the CPU, or on which the access
> pattern makes non-cacheable mappings inefficient.
>
> Mapping buffers CPU-cached implies taking care of the CPU
> cache maintenance in the UMD, unless the GPU is IO coherent.
>
> v2:
> - Add more to the commit message
>
> Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Should we not expose the PANFROST_BO_WB_MMAP flag through
DRM_IOCTL_PANFROST_QUERY_BO_INFO? The documentation states that we
return "Flags passed at creation time", but then this new flag isn't
returned.
Thanks,
Steve
> ---
> drivers/gpu/drm/panfrost/panfrost_drv.c | 10 ++++++++--
> drivers/gpu/drm/panfrost/panfrost_gem.c | 3 +++
> include/uapi/drm/panfrost_drm.h | 1 +
> 3 files changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 00c0881fa2f0..0f51b1dc1abc 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -125,6 +125,10 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct
> return 0;
> }
>
> +#define PANFROST_BO_FLAGS (PANFROST_BO_NOEXEC | \
> + PANFROST_BO_HEAP | \
> + PANFROST_BO_WB_MMAP)
> +
> static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
> struct drm_file *file)
> {
> @@ -134,8 +138,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
> struct panfrost_gem_mapping *mapping;
> int ret;
>
> - if (!args->size || args->pad ||
> - (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP)))
> + if (!args->size || args->pad || (args->flags & ~PANFROST_BO_FLAGS))
> return -EINVAL;
>
> /* Heaps should never be executable */
> @@ -661,6 +664,9 @@ static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data,
>
> if (bo->is_heap)
> args->create_flags |= PANFROST_BO_HEAP;
> +
> + if (bo->base.map_wc)
> + args->create_flags |= PANFROST_BO_WB_MMAP;
> }
>
> drm_gem_object_put(gem_obj);
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
> index da0362202d94..0e8028ee9d1f 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
> @@ -320,6 +320,9 @@ panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags)
> bo->noexec = !!(flags & PANFROST_BO_NOEXEC);
> bo->is_heap = !!(flags & PANFROST_BO_HEAP);
>
> + if (flags & PANFROST_BO_WB_MMAP)
> + bo->base.map_wc = false;
> +
> return bo;
> }
>
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index 5832a291adc4..2c1e28f9dd07 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -104,6 +104,7 @@ struct drm_panfrost_wait_bo {
> /* Valid flags to pass to drm_panfrost_create_bo */
> #define PANFROST_BO_NOEXEC 1
> #define PANFROST_BO_HEAP 2
> +#define PANFROST_BO_WB_MMAP 4
>
> /**
> * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs.
^ permalink raw reply [flat|nested] 33+ messages in thread* Re: [PATCH v2 12/13] drm/panfrost: Add flag to map GEM object Write-Back Cacheable
2025-10-10 15:00 ` Steven Price
@ 2025-10-10 16:31 ` Boris Brezillon
0 siblings, 0 replies; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 16:31 UTC (permalink / raw)
To: Steven Price
Cc: Liviu Dudau, Adrián Larumbe, dri-devel, Maarten Lankhorst,
Maxime Ripard, Thomas Zimmermann, David Airlie, Simona Vetter,
Faith Ekstrand, kernel
On Fri, 10 Oct 2025 16:00:45 +0100
Steven Price <steven.price@arm.com> wrote:
> On 10/10/2025 11:11, Boris Brezillon wrote:
> > From: Faith Ekstrand <faith.ekstrand@collabora.com>
> >
> > Will be used by the UMD to optimize CPU accesses to buffers
> > that are frequently read by the CPU, or on which the access
> > pattern makes non-cacheable mappings inefficient.
> >
> > Mapping buffers CPU-cached implies taking care of the CPU
> > cache maintenance in the UMD, unless the GPU is IO coherent.
> >
> > v2:
> > - Add more to the commit message
> >
> > Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
>
> Should we not expose the PANFROST_BO_WB_MMAP flag through
> DRM_IOCTL_PANFROST_QUERY_BO_INFO? The documentation states that we
> return "Flags passed at creation time", but then this new flag isn't
> returned.
It was meant to be, I just got the test reversed in
panfrost_ioctl_query_bo_info().
>
> Thanks,
> Steve
>
> > ---
> > drivers/gpu/drm/panfrost/panfrost_drv.c | 10 ++++++++--
> > drivers/gpu/drm/panfrost/panfrost_gem.c | 3 +++
> > include/uapi/drm/panfrost_drm.h | 1 +
> > 3 files changed, 12 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index 00c0881fa2f0..0f51b1dc1abc 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -125,6 +125,10 @@ static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct
> > return 0;
> > }
> >
> > +#define PANFROST_BO_FLAGS (PANFROST_BO_NOEXEC | \
> > + PANFROST_BO_HEAP | \
> > + PANFROST_BO_WB_MMAP)
> > +
> > static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
> > struct drm_file *file)
> > {
> > @@ -134,8 +138,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
> > struct panfrost_gem_mapping *mapping;
> > int ret;
> >
> > - if (!args->size || args->pad ||
> > - (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP)))
> > + if (!args->size || args->pad || (args->flags & ~PANFROST_BO_FLAGS))
> > return -EINVAL;
> >
> > /* Heaps should never be executable */
> > @@ -661,6 +664,9 @@ static int panfrost_ioctl_query_bo_info(struct drm_device *dev, void *data,
> >
> > if (bo->is_heap)
> > args->create_flags |= PANFROST_BO_HEAP;
> > +
> > + if (bo->base.map_wc)
> > + args->create_flags |= PANFROST_BO_WB_MMAP;
Oops, it's actually
if (!bo->base.map_wc)
args->create_flags |= PANFROST_BO_WB_MMAP;
> > }
> >
> > drm_gem_object_put(gem_obj);
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
> > index da0362202d94..0e8028ee9d1f 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
> > @@ -320,6 +320,9 @@ panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags)
> > bo->noexec = !!(flags & PANFROST_BO_NOEXEC);
> > bo->is_heap = !!(flags & PANFROST_BO_HEAP);
> >
> > + if (flags & PANFROST_BO_WB_MMAP)
> > + bo->base.map_wc = false;
> > +
> > return bo;
> > }
> >
> > diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> > index 5832a291adc4..2c1e28f9dd07 100644
> > --- a/include/uapi/drm/panfrost_drm.h
> > +++ b/include/uapi/drm/panfrost_drm.h
> > @@ -104,6 +104,7 @@ struct drm_panfrost_wait_bo {
> > /* Valid flags to pass to drm_panfrost_create_bo */
> > #define PANFROST_BO_NOEXEC 1
> > #define PANFROST_BO_HEAP 2
> > +#define PANFROST_BO_WB_MMAP 4
> >
> > /**
> > * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs.
>
^ permalink raw reply [flat|nested] 33+ messages in thread
* [PATCH v2 13/13] drm/panfrost: Bump the driver version to 1.6
2025-10-10 10:11 [PATCH v2 00/13] drm/panfrost, panthor: Cached maps and explicit flushing Boris Brezillon
` (11 preceding siblings ...)
2025-10-10 10:11 ` [PATCH v2 12/13] drm/panfrost: Add flag to map GEM object Write-Back Cacheable Boris Brezillon
@ 2025-10-10 10:11 ` Boris Brezillon
2025-10-10 15:01 ` Steven Price
12 siblings, 1 reply; 33+ messages in thread
From: Boris Brezillon @ 2025-10-10 10:11 UTC (permalink / raw)
To: Boris Brezillon, Steven Price, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Bump the driver version to reflect the new cached-CPU mapping
capability.
v2:
- Quickly describe what the new version exposes in the commit message
Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
drivers/gpu/drm/panfrost/panfrost_drv.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 0f51b1dc1abc..9316daa91543 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -934,6 +934,9 @@ static void panfrost_debugfs_init(struct drm_minor *minor)
* - 1.4 - adds SET_LABEL_BO
* - 1.5 - adds JM_CTX_{CREATE,DESTROY} ioctls and extend SUBMIT to allow
* context creation with configurable priorities/affinity
+ * - 1.6 - adds PANFROST_BO_WB_MMAP, DRM_IOCTL_PANFROST_SYNC_BO,
+ *   DRM_IOCTL_PANFROST_QUERY_BO_INFO and
+ *   DRM_PANFROST_PARAM_SELECTED_COHERENCY
*/
static const struct drm_driver panfrost_drm_driver = {
.driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
@@ -946,7 +949,7 @@ static const struct drm_driver panfrost_drm_driver = {
.name = "panfrost",
.desc = "panfrost DRM",
.major = 1,
- .minor = 5,
+ .minor = 6,
.gem_create_object = panfrost_gem_create_object,
.gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table,
--
2.51.0
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH v2 13/13] drm/panfrost: Bump the driver version to 1.6
2025-10-10 10:11 ` [PATCH v2 13/13] drm/panfrost: Bump the driver version to 1.6 Boris Brezillon
@ 2025-10-10 15:01 ` Steven Price
0 siblings, 0 replies; 33+ messages in thread
From: Steven Price @ 2025-10-10 15:01 UTC (permalink / raw)
To: Boris Brezillon, Liviu Dudau, Adrián Larumbe
Cc: dri-devel, Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
David Airlie, Simona Vetter, Faith Ekstrand, kernel
On 10/10/2025 11:11, Boris Brezillon wrote:
> From: Faith Ekstrand <faith.ekstrand@collabora.com>
>
> Bump the driver version to reflect the new cached-CPU mapping
> capability.
>
> v2:
> - Quickly describe what the new version exposes in the commit message
>
> Signed-off-by: Faith Ekstrand <faith.ekstrand@collabora.com>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
> ---
> drivers/gpu/drm/panfrost/panfrost_drv.c | 5 ++++-
> 1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 0f51b1dc1abc..9316daa91543 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -934,6 +934,9 @@ static void panfrost_debugfs_init(struct drm_minor *minor)
> * - 1.4 - adds SET_LABEL_BO
> * - 1.5 - adds JM_CTX_{CREATE,DESTROY} ioctls and extend SUBMIT to allow
> * context creation with configurable priorities/affinity
> + * - 1.6 - adds PANFROST_BO_MAP_WB, PANFROST_IOCTL_SYNC_BO,
> + * PANFROST_IOCTL_QUERY_BO_INFO and
> + * DRM_PANFROST_PARAM_SELECTED_COHERENCY
> */
> static const struct drm_driver panfrost_drm_driver = {
> .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
> @@ -946,7 +949,7 @@ static const struct drm_driver panfrost_drm_driver = {
> .name = "panfrost",
> .desc = "panfrost DRM",
> .major = 1,
> - .minor = 5,
> + .minor = 6,
>
> .gem_create_object = panfrost_gem_create_object,
> .gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table,
^ permalink raw reply [flat|nested] 33+ messages in thread