public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/xe/vm: prevent UAF with asid based lookup
@ 2024-04-12 11:31 Matthew Auld
  2024-04-12 14:06 ` Lucas De Marchi
  0 siblings, 1 reply; 4+ messages in thread
From: Matthew Auld @ 2024-04-12 11:31 UTC (permalink / raw)
  To: intel-xe; +Cc: Matthew Brost, stable

The asid is only erased from the xarray when the vm refcount reaches
zero, however this leads to potential UAF since the xe_vm_get() only
works on a vm with refcount != 0. Since the asid is allocated in the vm
create ioctl, rather erase it when closing the vm, prior to dropping the
potential last ref. This should also work when user closes driver fd
without explicit vm destroy.

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1594
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <stable@vger.kernel.org> # v6.8+
---
 drivers/gpu/drm/xe/xe_vm.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a196dbe65252..c5c26b3d1b76 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1581,6 +1581,16 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 		xe->usm.num_vm_in_fault_mode--;
 	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
 		xe->usm.num_vm_in_non_fault_mode--;
+
+	if (vm->usm.asid) {
+		void *lookup;
+
+		xe_assert(xe, xe->info.has_asid);
+		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
+
+		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+		xe_assert(xe, lookup == vm);
+	}
 	mutex_unlock(&xe->usm.lock);
 
 	for_each_tile(tile, xe, id)
@@ -1596,24 +1606,15 @@ static void vm_destroy_work_func(struct work_struct *w)
 	struct xe_device *xe = vm->xe;
 	struct xe_tile *tile;
 	u8 id;
-	void *lookup;
 
 	/* xe_vm_close_and_put was not called? */
 	xe_assert(xe, !vm->size);
 
 	mutex_destroy(&vm->snap_mutex);
 
-	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
+	if (!(vm->flags & XE_VM_FLAG_MIGRATION))
 		xe_device_mem_access_put(xe);
 
-		if (xe->info.has_asid && vm->usm.asid) {
-			mutex_lock(&xe->usm.lock);
-			lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
-			xe_assert(xe, lookup == vm);
-			mutex_unlock(&xe->usm.lock);
-		}
-	}
-
 	for_each_tile(tile, xe, id)
 		XE_WARN_ON(vm->pt_root[id]);
 
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/3] drm/xe/vm: prevent UAF with asid based lookup
  2024-04-12 11:31 [PATCH 1/3] drm/xe/vm: prevent UAF with asid based lookup Matthew Auld
@ 2024-04-12 14:06 ` Lucas De Marchi
  2024-04-12 14:42   ` Matthew Auld
  0 siblings, 1 reply; 4+ messages in thread
From: Lucas De Marchi @ 2024-04-12 14:06 UTC (permalink / raw)
  To: Matthew Auld; +Cc: intel-xe, Matthew Brost, stable

On Fri, Apr 12, 2024 at 12:31:45PM +0100, Matthew Auld wrote:
>The asid is only erased from the xarray when the vm refcount reaches
>zero, however this leads to potential UAF since the xe_vm_get() only

I'm not sure I understand the call chain an where xe_vm_get() is coming
into play here.


>works on a vm with refcount != 0. Since the asid is allocated in the vm
>create ioctl, rather erase it when closing the vm, prior to dropping the
>potential last ref. This should also work when user closes driver fd
>without explicit vm destroy.

what seems weird is that you are moving it earlier in the call stack
rather than later, outside of the worker, to prevent the UAF.

what exactly was the UAF on?

Lucas De Marchi

>
>Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
>Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1594
>Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>Cc: Matthew Brost <matthew.brost@intel.com>
>Cc: <stable@vger.kernel.org> # v6.8+
>---
> drivers/gpu/drm/xe/xe_vm.c | 21 +++++++++++----------
> 1 file changed, 11 insertions(+), 10 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>index a196dbe65252..c5c26b3d1b76 100644
>--- a/drivers/gpu/drm/xe/xe_vm.c
>+++ b/drivers/gpu/drm/xe/xe_vm.c
>@@ -1581,6 +1581,16 @@ void xe_vm_close_and_put(struct xe_vm *vm)
> 		xe->usm.num_vm_in_fault_mode--;
> 	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
> 		xe->usm.num_vm_in_non_fault_mode--;
>+
>+	if (vm->usm.asid) {
>+		void *lookup;
>+
>+		xe_assert(xe, xe->info.has_asid);
>+		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
>+
>+		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
>+		xe_assert(xe, lookup == vm);
>+	}
> 	mutex_unlock(&xe->usm.lock);
>
> 	for_each_tile(tile, xe, id)
>@@ -1596,24 +1606,15 @@ static void vm_destroy_work_func(struct work_struct *w)
> 	struct xe_device *xe = vm->xe;
> 	struct xe_tile *tile;
> 	u8 id;
>-	void *lookup;
>
> 	/* xe_vm_close_and_put was not called? */
> 	xe_assert(xe, !vm->size);
>
> 	mutex_destroy(&vm->snap_mutex);
>
>-	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
>+	if (!(vm->flags & XE_VM_FLAG_MIGRATION))
> 		xe_device_mem_access_put(xe);
>
>-		if (xe->info.has_asid && vm->usm.asid) {
>-			mutex_lock(&xe->usm.lock);
>-			lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
>-			xe_assert(xe, lookup == vm);
>-			mutex_unlock(&xe->usm.lock);
>-		}
>-	}
>-
> 	for_each_tile(tile, xe, id)
> 		XE_WARN_ON(vm->pt_root[id]);
>
>-- 
>2.44.0
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/3] drm/xe/vm: prevent UAF with asid based lookup
  2024-04-12 14:06 ` Lucas De Marchi
@ 2024-04-12 14:42   ` Matthew Auld
  2024-04-12 22:26     ` Matthew Brost
  0 siblings, 1 reply; 4+ messages in thread
From: Matthew Auld @ 2024-04-12 14:42 UTC (permalink / raw)
  To: Lucas De Marchi; +Cc: intel-xe, Matthew Brost, stable

On 12/04/2024 15:06, Lucas De Marchi wrote:
> On Fri, Apr 12, 2024 at 12:31:45PM +0100, Matthew Auld wrote:
>> The asid is only erased from the xarray when the vm refcount reaches
>> zero, however this leads to potential UAF since the xe_vm_get() only
> 
> I'm not sure I understand the call chain an where xe_vm_get() is coming
> into play here.
> 
> 
>> works on a vm with refcount != 0. Since the asid is allocated in the vm
>> create ioctl, rather erase it when closing the vm, prior to dropping the
>> potential last ref. This should also work when user closes driver fd
>> without explicit vm destroy.
> 
> what seems weird is that you are moving it earlier in the call stack
> rather than later, outside of the worker, to prevent the UAF.
> 
> what exactly was the UAF on?

UAF on the vm object. From the bug report it's when servicing some GPU 
fault, so inside handle_pagefault(). At this stage it just has some asid 
which is meant to map to some vm AFAICT. The lookup dance relies on 
calling xe_vm_get() after getting back the vm pointer from the xarray. 
Currently the asid is only erased from the xarray in 
vm_destroy_work_func() which is long after the refcount reaches zero and 
we are about to free the memory for the vm.

However xe_vm_get() is only meant to be called if you are already 
holding a ref, so if the vm refcount is already zero it just throws an 
error and continues on, and the caller has no idea. If that happens then 
as soon as we drop the usm lock the memory can be freed and it's game 
over. This looks to be what happens with the vm refcount reaching zero, 
and the handle_pagefault() still being able to reach the 
soon-to-be-freed vm via the xarray. With this patch we now erase from 
the xarray before we drop what is potentially the final ref. That way if 
you can reach the vm via the xarray you should always be able get a 
valid ref.

> 
> Lucas De Marchi
> 
>>
>> Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
>> Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1594
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Matthew Brost <matthew.brost@intel.com>
>> Cc: <stable@vger.kernel.org> # v6.8+
>> ---
>> drivers/gpu/drm/xe/xe_vm.c | 21 +++++++++++----------
>> 1 file changed, 11 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index a196dbe65252..c5c26b3d1b76 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -1581,6 +1581,16 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>>         xe->usm.num_vm_in_fault_mode--;
>>     else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
>>         xe->usm.num_vm_in_non_fault_mode--;
>> +
>> +    if (vm->usm.asid) {
>> +        void *lookup;
>> +
>> +        xe_assert(xe, xe->info.has_asid);
>> +        xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
>> +
>> +        lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
>> +        xe_assert(xe, lookup == vm);
>> +    }
>>     mutex_unlock(&xe->usm.lock);
>>
>>     for_each_tile(tile, xe, id)
>> @@ -1596,24 +1606,15 @@ static void vm_destroy_work_func(struct 
>> work_struct *w)
>>     struct xe_device *xe = vm->xe;
>>     struct xe_tile *tile;
>>     u8 id;
>> -    void *lookup;
>>
>>     /* xe_vm_close_and_put was not called? */
>>     xe_assert(xe, !vm->size);
>>
>>     mutex_destroy(&vm->snap_mutex);
>>
>> -    if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
>> +    if (!(vm->flags & XE_VM_FLAG_MIGRATION))
>>         xe_device_mem_access_put(xe);
>>
>> -        if (xe->info.has_asid && vm->usm.asid) {
>> -            mutex_lock(&xe->usm.lock);
>> -            lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
>> -            xe_assert(xe, lookup == vm);
>> -            mutex_unlock(&xe->usm.lock);
>> -        }
>> -    }
>> -
>>     for_each_tile(tile, xe, id)
>>         XE_WARN_ON(vm->pt_root[id]);
>>
>> -- 
>> 2.44.0
>>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/3] drm/xe/vm: prevent UAF with asid based lookup
  2024-04-12 14:42   ` Matthew Auld
@ 2024-04-12 22:26     ` Matthew Brost
  0 siblings, 0 replies; 4+ messages in thread
From: Matthew Brost @ 2024-04-12 22:26 UTC (permalink / raw)
  To: Matthew Auld; +Cc: Lucas De Marchi, intel-xe, stable

On Fri, Apr 12, 2024 at 03:42:00PM +0100, Matthew Auld wrote:
> On 12/04/2024 15:06, Lucas De Marchi wrote:
> > On Fri, Apr 12, 2024 at 12:31:45PM +0100, Matthew Auld wrote:
> > > The asid is only erased from the xarray when the vm refcount reaches
> > > zero, however this leads to potential UAF since the xe_vm_get() only
> > 
> > I'm not sure I understand the call chain an where xe_vm_get() is coming
> > into play here.
> > 
> > 
> > > works on a vm with refcount != 0. Since the asid is allocated in the vm
> > > create ioctl, rather erase it when closing the vm, prior to dropping the
> > > potential last ref. This should also work when user closes driver fd
> > > without explicit vm destroy.
> > 
> > what seems weird is that you are moving it earlier in the call stack
> > rather than later, outside of the worker, to prevent the UAF.
> > 
> > what exactly was the UAF on?
> 
> UAF on the vm object. From the bug report it's when servicing some GPU
> fault, so inside handle_pagefault(). At this stage it just has some asid
> which is meant to map to some vm AFAICT. The lookup dance relies on calling
> xe_vm_get() after getting back the vm pointer from the xarray. Currently the
> asid is only erased from the xarray in vm_destroy_work_func() which is long
> after the refcount reaches zero and we are about to free the memory for the
> vm.
> 
> However xe_vm_get() is only meant to be called if you are already holding a
> ref, so if the vm refcount is already zero it just throws an error and
> continues on, and the caller has no idea. If that happens then as soon as we
> drop the usm lock the memory can be freed and it's game over. This looks to
> be what happens with the vm refcount reaching zero, and the
> handle_pagefault() still being able to reach the soon-to-be-freed vm via the
> xarray. With this patch we now erase from the xarray before we drop what is
> potentially the final ref. That way if you can reach the vm via the xarray
> you should always be able get a valid ref.
> 

This explaination makes sense to me. Thanks for the fix.

Reviewed-by: Matthew.brost@intel.com>

> > 
> > Lucas De Marchi
> > 
> > > 
> > > Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
> > > Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1594
> > > Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> > > Cc: Matthew Brost <matthew.brost@intel.com>
> > > Cc: <stable@vger.kernel.org> # v6.8+
> > > ---
> > > drivers/gpu/drm/xe/xe_vm.c | 21 +++++++++++----------
> > > 1 file changed, 11 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index a196dbe65252..c5c26b3d1b76 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -1581,6 +1581,16 @@ void xe_vm_close_and_put(struct xe_vm *vm)
> > >         xe->usm.num_vm_in_fault_mode--;
> > >     else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
> > >         xe->usm.num_vm_in_non_fault_mode--;
> > > +
> > > +    if (vm->usm.asid) {
> > > +        void *lookup;
> > > +
> > > +        xe_assert(xe, xe->info.has_asid);
> > > +        xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
> > > +
> > > +        lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
> > > +        xe_assert(xe, lookup == vm);
> > > +    }
> > >     mutex_unlock(&xe->usm.lock);
> > > 
> > >     for_each_tile(tile, xe, id)
> > > @@ -1596,24 +1606,15 @@ static void vm_destroy_work_func(struct
> > > work_struct *w)
> > >     struct xe_device *xe = vm->xe;
> > >     struct xe_tile *tile;
> > >     u8 id;
> > > -    void *lookup;
> > > 
> > >     /* xe_vm_close_and_put was not called? */
> > >     xe_assert(xe, !vm->size);
> > > 
> > >     mutex_destroy(&vm->snap_mutex);
> > > 
> > > -    if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
> > > +    if (!(vm->flags & XE_VM_FLAG_MIGRATION))
> > >         xe_device_mem_access_put(xe);
> > > 
> > > -        if (xe->info.has_asid && vm->usm.asid) {
> > > -            mutex_lock(&xe->usm.lock);
> > > -            lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
> > > -            xe_assert(xe, lookup == vm);
> > > -            mutex_unlock(&xe->usm.lock);
> > > -        }
> > > -    }
> > > -
> > >     for_each_tile(tile, xe, id)
> > >         XE_WARN_ON(vm->pt_root[id]);
> > > 
> > > -- 
> > > 2.44.0
> > > 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-04-12 22:27 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-04-12 11:31 [PATCH 1/3] drm/xe/vm: prevent UAF with asid based lookup Matthew Auld
2024-04-12 14:06 ` Lucas De Marchi
2024-04-12 14:42   ` Matthew Auld
2024-04-12 22:26     ` Matthew Brost

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox