Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "Yadav, Arvind" <arvind.yadav@intel.com>
To: Matthew Brost <matthew.brost@intel.com>,
	Matthew Auld <matthew.auld@intel.com>
Cc: intel-xe@lists.freedesktop.org,
	"Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	"José Roberto de Souza" <jose.souza@intel.com>,
	"Michal Mrozek" <michal.mrozek@intel.com>,
	"Carl Zhang" <carl.zhang@intel.com>,
	stable@vger.kernel.org
Subject: Re: [PATCH v3 1/2] drm/xe/uapi: disallow bind queue sharing
Date: Thu, 20 Nov 2025 21:17:44 +0530	[thread overview]
Message-ID: <c26e2aa2-2073-4e89-b30b-637ed3357d83@intel.com> (raw)
In-Reply-To: <aR8uXCNK/jJx0Rsc@lstrano-desk.jf.intel.com>

[-- Attachment #1: Type: text/plain, Size: 7717 bytes --]


On 20-11-2025 20:36, Matthew Brost wrote:
> On Thu, Nov 20, 2025 at 01:27:29PM +0000, Matthew Auld wrote:
>> Currently this is very broken if someone attempts to create a bind
>> queue and share it across multiple VMs. For example currently we assume
>> it is safe to acquire the user VM lock to protect some of the bind queue
>> state, but if we allow sharing the bind queue with multiple VMs then this
>> quickly breaks down.
>>
>> To fix this reject using a bind queue with any VM that is not the same
>> VM that was originally passed when creating the bind queue. This is a uAPI
>> change, however this was more of an oversight on kernel side that we
>> didn't reject this, and expectation is that userspace shouldn't be using
>> bind queues in this way, so in theory this change should go unnoticed.
>>
>> Based on a patch from Matt Brost.
>>
>> v2 (Matt B):
>>    - Hold the vm lock over queue create, to ensure it can't be closed as
>>      we attach the user_vm to the queue.
>>    - Make sure we actually check for NULL user_vm in destruction path.
>> v3:
>>    - Fix error path handling.
>>
>> Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
>> Reported-by: Thomas Hellström<thomas.hellstrom@linux.intel.com>
>> Signed-off-by: Matthew Auld<matthew.auld@intel.com>
>> Cc: José Roberto de Souza<jose.souza@intel.com>
>> Cc: Matthew Brost<matthew.brost@intel.com>
> Reviewed-by: Matthew Brost<matthew.brost@intel.com>
Looks good to me.
Reviewed-by: Arvind Yadav <arvind.yadav@intel.com>
>> Cc: Michal Mrozek <michal.mrozek@intel.com>
>> Cc: Carl Zhang <carl.zhang@intel.com>
>> Cc: <stable@vger.kernel.org> # v6.8+
>> Acked-by: José Roberto de Souza <jose.souza@intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_exec_queue.c       | 32 +++++++++++++++++++++++-
>>   drivers/gpu/drm/xe/xe_exec_queue.h       |  1 +
>>   drivers/gpu/drm/xe/xe_exec_queue_types.h |  6 +++++
>>   drivers/gpu/drm/xe/xe_sriov_vf_ccs.c     |  2 +-
>>   drivers/gpu/drm/xe/xe_vm.c               |  7 +++++-
>>   5 files changed, 45 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
>> index 8724f8de67e2..779d7e7e2d2e 100644
>> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
>> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
>> @@ -328,6 +328,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
>>    * @xe: Xe device.
>>    * @tile: tile which bind exec queue belongs to.
>>    * @flags: exec queue creation flags
>> + * @user_vm: The user VM which this exec queue belongs to
>>    * @extensions: exec queue creation extensions
>>    *
>>    * Normalize bind exec queue creation. Bind exec queue is tied to migration VM
>> @@ -341,6 +342,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
>>    */
>>   struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
>>   						struct xe_tile *tile,
>> +						struct xe_vm *user_vm,
>>   						u32 flags, u64 extensions)
>>   {
>>   	struct xe_gt *gt = tile->primary_gt;
>> @@ -377,6 +379,9 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
>>   			xe_exec_queue_put(q);
>>   			return ERR_PTR(err);
>>   		}
>> +
>> +		if (user_vm)
>> +			q->user_vm = xe_vm_get(user_vm);
>>   	}
>>   
>>   	return q;
>> @@ -407,6 +412,11 @@ void xe_exec_queue_destroy(struct kref *ref)
>>   			xe_exec_queue_put(eq);
>>   	}
>>   
>> +	if (q->user_vm) {
>> +		xe_vm_put(q->user_vm);
>> +		q->user_vm = NULL;
>> +	}
>> +
>>   	q->ops->destroy(q);
>>   }
>>   
>> @@ -742,6 +752,22 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
>>   		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
>>   			return -EINVAL;
>>   
>> +		vm = xe_vm_lookup(xef, args->vm_id);
>> +		if (XE_IOCTL_DBG(xe, !vm))
>> +			return -ENOENT;
>> +
>> +		err = down_read_interruptible(&vm->lock);
>> +		if (err) {
>> +			xe_vm_put(vm);
>> +			return err;
>> +		}
>> +
>> +		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
>> +			up_read(&vm->lock);
>> +			xe_vm_put(vm);
>> +			return -ENOENT;
>> +		}
>> +
>>   		for_each_tile(tile, xe, id) {
>>   			struct xe_exec_queue *new;
>>   
>> @@ -749,9 +775,11 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
>>   			if (id)
>>   				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;
>>   
>> -			new = xe_exec_queue_create_bind(xe, tile, flags,
>> +			new = xe_exec_queue_create_bind(xe, tile, vm, flags,
>>   							args->extensions);
>>   			if (IS_ERR(new)) {
>> +				up_read(&vm->lock);
>> +				xe_vm_put(vm);
>>   				err = PTR_ERR(new);
>>   				if (q)
>>   					goto put_exec_queue;
>> @@ -763,6 +791,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
>>   				list_add_tail(&new->multi_gt_list,
>>   					      &q->multi_gt_link);
>>   		}
>> +		up_read(&vm->lock);
>> +		xe_vm_put(vm);
>>   	} else {
>>   		logical_mask = calc_validate_logical_mask(xe, eci,
>>   							  args->width,
>> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
>> index fda4d4f9bda8..37a9da22f420 100644
>> --- a/drivers/gpu/drm/xe/xe_exec_queue.h
>> +++ b/drivers/gpu/drm/xe/xe_exec_queue.h
>> @@ -28,6 +28,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
>>   						 u32 flags, u64 extensions);
>>   struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
>>   						struct xe_tile *tile,
>> +						struct xe_vm *user_vm,
>>   						u32 flags, u64 extensions);
>>   
>>   void xe_exec_queue_fini(struct xe_exec_queue *q);
>> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
>> index 771ffe35cd0c..3a4263c92b3d 100644
>> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
>> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
>> @@ -54,6 +54,12 @@ struct xe_exec_queue {
>>   	struct kref refcount;
>>   	/** @vm: VM (address space) for this exec queue */
>>   	struct xe_vm *vm;
>> +	/**
>> +	 * @user_vm: User VM (address space) for this exec queue (bind queues
>> +	 * only)
>> +	 */
>> +	struct xe_vm *user_vm;
>> +
>>   	/** @class: class of this exec queue */
>>   	enum xe_engine_class class;
>>   	/**
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> index 052a5071e69f..db023fb66a27 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> @@ -350,7 +350,7 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>>   		flags = EXEC_QUEUE_FLAG_KERNEL |
>>   			EXEC_QUEUE_FLAG_PERMANENT |
>>   			EXEC_QUEUE_FLAG_MIGRATE;
>> -		q = xe_exec_queue_create_bind(xe, tile, flags, 0);
>> +		q = xe_exec_queue_create_bind(xe, tile, NULL, flags, 0);
>>   		if (IS_ERR(q)) {
>>   			err = PTR_ERR(q);
>>   			goto err_ret;
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index f9989a7a710c..7973d654540a 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -1614,7 +1614,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
>>   			if (!vm->pt_root[id])
>>   				continue;
>>   
>> -			q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
>> +			q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0);
>>   			if (IS_ERR(q)) {
>>   				err = PTR_ERR(q);
>>   				goto err_close;
>> @@ -3571,6 +3571,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>>   		}
>>   	}
>>   
>> +	if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) {
>> +		err = -EINVAL;
>> +		goto put_exec_queue;
>> +	}
>> +
>>   	/* Ensure all UNMAPs visible */
>>   	xe_svm_flush(vm);
>>   
>> -- 
>> 2.51.1
>>

[-- Attachment #2: Type: text/html, Size: 44230 bytes --]

  reply	other threads:[~2025-11-20 15:48 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-20 13:27 [PATCH v3 0/2] Some bind queue fixes Matthew Auld
2025-11-20 13:27 ` [PATCH v3 1/2] drm/xe/uapi: disallow bind queue sharing Matthew Auld
2025-11-20 15:06   ` Matthew Brost
2025-11-20 15:47     ` Yadav, Arvind [this message]
2025-11-20 15:34   ` Lucas De Marchi
2025-11-24 13:41     ` Matthew Auld
2025-12-18 20:40       ` Matthew Brost
2025-12-19  6:36         ` Zhang, Carl
2026-01-19 12:37           ` Matthew Auld
2026-01-19 14:14             ` Mrozek, Michal
2026-01-19 15:09               ` Matthew Auld
2025-11-20 13:27 ` [PATCH v3 2/2] drm/xe/migrate: fix job lock assert Matthew Auld
2025-11-20 15:30   ` Yadav, Arvind
2025-11-20 13:52 ` ✗ CI.checkpatch: warning for Some bind queue fixes (rev3) Patchwork
2025-11-20 13:53 ` ✓ CI.KUnit: success " Patchwork
2025-11-20 14:44 ` ✓ Xe.CI.BAT: " Patchwork
2025-11-20 17:38 ` ✗ Xe.CI.Full: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c26e2aa2-2073-4e89-b30b-637ed3357d83@intel.com \
    --to=arvind.yadav@intel.com \
    --cc=carl.zhang@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=jose.souza@intel.com \
    --cc=matthew.auld@intel.com \
    --cc=matthew.brost@intel.com \
    --cc=michal.mrozek@intel.com \
    --cc=stable@vger.kernel.org \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox