Re: [PATCH i-g-t v7 04/10] tests/intel/xe_multi_gpusvm.c: Add SVM multi-GPU atomic operations

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Sharma, Nishit" <nishit.sharma@intel.com>
To: "Hellstrom, Thomas" <thomas.hellstrom@intel.com>,
	"igt-dev@lists.freedesktop.org" <igt-dev@lists.freedesktop.org>
Subject: Re: [PATCH i-g-t v7 04/10] tests/intel/xe_multi_gpusvm.c: Add SVM multi-GPU atomic operations
Date: Mon, 17 Nov 2025 21:20:59 +0530	[thread overview]
Message-ID: <4e88d41e-e65e-4fa8-b910-30de1f8a3ce2@intel.com> (raw)
In-Reply-To: <556b427ab266daec61e90f9f6db863dc5d19a279.camel@intel.com>


On 11/17/2025 6:40 PM, Hellstrom, Thomas wrote:
> On Thu, 2025-11-13 at 16:33 +0000, nishit.sharma@intel.com wrote:
>> From: Nishit Sharma <nishit.sharma@intel.com>
>>
>> This test performs atomic increment operation on a shared SVM buffer
>> from both GPUs and the CPU in a multi-GPU environment. It uses
>> madvise
>> and prefetch to control buffer placement and verifies correctness and
>> ordering of atomic updates across agents.
>>
>> Signed-off-by: Nishit Sharma <nishit.sharma@intel.com>
>> ---
>>   tests/intel/xe_multi_gpusvm.c | 157
>> +++++++++++++++++++++++++++++++++-
>>   1 file changed, 156 insertions(+), 1 deletion(-)
>>
>> diff --git a/tests/intel/xe_multi_gpusvm.c
>> b/tests/intel/xe_multi_gpusvm.c
>> index 6614ea3d1..54e036724 100644
>> --- a/tests/intel/xe_multi_gpusvm.c
>> +++ b/tests/intel/xe_multi_gpusvm.c
>> @@ -31,6 +31,11 @@
>>    *      region both remotely and locally and copies to it. Reads
>> back to
>>    *      system memory and checks the result.
>>    *
>> + * SUBTEST: atomic-inc-gpu-op
>> + * Description:
>> + * 	This test does atomic operation in multi-gpu by executing
>> atomic
>> + *	operation on GPU1 and then atomic operation on GPU2 using
>> same
>> + *	address
>>    */
>>   
>>   #define MAX_XE_REGIONS	8
>> @@ -40,6 +45,7 @@
>>   #define BIND_SYNC_VAL 0x686868
>>   #define EXEC_SYNC_VAL 0x676767
>>   #define COPY_SIZE SZ_64M
>> +#define	ATOMIC_OP_VAL	56
>>   
>>   struct xe_svm_gpu_info {
>>   	bool supports_faults;
>> @@ -49,6 +55,16 @@ struct xe_svm_gpu_info {
>>   	int fd;
>>   };
>>   
>> +struct test_exec_data {
>> +	uint32_t batch[32];
>> +	uint64_t pad;
>> +	uint64_t vm_sync;
>> +	uint64_t exec_sync;
>> +	uint32_t data;
>> +	uint32_t expected_data;
>> +	uint64_t batch_addr;
>> +};
>> +
>>   struct multigpu_ops_args {
>>   	bool prefetch_req;
>>   	bool op_mod;
>> @@ -72,7 +88,10 @@ static void gpu_mem_access_wrapper(struct
>> xe_svm_gpu_info *src,
>>   				   struct
>> drm_xe_engine_class_instance *eci,
>>   				   void *extra_args);
>>   
>> -static void open_pagemaps(int fd, struct xe_svm_gpu_info *info);
>> +static void gpu_atomic_inc_wrapper(struct xe_svm_gpu_info *src,
>> +				   struct xe_svm_gpu_info *dst,
>> +				   struct
>> drm_xe_engine_class_instance *eci,
>> +				   void *extra_args);
>>   
>>   static void
>>   create_vm_and_queue(struct xe_svm_gpu_info *gpu, struct
>> drm_xe_engine_class_instance *eci,
>> @@ -166,6 +185,35 @@ static void for_each_gpu_pair(int num_gpus,
>> struct xe_svm_gpu_info *gpus,
>>   	}
>>   }
>>   
>> +static void open_pagemaps(int fd, struct xe_svm_gpu_info *info);
>> +
>> +static void
>> +atomic_batch_init(int fd, uint32_t vm, uint64_t src_addr,
>> +		  uint32_t *bo, uint64_t *addr)
>> +{
>> +	uint32_t batch_bo_size = BATCH_SIZE(fd);
>> +	uint32_t batch_bo;
>> +	uint64_t batch_addr;
>> +	void *batch;
>> +	uint32_t *cmd;
>> +	int i = 0;
>> +
>> +	batch_bo = xe_bo_create(fd, vm, batch_bo_size,
>> vram_if_possible(fd, 0), 0);
>> +	batch = xe_bo_map(fd, batch_bo, batch_bo_size);
>> +	cmd = (uint32_t *)batch;
>> +
>> +	cmd[i++] = MI_ATOMIC | MI_ATOMIC_INC;
>> +	cmd[i++] = src_addr;
>> +	cmd[i++] = src_addr >> 32;
>> +	cmd[i++] = MI_BATCH_BUFFER_END;
>> +
>> +	batch_addr = to_user_pointer(batch);
>> +	/* Punch a gap in the SVM map where we map the batch_bo */
>> +	xe_vm_bind_lr_sync(fd, vm, batch_bo, 0, batch_addr,
>> batch_bo_size, 0);
>> +	*bo = batch_bo;
>> +	*addr = batch_addr;
>> +}
>> +
>>   static void batch_init(int fd, uint32_t vm, uint64_t src_addr,
>>   		       uint64_t dst_addr, uint64_t copy_size,
>>   		       uint32_t *bo, uint64_t *addr)
>> @@ -325,6 +373,105 @@ gpu_mem_access_wrapper(struct xe_svm_gpu_info
>> *src,
>>   	copy_src_dst(src, dst, eci, args->prefetch_req);
>>   }
>>   
>> +static void
>> +atomic_inc_op(struct xe_svm_gpu_info *gpu0,
>> +	      struct xe_svm_gpu_info *gpu1,
>> +	      struct drm_xe_engine_class_instance *eci,
>> +	      bool prefetch_req)
>> +{
>> +	uint64_t addr;
>> +	uint32_t vm[2];
>> +	uint32_t exec_queue[2];
>> +	uint32_t batch_bo;
>> +	struct test_exec_data *data;
>> +	uint64_t batch_addr;
>> +	struct drm_xe_sync sync = {};
>> +	volatile uint64_t *sync_addr;
>> +	volatile uint32_t *shared_val;
>> +
>> +	create_vm_and_queue(gpu0, eci, &vm[0], &exec_queue[0]);
>> +	create_vm_and_queue(gpu1, eci, &vm[1], &exec_queue[1]);
>> +
>> +	data = aligned_alloc(SZ_2M, SZ_4K);
>> +	igt_assert(data);
>> +	data[0].vm_sync = 0;
>> +	addr = to_user_pointer(data);
>> +
>> +	shared_val = (volatile uint32_t *)addr;
>> +	*shared_val = ATOMIC_OP_VAL - 1;
>> +
>> +	atomic_batch_init(gpu0->fd, vm[0], addr, &batch_bo,
>> &batch_addr);
>> +
>> +	/* Place destination in an optionally remote location to
>> test */
>> +	xe_multigpu_madvise(gpu0->fd, vm[0], addr, SZ_4K, 0,
>> +			    DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC,
>> +			    gpu0->fd, 0, gpu0->vram_regions[0],
>> exec_queue[0],
>> +			    0, 0);
>> +
>> +	setup_sync(&sync, &sync_addr, BIND_SYNC_VAL);
>> +	xe_multigpu_prefetch(gpu0->fd, vm[0], addr, SZ_4K, &sync,
>> +			     sync_addr, exec_queue[0],
>> prefetch_req);
>> +
>> +	sync_addr = (void *)((char *)batch_addr + SZ_4K);
>> +	sync.addr = to_user_pointer((uint64_t *)sync_addr);
>> +	sync.timeline_value = EXEC_SYNC_VAL;
>> +	*sync_addr = 0;
>> +
>> +	/* Executing ATOMIC_INC on GPU0. */
>> +	xe_exec_sync(gpu0->fd, exec_queue[0], batch_addr, &sync, 1);
>> +	if (*sync_addr != EXEC_SYNC_VAL)
>> +		xe_wait_ufence(gpu0->fd, (uint64_t *)sync_addr,
>> EXEC_SYNC_VAL, exec_queue[0],
>> +			       NSEC_PER_SEC * 10);
>> +
>> +	igt_assert_eq(*shared_val, ATOMIC_OP_VAL);
>> +
>> +	atomic_batch_init(gpu1->fd, vm[1], addr, &batch_bo,
>> &batch_addr);
>> +
>> +	/* Place destination in an optionally remote location to
>> test */
> We're actually never using a remote location here? It's always advised
> to local.
Will edit the explanation.
>
>> +	xe_multigpu_madvise(gpu1->fd, vm[1], addr, SZ_4K, 0,
>> +			    DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC,
>> +			    gpu1->fd, 0, gpu1->vram_regions[0],
>> exec_queue[0],
>> +			    0, 0);
>
>
>> +
>> +	setup_sync(&sync, &sync_addr, BIND_SYNC_VAL);
>> +	xe_multigpu_prefetch(gpu1->fd, vm[1], addr, SZ_4K, &sync,
>> +			     sync_addr, exec_queue[1],
>> prefetch_req);
>> +
>> +	sync_addr = (void *)((char *)batch_addr + SZ_4K);
>> +	sync.addr = to_user_pointer((uint64_t *)sync_addr);
>> +	sync.timeline_value = EXEC_SYNC_VAL;
>> +	*sync_addr = 0;
>> +
>> +	/* Execute ATOMIC_INC on GPU1 */
>> +	xe_exec_sync(gpu1->fd, exec_queue[1], batch_addr, &sync, 1);
> If gpu1 here doesn't support faults, we shouldn't execute this.
So this condition applies to all tests: if a GPU does not support faults,
xe_exec_sync(gpuX->fd, ...) shouldn't be called for it?
>
>
>> +	if (*sync_addr != EXEC_SYNC_VAL)
>> +		xe_wait_ufence(gpu1->fd, (uint64_t *)sync_addr,
>> EXEC_SYNC_VAL, exec_queue[1],
>> +			       NSEC_PER_SEC * 10);
>> +
>> +	igt_assert_eq(*shared_val, ATOMIC_OP_VAL + 1);
>> +
>> +	munmap((void *)batch_addr, BATCH_SIZE(gpu0->fd));
>> +	batch_fini(gpu0->fd, vm[0], batch_bo, batch_addr);
>> +	batch_fini(gpu1->fd, vm[1], batch_bo, batch_addr);
>> +	free(data);
>> +
>> +	cleanup_vm_and_queue(gpu0, vm[0], exec_queue[0]);
>> +	cleanup_vm_and_queue(gpu1, vm[1], exec_queue[1]);
>> +}
>> +
>> +static void
>> +gpu_atomic_inc_wrapper(struct xe_svm_gpu_info *src,
>> +		       struct xe_svm_gpu_info *dst,
>> +		       struct drm_xe_engine_class_instance *eci,
>> +		       void *extra_args)
>> +{
>> +	struct multigpu_ops_args *args = (struct multigpu_ops_args
>> *)extra_args;
>> +	igt_assert(src);
>> +	igt_assert(dst);
>> +
>> +	atomic_inc_op(src, dst, eci, args->prefetch_req);
>> +}
>> +
>>   igt_main
>>   {
>>   	struct xe_svm_gpu_info gpus[MAX_XE_GPUS];
>> @@ -364,6 +511,14 @@ igt_main
>>   		for_each_gpu_pair(gpu_cnt, gpus, &eci,
>> gpu_mem_access_wrapper, &op_args);
>>   	}
>>   
>> +	igt_subtest("atomic-inc-gpu-op") {
>> +		struct multigpu_ops_args atomic_args;
>> +		atomic_args.prefetch_req = 1;
>> +		for_each_gpu_pair(gpu_cnt, gpus, &eci,
>> gpu_atomic_inc_wrapper, &atomic_args);
>> +		atomic_args.prefetch_req = 0;
>> +		for_each_gpu_pair(gpu_cnt, gpus, &eci,
>> gpu_atomic_inc_wrapper, &atomic_args);
> Same comment here as for the first test.
>
> /Thomas
>
>
>
>> +	}
>> +
>>   	igt_fixture {
>>   		int cnt;
>>

next prev parent reply	other threads:[~2025-11-17 15:52 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-13 16:32 [PATCH i-g-t v7 00/10] Madvise feature in SVM for Multi-GPU configs nishit.sharma
2025-11-13 16:33 ` [PATCH i-g-t v7 01/10] lib/xe: Add instance parameter to xe_vm_madvise and introduce lr_sync helpers nishit.sharma
2025-11-17 12:34   ` Hellstrom, Thomas
2025-11-17 15:43     ` Sharma, Nishit
2025-11-18  9:23       ` Hellstrom, Thomas
2025-11-13 16:33 ` [PATCH i-g-t v7 02/10] tests/intel/xe_exec_system_allocator: Add parameter in madvise call nishit.sharma
2025-11-17 12:38   ` Hellstrom, Thomas
2025-11-13 16:33 ` [PATCH i-g-t v7 03/10] tests/intel/xe_multi_gpusvm: Add SVM multi-GPU cross-GPU memory access test nishit.sharma
2025-11-17 13:00   ` Hellstrom, Thomas
2025-11-17 15:49     ` Sharma, Nishit
2025-11-17 20:40       ` Hellstrom, Thomas
2025-11-18  9:24       ` Hellstrom, Thomas
2025-11-13 16:33 ` [PATCH i-g-t v7 04/10] tests/intel/xe_multi_gpusvm.c: Add SVM multi-GPU atomic operations nishit.sharma
2025-11-17 13:10   ` Hellstrom, Thomas
2025-11-17 15:50     ` Sharma, Nishit [this message]
2025-11-18  9:26       ` Hellstrom, Thomas
2025-11-13 16:33 ` [PATCH i-g-t v7 05/10] tests/intel/xe_multi_gpusvm.c: Add SVM multi-GPU coherency test nishit.sharma
2025-11-17 14:02   ` Hellstrom, Thomas
2025-11-17 16:18     ` Sharma, Nishit
2025-11-27  7:36       ` Gurram, Pravalika
2025-11-13 16:33 ` [PATCH i-g-t v7 06/10] tests/intel/xe_multi_gpusvm.c: Add SVM multi-GPU performance test nishit.sharma
2025-11-17 14:39   ` Hellstrom, Thomas
2025-11-13 16:33 ` [PATCH i-g-t v7 07/10] tests/intel/xe_multi_gpusvm.c: Add SVM multi-GPU fault handling test nishit.sharma
2025-11-17 14:48   ` Hellstrom, Thomas
2025-11-13 16:33 ` [PATCH i-g-t v7 08/10] tests/intel/xe_multi_gpusvm.c: Add SVM multi-GPU simultaneous access test nishit.sharma
2025-11-17 14:57   ` Hellstrom, Thomas
2025-11-13 16:33 ` [PATCH i-g-t v7 09/10] tests/intel/xe_multi_gpusvm.c: Add SVM multi-GPU conflicting madvise test nishit.sharma
2025-11-17 15:11   ` Hellstrom, Thomas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4e88d41e-e65e-4fa8-b910-30de1f8a3ce2@intel.com \
    --to=nishit.sharma@intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=thomas.hellstrom@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.