Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
To: "Mishra, Pallavi" <pallavi.mishra@intel.com>,
	"intel-xe@lists.freedesktop.org" <intel-xe@lists.freedesktop.org>
Cc: "Vishwanathapura,
	Niranjana" <niranjana.vishwanathapura@intel.com>,
	"Summers, Stuart" <stuart.summers@intel.com>
Subject: Re: [PATCH v2] drm/xe/tests: Fix g2g_test_array indexing
Date: Wed, 28 Jan 2026 16:27:27 -0800	[thread overview]
Message-ID: <855a551b-3275-4697-a816-2ca8ab86a284@intel.com> (raw)
In-Reply-To: <SJ5PPFC0624F2CABC96211C6507CF372CD68294A@SJ5PPFC0624F2CA.namprd11.prod.outlook.com>



On 1/22/2026 6:41 PM, Mishra, Pallavi wrote:
>
>> -----Original Message-----
>> From: Ceraolo Spurio, Daniele <daniele.ceraolospurio@intel.com>
>> Sent: Thursday, January 22, 2026 3:01 PM
>> To: Mishra, Pallavi <pallavi.mishra@intel.com>; intel-xe@lists.freedesktop.org
>> Cc: Vishwanathapura, Niranjana <niranjana.vishwanathapura@intel.com>;
>> Summers, Stuart <stuart.summers@intel.com>
>> Subject: Re: [PATCH v2] drm/xe/tests: Fix g2g_test_array indexing
>>
>>
>>
>> On 1/22/2026 11:33 AM, Mishra, Pallavi wrote:
>>>> -----Original Message-----
>>>> From: Ceraolo Spurio, Daniele <daniele.ceraolospurio@intel.com>
>>>> Sent: Wednesday, January 21, 2026 4:20 PM
>>>> To: Mishra, Pallavi <pallavi.mishra@intel.com>;
>>>> intel-xe@lists.freedesktop.org
>>>> Cc: Vishwanathapura, Niranjana <niranjana.vishwanathapura@intel.com>;
>>>> Summers, Stuart <stuart.summers@intel.com>
>>>> Subject: Re: [PATCH v2] drm/xe/tests: Fix g2g_test_array indexing
>>>>
>>>>
>>>>
>>>> On 1/15/2026 4:25 PM, Pallavi Mishra wrote:
>>>>> The G2G KUnit test allocates a compact N×N matrix sized by gt_count
>>>>> and verifies entries using dense indices: idx = (j * gt_count) + i
>>>>>
>>>>> The producer path currently computes idx using
>>>>> gt->info.id. However, gt->info.id values
>>>>> are not guaranteed to be contiguous.
>>>>> For example, with gt_count=2 and IDs {0,3}, this formula produces
>>>>> indices beyond the allocated range, causing mismatches and potential
>>>>> out-of-bounds access.
>>>>>
>>>>> Update the producer to map each GT to a dense index in
>>>>> [0..gt_count-1] and compute:
>>>>>        idx = (tx_dense * gt_count) + rx_dense
>>>>>
>>>>> Additionally, introduce an event-based delay in g2g_test_in_order()
>>>>> to ensure ordering between sends.
>>>>>
>>>>> v2: Add single helper function (Daniele)
>>>>>
>>>>> Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
>>>>> ---
>>>>>     drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c | 56
>>>> +++++++++++++++++++++-
>>>>>     1 file changed, 54 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
>>>>> b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
>>>>> index 3b213fcae916..ec18086b98d8 100644
>>>>> --- a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
>>>>> +++ b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
>>>>> @@ -48,6 +48,38 @@ struct g2g_test_payload  {
>>>>>     	u32 seqno;
>>>>>     };
>>>>>
>>>>> +static int slot_index_from_gts(struct xe_gt *tx_gt, struct xe_gt
>>>>> +*rx_gt) {
>>>>> +	struct xe_device *xe = gt_to_xe(tx_gt);
>>>>> +	int idx = 0, found = 0, id, tx_idx, rx_idx;
>>>>> +	struct xe_gt *gt;
>>>>> +	struct kunit *test = kunit_get_current_test();
>>>>> +
>>>>> +	for (id = 0; id < xe->info.tile_count * xe->info.max_gt_per_tile; id++) {
>>>>> +		gt = xe_device_get_gt(xe, id);
>>>>> +		if (!gt)
>>>>> +			continue;
>>>>> +		if (gt == tx_gt) {
>>>>> +			tx_idx = idx;
>>>>> +			found++;
>>>>> +		}
>>>>> +		if (gt == rx_gt) {
>>>>> +			rx_idx = idx;
>>>>> +			found++;
>>>>> +		}
>>>>> +
>>>>> +		if (found == 2)
>>>>> +			break;
>>>>> +
>>>>> +		idx++;
>>>>> +	}
>>>>> +
>>>>> +	if (found != 2)
>>>>> +		KUNIT_FAIL(test, "GT index not found");
>>>>> +
>>>>> +	return (tx_idx * xe->info.gt_count) + rx_idx; }
>>>>> +
>>>>>     static void g2g_test_send(struct kunit *test, struct xe_guc *guc,
>>>>>     			  u32 far_tile, u32 far_dev,
>>>>>     			  struct g2g_test_payload *payload) @@ -163,7
>>>> +195,7 @@ int
>>>>> xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len)
>>>>>     		goto done;
>>>>>     	}
>>>>>
>>>>> -	idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id;
>>>>> +	idx = slot_index_from_gts(tx_gt, rx_gt);
>>>>>
>>>>>     	if (xe->g2g_test_array[idx] != payload->seqno - 1) {
>>>>>     		xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -
>>>>>
>>>>> %d:%d!\n", @@ -180,13 +212,17 @@ int
>>>> xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len)
>>>>>     	return ret;
>>>>>     }
>>>>>
>>>>> +#define G2G_WAIT_TIMEOUT_MS 100
>>>>> +#define G2G_WAIT_POLL_MS 1
>>>>> +
>>>>>     /*
>>>>>      * Send the given seqno from all GuCs to all other GuCs in tile/GT order
>>>>>      */
>>>>>     static void g2g_test_in_order(struct kunit *test, struct
>>>>> xe_device *xe, u32
>>>> seqno)
>>>>>     {
>>>>>     	struct xe_gt *near_gt, *far_gt;
>>>>> -	int i, j;
>>>>> +	int i, j, waited;
>>>>> +	u32 idx;
>>>>>
>>>>>     	for_each_gt(near_gt, xe, i) {
>>>>>     		u32 near_tile = gt_to_tile(near_gt)->id; @@ -205,6 +241,22
>>>> @@
>>>>> static void g2g_test_in_order(struct kunit *test, struct xe_device
>>>>> *xe, u32
>>>> seqn
>>>>>     			payload.rx_dev = far_dev;
>>>>>     			payload.rx_tile = far_tile;
>>>>>     			payload.seqno = seqno;
>>>>> +
>>>>> +			/* Calculate idx for event-based wait */
>>>>> +			idx = slot_index_from_gts(near_gt, far_gt);
>>>>> +			waited = 0;
>>>>> +
>>>>> +			/* Wait for previous seqno to be acknowledged
>>>> before sending */
>>>>> +			while (xe->g2g_test_array[idx] != (seqno - 1)) {
>>>>> +				msleep(G2G_WAIT_POLL_MS);
>>>>> +				waited += G2G_WAIT_POLL_MS;
>>>>> +				if (waited >= G2G_WAIT_TIMEOUT_MS) {
>>>>> +					kunit_info(test, "Timeout waiting! tx
>>>> gt: %d, rx gt: %d\n",
>>>>> +						   near_gt->info.id, far_gt-
>>>>> info.id);
>>>> Sorry I missed this on the previous rev, but shouldn't this be a
>>>> failure? If we don't get one of the previous messages then something has
>> gone wrong.
>>>> The patch looks good to me apart from this.
>>>>
>>>> Daniele
>>> I think marking a failure at this point may be too early. Seqno check
>>> validation happens later on in g2g_run_test() and also in
>>> xe_guc_g2g_test_notification(). If any notification is missed or out of order,
>> the test will fail at that stage.
>>
>> Then why wait for it at all? either waiting for it is significant (and therefore we
>> fail the test if the wait times out) or it doesn't matter because we have other
>> checks later (in which case we can just avoid waiting).
>>
>> Daniele
> To provide some background here, the while loop and wait are primarily added to pace the test and avoid sending back-to-back messages, which previously caused the test to always time out waiting for notifications. Introducing a delay/sleep before the g2g_test_send() resolved this. During internal review it was suggested that a fixed delay here may not work across platforms, so I modified the same to an event based delay. So the wait here isn't meant to validate correctness at this stage, but simply pace the test. The timeout message here might be confusing, so I can remove it if preferred. Please let me know your thoughts.

Ok, then please expand the comment to reflect this. Maybe something like 
"Wait for previous seqno to be acknowledged before sending, to avoid 
queuing too many messages back-to-back and causing a test timeout. 
Actual message correctness will be checked in 
xe_guc_g2g_test_notification()".

Daniele

>
>>> Pallavi
>>>>> +					break;
>>>>> +				}
>>>>> +			}
>>>>> +
>>>>>     			g2g_test_send(test, &near_gt->uc.guc, far_tile,
>>>> far_dev, &payload);
>>>>>     		}
>>>>>     	}


  reply	other threads:[~2026-01-29  0:27 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-16  0:25 [PATCH v2] drm/xe/tests: Fix g2g_test_array indexing Pallavi Mishra
2026-01-16  0:32 ` ✓ CI.KUnit: success for drm/xe/tests: Fix g2g_test_array indexing (rev2) Patchwork
2026-01-16  1:13 ` ✓ Xe.CI.BAT: " Patchwork
2026-01-16  5:39 ` ✗ Xe.CI.Full: failure " Patchwork
2026-01-22  0:20 ` [PATCH v2] drm/xe/tests: Fix g2g_test_array indexing Daniele Ceraolo Spurio
2026-01-22 19:33   ` Mishra, Pallavi
2026-01-22 23:01     ` Daniele Ceraolo Spurio
2026-01-23  2:41       ` Mishra, Pallavi
2026-01-29  0:27         ` Daniele Ceraolo Spurio [this message]
2026-01-29  0:40           ` Mishra, Pallavi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=855a551b-3275-4697-a816-2ca8ab86a284@intel.com \
    --to=daniele.ceraolospurio@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=niranjana.vishwanathapura@intel.com \
    --cc=pallavi.mishra@intel.com \
    --cc=stuart.summers@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox