From: Maciej Patelczyk <maciej.patelczyk@intel.com>
To: Matthew Brost <matthew.brost@intel.com>,
<intel-xe@lists.freedesktop.org>
Cc: <stuart.summers@intel.com>, <arvind.yadav@intel.com>,
<himal.prasad.ghimiray@intel.com>,
<thomas.hellstrom@linux.intel.com>, <francois.dugast@intel.com>
Subject: Re: [PATCH v4 11/12] drm/xe: batch CT pagefault acks with periodic flush
Date: Fri, 8 May 2026 11:24:21 +0200
Message-ID: <8c6eb092-b0c3-4364-b053-6749e4c0c36d@intel.com>
In-Reply-To: <20260226042834.2963245-12-matthew.brost@intel.com>
On 26/02/2026 05:28, Matthew Brost wrote:
> Pagefault storms can generate long chains of acknowledgments back to the
> GuC. Sending each ack as a full CT submission forces a barrier,
> descriptor update and doorbell per fault.
>
> Extend xe_guc_ct_send_locked() with a “write-only” mode that copies the
> message into the H2G ring but defers publishing the descriptor and
> ringing the doorbell. Add xe_guc_ct_send_flush() to publish pending
> writes and notify GuC once per batch. Wire this into the pagefault
> producer via new ack_fault_begin/ack_fault_end callbacks and CT lock
> wrappers.
>
> To avoid excessive flush latency while still amortizing MMIO costs, use
> a simple periodic flush heuristic for GuC pagefault acks: batch most
> acks as write-only and force a publish at a fixed interval (e.g., every
> 16th ack), with a final flush at end-of-batch.
>
> Also increase the H2G CTB size to 16K to better absorb bursts.
>
> Assisted-by: Chat-GPT # Documentation
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Interesting addition to chaining.
> ---
> drivers/gpu/drm/xe/xe_guc_ct.c | 94 +++++++++++++++++++------
> drivers/gpu/drm/xe/xe_guc_ct.h | 35 ++++++++-
> drivers/gpu/drm/xe/xe_guc_pagefault.c | 28 +++++++-
> drivers/gpu/drm/xe/xe_guc_types.h | 6 ++
> drivers/gpu/drm/xe/xe_pagefault.c | 12 +++-
> drivers/gpu/drm/xe/xe_pagefault_types.h | 14 ++++
> 6 files changed, 164 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index 3a262d3af8cf..5a126e19c53e 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -255,7 +255,7 @@ static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
>
> #define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
> #define CTB_H2G_BUFFER_OFFSET (CTB_DESC_SIZE * 2)
> -#define CTB_H2G_BUFFER_SIZE (SZ_4K)
> +#define CTB_H2G_BUFFER_SIZE (SZ_16K)
> #define CTB_H2G_BUFFER_DWORDS (CTB_H2G_BUFFER_SIZE / sizeof(u32))
> #define CTB_G2H_BUFFER_SIZE (SZ_128K)
> #define CTB_G2H_BUFFER_DWORDS (CTB_G2H_BUFFER_SIZE / sizeof(u32))
> @@ -912,7 +912,7 @@ static bool vf_action_can_safely_fail(struct xe_device *xe, u32 action)
> #define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */
>
> static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
> - u32 ct_fence_value, bool want_response)
> + u32 ct_fence_value, bool want_response, bool write_only)
> {
> struct xe_device *xe = ct_to_xe(ct);
> struct xe_gt *gt = ct_to_gt(ct);
> @@ -936,15 +936,8 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
> }
>
> if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
> - u32 desc_tail = desc_read(xe, h2g, tail);
> u32 desc_head = desc_read(xe, h2g, head);
>
> - if (tail != desc_tail) {
> - desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_MISMATCH);
> - xe_gt_err(gt, "CT write: tail was modified %u != %u\n", desc_tail, tail);
> - goto corrupted;
> - }
> -
> if (tail > h2g->info.size) {
> desc_write(xe, h2g, status, desc_status | GUC_CTB_STATUS_OVERFLOW);
> xe_gt_err(gt, "CT write: tail out of range: %u vs %u\n",
> @@ -966,7 +959,8 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
> (h2g->info.size - tail) * sizeof(u32));
> h2g_reserve_space(ct, (h2g->info.size - tail));
> h2g->info.tail = 0;
> - desc_write(xe, h2g, tail, h2g->info.tail);
> + if (!write_only)
> + desc_write(xe, h2g, tail, h2g->info.tail);
>
> return -EAGAIN;
> }
> @@ -997,14 +991,15 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
> /* Write H2G ensuring visible before descriptor update */
> xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
> xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32));
> - xe_device_wmb(xe);
> -
> /* Update local copies */
> h2g->info.tail = (tail + full_len) % h2g->info.size;
> h2g_reserve_space(ct, full_len);
>
> /* Update descriptor */
> - desc_write(xe, h2g, tail, h2g->info.tail);
> + if (!write_only) {
> + xe_device_wmb(xe);
> + desc_write(xe, h2g, tail, h2g->info.tail);
> + }
>
> trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len,
> desc_read(xe, h2g, head), h2g->info.tail);
> @@ -1018,7 +1013,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
>
> static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
> u32 len, u32 g2h_len, u32 num_g2h,
> - struct g2h_fence *g2h_fence)
> + struct g2h_fence *g2h_fence, bool write_only)
> {
> struct xe_gt *gt = ct_to_gt(ct);
> u16 seqno;
> @@ -1073,7 +1068,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
> if (unlikely(ret))
> goto out_unlock;
>
> - ret = h2g_write(ct, action, len, seqno, !!g2h_fence);
> + ret = h2g_write(ct, action, len, seqno, !!g2h_fence, write_only);
> if (unlikely(ret)) {
> if (ret == -EAGAIN)
> goto retry;
> @@ -1081,7 +1076,8 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
> }
>
> __g2h_reserve_space(ct, g2h_len, num_g2h);
> - xe_guc_notify(ct_to_guc(ct));
> + if (!write_only)
> + xe_guc_notify(ct_to_guc(ct));
> out_unlock:
> if (g2h_len)
> spin_unlock_irq(&ct->fast_lock);
> @@ -1157,7 +1153,7 @@ static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len,
>
> static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
> u32 g2h_len, u32 num_g2h,
> - struct g2h_fence *g2h_fence)
> + struct g2h_fence *g2h_fence, bool write_only)
> {
> struct xe_gt *gt = ct_to_gt(ct);
> unsigned int sleep_period_ms = 1;
> @@ -1170,9 +1166,11 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
>
> try_again:
> ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
> - g2h_fence);
> + g2h_fence, write_only);
>
> if (unlikely(ret == -EBUSY)) {
> + if (write_only)
> + xe_guc_ct_send_flush(ct);
Just checking that this 'if' is intended: in the 'write_only' case we
flush only after getting -EBUSY, presumably so the GuC sees the
already-queued writes and can drain the ring before the retry.
> if (!guc_ct_send_wait_for_retry(ct, len, g2h_len, g2h_fence,
> &sleep_period_ms, &sleep_total_ms))
> goto broken;
> @@ -1196,7 +1194,8 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
> xe_gt_assert(ct_to_gt(ct), !g2h_len || !g2h_fence);
>
> mutex_lock(&ct->lock);
> - ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
> + ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence,
> + false);
> mutex_unlock(&ct->lock);
>
> return ret;
> @@ -1214,25 +1213,76 @@ int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
> return ret;
> }
>
> +/**
> + * xe_guc_ct_send_locked() - submit a GuC CT H2G message with CT lock held
> + * @ct: GuC CT object
> + * @action: payload dwords (HxG header dword is expected at @action[-1])
> + * @len: number of payload dwords in @action
> + * @write_only: defer publishing/doorbell for batching
> + *
> + * Sends a single H2G message to the GuC CT buffer while the caller already
> + * holds @ct->lock.
> + *
> + * If @write_only is false, the function completes the submission immediately:
> + * it makes the payload visible to the device, updates the H2G descriptor and
> + * rings the GuC doorbell.
> + *
> + * If @write_only is true, the message payload is copied into the H2G ring and
> + * the software tail is advanced, but the descriptor update and doorbell are
> + * deferred so multiple messages can be batched. In this mode, the caller must
> + * eventually call xe_guc_ct_send_flush() (still holding @ct->lock) to publish
> + * the descriptor and notify the GuC. On internal retry paths (-EBUSY), the
> + * implementation may force a flush to ensure forward progress.
> + *
> + * Return: 0 on success, negative errno on failure.
> + *
> + * Locking:
> + * Must be called with @ct->lock held.
> + */
> int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
> - u32 g2h_len, u32 num_g2h)
> + bool write_only)
> {
> int ret;
>
> - ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, NULL);
> + ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL, write_only);
> if (ret == -EDEADLK)
> kick_reset(ct);
>
> return ret;
> }
>
> +/**
> + * xe_guc_ct_send_flush() - flush pending GuC CT H2G writes
> + * @ct: GuC CT instance
> + *
> + * Some callers batch multiple H2G writes using xe_guc_ct_send_locked() in
> + * "write-only" mode (i.e., queue the message payloads but defer ringing the
> + * doorbell / updating the CT descriptor). This helper completes the submission
> + * by ensuring the payload writes are visible to the device, updating the H2G
> + * descriptor, and ringing the GuC CT doorbell.
> + *
> + * Locking:
> + * Must be called with @ct->lock held.
> + */
> +void xe_guc_ct_send_flush(struct xe_guc_ct *ct)
> +{
> + struct xe_device *xe = ct_to_xe(ct);
> + struct guc_ctb *h2g = &ct->ctbs.h2g;
> +
> + lockdep_assert_held(&ct->lock);
> +
> + xe_device_wmb(xe);
> + desc_write(xe, h2g, tail, h2g->info.tail);
> + xe_guc_notify(ct_to_guc(ct));
> +}
> +
> int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action, u32 len)
> {
> int ret;
>
> lockdep_assert_held(&ct->lock);
>
> - ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL);
> + ret = guc_ct_send_locked(ct, action, len, 0, 0, NULL, false);
> if (ret == -EDEADLK)
> kick_reset(ct);
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
> index 767365a33dee..2db4dded6b96 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.h
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.h
> @@ -54,7 +54,7 @@ static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
> int xe_guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
> u32 g2h_len, u32 num_g2h);
> int xe_guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
> - u32 g2h_len, u32 num_g2h);
> + bool write_only);
> int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
> u32 *response_buffer);
> static inline int
> @@ -62,6 +62,7 @@ xe_guc_ct_send_block(struct xe_guc_ct *ct, const u32 *action, u32 len)
> {
> return xe_guc_ct_send_recv(ct, action, len, NULL);
> }
> +void xe_guc_ct_send_flush(struct xe_guc_ct *ct);
>
> /* This is only version of the send CT you can call from a G2H handler */
> int xe_guc_ct_send_g2h_handler(struct xe_guc_ct *ct, const u32 *action,
> @@ -87,4 +88,36 @@ static inline void xe_guc_ct_wake_waiters(struct xe_guc_ct *ct)
> wake_up_all(&ct->wq);
> }
>
> +/**
> + * xe_guc_ct_lock() - take the GuC CT mutex
> + * @ct: GuC CT object
> + *
> + * Wrapper around mutex_lock(&ct->lock) for cases where CT operations need to be
> + * performed from contexts that want an explicit "CT locked" pair without
> + * exporting the lock itself.
> + *
> + * Return/Locking:
> + * Acquires @ct->lock.
> + */
> +static inline void xe_guc_ct_lock(struct xe_guc_ct *ct)
> +__acquires(&ct->lock)
> +{
> + mutex_lock(&ct->lock);
> +}
> +
> +/**
> + * xe_guc_ct_unlock() - release the GuC CT mutex
> + * @ct: GuC CT object
> + *
> + * Counterpart to xe_guc_ct_lock().
> + *
> + * Locking:
> + * Releases @ct->lock.
> + */
> +static inline void xe_guc_ct_unlock(struct xe_guc_ct *ct)
> +__releases(&ct->lock)
> +{
> + mutex_unlock(&ct->lock);
> +}
> +
> #endif
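As a side note, the intended usage pattern of these wrappers as I read
it (a minimal sketch mirroring guc_ack_fault_begin/end below; not
driver code, the helper name and 'period' parameter are hypothetical,
error handling omitted):

	static void batch_acks_example(struct xe_guc *guc, const u32 *action,
				       u32 len, u32 num_acks, u32 period)
	{
		u32 i;

		xe_guc_ct_lock(&guc->ct);
		for (i = 0; i < num_acks; i++) {
			/* Most sends only copy into the H2G ring ... */
			bool write_only = (i + 1) % period;

			xe_guc_ct_send_locked(&guc->ct, action, len,
					      write_only);
		}
		/* ... then publish the tail and ring the doorbell once. */
		xe_guc_ct_send_flush(&guc->ct);
		xe_guc_ct_unlock(&guc->ct);
	}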
> diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c
> index 2470faf3d5d8..cee653bf463b 100644
> --- a/drivers/gpu/drm/xe/xe_guc_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c
> @@ -10,6 +10,19 @@
> #include "xe_pagefault.h"
> #include "xe_pagefault_types.h"
>
> +#define XE_GUC_PAGEFAULT_FLUSH_PERIOD BIT(4) /* Sixteen */
> +
> +static void guc_ack_fault_begin(void *private)
> +{
> + struct xe_guc *guc = private;
> +
> + xe_guc_ct_lock(&guc->ct);
> +
> + /* Ack the 2nd, then 18th, etc... */
> + guc->pagefault_ack_counter =
> + XE_GUC_PAGEFAULT_FLUSH_PERIOD - 2;
> +}
> +
> static void guc_ack_fault(struct xe_pagefault *pf, int err)
> {
> u32 vfid = FIELD_GET(PFD_VFID, pf->producer.msg[2]);
> @@ -36,12 +49,25 @@ static void guc_ack_fault(struct xe_pagefault *pf, int err)
> FIELD_PREP(PFR_PDATA, pdata),
> };
> struct xe_guc *guc = pf->producer.private;
> + bool write_only = guc->pagefault_ack_counter++ &
> + (XE_GUC_PAGEFAULT_FLUSH_PERIOD - 1);
> +
> + xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
> + write_only);
> +}
> +
> +static void guc_ack_fault_end(void *private)
> +{
> + struct xe_guc *guc = private;
>
> - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
Maybe guard the flush:

	if (!((guc->pagefault_ack_counter - 1) &
	      (XE_GUC_PAGEFAULT_FLUSH_PERIOD - 1)))
> + xe_guc_ct_send_flush(&guc->ct);
so that we don't wake up the GuC when there is no new ACK to publish
(i.e. the last ack was already sent with !write_only).
> + xe_guc_ct_unlock(&guc->ct);
> }
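For illustration, the periodic-flush arithmetic as a standalone
userspace demo (hypothetical, not driver code; the constant and the
initial counter value are copied from the patch):

	#include <stdbool.h>
	#include <stdio.h>

	#define XE_GUC_PAGEFAULT_FLUSH_PERIOD 16u

	int main(void)
	{
		/* Initial value from guc_ack_fault_begin(). */
		unsigned int counter = XE_GUC_PAGEFAULT_FLUSH_PERIOD - 2;
		int ack;

		for (ack = 1; ack <= 20; ack++) {
			/* Same expression as in guc_ack_fault(). */
			bool write_only = counter++ &
				(XE_GUC_PAGEFAULT_FLUSH_PERIOD - 1);

			printf("ack %2d: %s\n", ack,
			       write_only ? "write-only"
					  : "publish + doorbell");
		}

		/* The last ack used (counter - 1); when its masked value is
		 * zero that ack already published, so an unconditional
		 * end-of-batch flush rings the doorbell with nothing new
		 * queued -- the case the guard above is meant to catch. */
		if (!((counter - 1) & (XE_GUC_PAGEFAULT_FLUSH_PERIOD - 1)))
			printf("end of batch: last ack already published\n");
		else
			printf("end of batch: flush publishes remaining acks\n");
		return 0;
	}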
>
> static const struct xe_pagefault_ops guc_pagefault_ops = {
> + .ack_fault_begin = guc_ack_fault_begin,
> .ack_fault = guc_ack_fault,
> + .ack_fault_end = guc_ack_fault_end,
> };
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
> index c7b9642b41ba..2996e5903ccb 100644
> --- a/drivers/gpu/drm/xe/xe_guc_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_types.h
> @@ -124,6 +124,12 @@ struct xe_guc {
> struct xe_reg notify_reg;
> /** @params: Control params for fw initialization */
> u32 params[GUC_CTL_MAX_DWORDS];
> +
> + /**
> + * @pagefault_ack_counter: Counter to determine when to periodically
> + * ack pagefaults in a batch.
> + */
> + u32 pagefault_ack_counter;
> };
>
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c
> index 2cfda29321c9..d252a8c9d88c 100644
> --- a/drivers/gpu/drm/xe/xe_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_pagefault.c
> @@ -425,6 +425,10 @@ static bool xe_pagefault_cache_hit(struct xe_pagefault_queue *pf_queue,
> xe_assert(xe, pf_work->cache.pf->consumer.alloc_state ==
> XE_PAGEFAULT_ALLOC_STATE_ACTIVE);
>
> + if (pf->producer.private !=
> + pf_work->cache.pf->producer.private)
> + continue;
> +
> xe_gt_stats_incr(pf->gt,
> XE_GT_STATS_ID_CHAIN_PAGEFAULT_COUNT,
> 1);
> @@ -559,6 +563,8 @@ static void xe_pagefault_queue_work(struct work_struct *w)
>
>
> while (xe_pagefault_queue_pop(pf_queue, &pf, pf_work->id)) {
> + const struct xe_pagefault_ops *ops = pf->producer.ops;
> + void *private = pf->producer.private;
> struct xe_gt *gt = pf->gt;
> u32 asid = pf->consumer.asid;
> int err = 0;
> @@ -599,6 +605,7 @@ static void xe_pagefault_queue_work(struct work_struct *w)
> XE_PAGEFAULT_ALLOC_STATE_ACTIVE);
> xe_assert(xe, pf == pf_work->cache.pf);
>
> + ops->ack_fault_begin(private);
> while (pf) {
> struct xe_pagefault *next;
>
> @@ -606,8 +613,10 @@ static void xe_pagefault_queue_work(struct work_struct *w)
> XE_PAGEFAULT_ALLOC_STATE_CHAINED ||
> pf->consumer.alloc_state ==
> XE_PAGEFAULT_ALLOC_STATE_ACTIVE);
> + xe_assert(xe, ops == pf->producer.ops);
> + xe_assert(xe, gt == pf->gt);
>
> - pf->producer.ops->ack_fault(pf, err);
> + ops->ack_fault(pf, err);
>
> if (pf->consumer.alloc_state ==
> XE_PAGEFAULT_ALLOC_STATE_ACTIVE)
> @@ -635,6 +644,7 @@ static void xe_pagefault_queue_work(struct work_struct *w)
> pf = xe_pagefault_queue_requeue(pf_queue, pf,
> gt);
> }
> + ops->ack_fault_end(private);
So for every chain the worker processes, every 16th pagefault updates
the GuC, with one additional update at the end of the chain.
>
> if (time_after(jiffies, threshold)) {
> queue_work(xe->usm.pf_wq, w);
> diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h
> index 57cb292105d7..bc8f582b4e03 100644
> --- a/drivers/gpu/drm/xe/xe_pagefault_types.h
> +++ b/drivers/gpu/drm/xe/xe_pagefault_types.h
> @@ -33,6 +33,13 @@ enum xe_pagefault_type {
>
> /** struct xe_pagefault_ops - Xe pagefault ops (producer) */
> struct xe_pagefault_ops {
> + /**
> + * @ack_fault_begin: Ack fault begin
> + * @private: producer private data
> + *
> + * Page fault producer begins acknowledgment from the consumer.
> + */
> + void (*ack_fault_begin)(void *private);
> /**
> * @ack_fault: Ack fault
> * @pf: Page fault
> @@ -42,6 +49,13 @@ struct xe_pagefault_ops {
> * sends the result to the HW/FW interface.
> */
> void (*ack_fault)(struct xe_pagefault *pf, int err);
> + /**
> + * @ack_fault_end: Ack fault end
> + * @private: producer private data
> + *
> + * Page fault producer ends acknowledgment from the consumer.
> + */
> + void (*ack_fault_end)(void *private);
> };
>
> /**
In the end, each chain results in a GuC update; for long chains the
updates happen on every 16th ACK, so e.g. a 64-fault chain rings the
doorbell only a handful of times instead of 64.
Maciej