* [PATCH v2 1/4] drm/xe/ct: prevent UAF in send_recv()
@ 2024-10-01 8:43 Matthew Auld
2024-10-01 8:43 ` [PATCH v2 2/4] drm/xe/ct: fix xa_store() error checking Matthew Auld
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Matthew Auld @ 2024-10-01 8:43 UTC (permalink / raw)
To: intel-xe; +Cc: Matthew Brost, Badal Nilawar, stable
Ensure we serialize with completion side to prevent UAF with fence going
out of scope on the stack, since we have no clue if it will fire after
the timeout before we can erase from the xa. Also we have some dependent
loads and stores for which we need the correct ordering, and we lack the
needed barriers. Fix this by grabbing the ct->lock after the wait, which
is also held by the completion side.
v2 (Badal):
- Also print done after acquiring the lock and seeing timeout.
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Badal Nilawar <badal.nilawar@intel.com>
Cc: <stable@vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_guc_ct.c | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 4b95f75b1546..44263b3cd8c7 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -903,16 +903,26 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
}
ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+
+ /*
+ * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
+ * the stack, since we have no clue if it will fire after the timeout before we can erase
+ * from the xa. Also we have some dependent loads and stores below for which we need the
+ * correct ordering, and we lack the needed barriers.
+ */
+ mutex_lock(&ct->lock);
if (!ret) {
- xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x",
- g2h_fence.seqno, action[0]);
+ xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
+ g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
+ mutex_unlock(&ct->lock);
return -ETIME;
}
if (g2h_fence.retry) {
xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
action[0], g2h_fence.reason);
+ mutex_unlock(&ct->lock);
goto retry;
}
if (g2h_fence.fail) {
@@ -921,7 +931,12 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
ret = -EIO;
}
- return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret;
+ if (ret > 0)
+ ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
+
+ mutex_unlock(&ct->lock);
+
+ return ret;
}
/**
--
2.46.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v2 2/4] drm/xe/ct: fix xa_store() error checking
2024-10-01 8:43 [PATCH v2 1/4] drm/xe/ct: prevent UAF in send_recv() Matthew Auld
@ 2024-10-01 8:43 ` Matthew Auld
2024-10-01 13:48 ` Nilawar, Badal
2024-10-01 8:43 ` [PATCH v2 3/4] drm/xe/guc_submit: " Matthew Auld
2024-10-01 13:22 ` [PATCH v2 1/4] drm/xe/ct: prevent UAF in send_recv() Nilawar, Badal
2 siblings, 1 reply; 6+ messages in thread
From: Matthew Auld @ 2024-10-01 8:43 UTC (permalink / raw)
To: intel-xe; +Cc: Matthew Brost, Badal Nilawar, stable
Looks like we are meant to use xa_err() to extract the error encoded in
the ptr.
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Badal Nilawar <badal.nilawar@intel.com>
Cc: <stable@vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_guc_ct.c | 23 ++++++++---------------
1 file changed, 8 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 44263b3cd8c7..d3de2e6d690f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -667,16 +667,12 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
num_g2h = 1;
if (g2h_fence_needs_alloc(g2h_fence)) {
- void *ptr;
-
g2h_fence->seqno = next_ct_seqno(ct, true);
- ptr = xa_store(&ct->fence_lookup,
- g2h_fence->seqno,
- g2h_fence, GFP_ATOMIC);
- if (IS_ERR(ptr)) {
- ret = PTR_ERR(ptr);
+ ret = xa_err(xa_store(&ct->fence_lookup,
+ g2h_fence->seqno, g2h_fence,
+ GFP_ATOMIC));
+ if (ret)
goto out;
- }
}
seqno = g2h_fence->seqno;
@@ -879,14 +875,11 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
retry_same_fence:
ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
if (unlikely(ret == -ENOMEM)) {
- void *ptr;
-
/* Retry allocation /w GFP_KERNEL */
- ptr = xa_store(&ct->fence_lookup,
- g2h_fence.seqno,
- &g2h_fence, GFP_KERNEL);
- if (IS_ERR(ptr))
- return PTR_ERR(ptr);
+ ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno,
+ &g2h_fence, GFP_KERNEL));
+ if (ret)
+ return ret;
goto retry_same_fence;
} else if (unlikely(ret)) {
--
2.46.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v2 3/4] drm/xe/guc_submit: fix xa_store() error checking
2024-10-01 8:43 [PATCH v2 1/4] drm/xe/ct: prevent UAF in send_recv() Matthew Auld
2024-10-01 8:43 ` [PATCH v2 2/4] drm/xe/ct: fix xa_store() error checking Matthew Auld
@ 2024-10-01 8:43 ` Matthew Auld
2024-10-01 13:50 ` Nilawar, Badal
2024-10-01 13:22 ` [PATCH v2 1/4] drm/xe/ct: prevent UAF in send_recv() Nilawar, Badal
2 siblings, 1 reply; 6+ messages in thread
From: Matthew Auld @ 2024-10-01 8:43 UTC (permalink / raw)
To: intel-xe; +Cc: Matthew Brost, Badal Nilawar, stable
Looks like we are meant to use xa_err() to extract the error encoded in
the ptr.
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Badal Nilawar <badal.nilawar@intel.com>
Cc: <stable@vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_guc_submit.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 80062e1d3f66..8a5c21a87977 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -393,7 +393,6 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
int ret;
- void *ptr;
int i;
/*
@@ -413,12 +412,10 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
q->guc->id = ret;
for (i = 0; i < q->width; ++i) {
- ptr = xa_store(&guc->submission_state.exec_queue_lookup,
- q->guc->id + i, q, GFP_NOWAIT);
- if (IS_ERR(ptr)) {
- ret = PTR_ERR(ptr);
+ ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
+ q->guc->id + i, q, GFP_NOWAIT));
+ if (ret)
goto err_release;
- }
}
return 0;
--
2.46.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v2 1/4] drm/xe/ct: prevent UAF in send_recv()
2024-10-01 8:43 [PATCH v2 1/4] drm/xe/ct: prevent UAF in send_recv() Matthew Auld
2024-10-01 8:43 ` [PATCH v2 2/4] drm/xe/ct: fix xa_store() error checking Matthew Auld
2024-10-01 8:43 ` [PATCH v2 3/4] drm/xe/guc_submit: " Matthew Auld
@ 2024-10-01 13:22 ` Nilawar, Badal
2 siblings, 0 replies; 6+ messages in thread
From: Nilawar, Badal @ 2024-10-01 13:22 UTC (permalink / raw)
To: Matthew Auld, intel-xe; +Cc: Matthew Brost, stable
On 01-10-2024 14:13, Matthew Auld wrote:
> Ensure we serialize with completion side to prevent UAF with fence going
> out of scope on the stack, since we have no clue if it will fire after
> the timeout before we can erase from the xa. Also we have some dependent
> loads and stores for which we need the correct ordering, and we lack the
> needed barriers. Fix this by grabbing the ct->lock after the wait, which
> is also held by the completion side.
>
> v2 (Badal):
> - Also print done after acquiring the lock and seeing timeout.
>
> Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Badal Nilawar <badal.nilawar@intel.com>
> Cc: <stable@vger.kernel.org> # v6.8+
> ---
> drivers/gpu/drm/xe/xe_guc_ct.c | 21 ++++++++++++++++++---
> 1 file changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index 4b95f75b1546..44263b3cd8c7 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -903,16 +903,26 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
> }
>
> ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
> +
> + /*
> + * Ensure we serialize with completion side to prevent UAF with fence going out of scope on
> + * the stack, since we have no clue if it will fire after the timeout before we can erase
> + * from the xa. Also we have some dependent loads and stores below for which we need the
> + * correct ordering, and we lack the needed barriers.
> + */
> + mutex_lock(&ct->lock);
> if (!ret) {
> - xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x",
> - g2h_fence.seqno, action[0]);
> + xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
> + g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
> xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
> + mutex_unlock(&ct->lock);
> return -ETIME;
> }
>
> if (g2h_fence.retry) {
> xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
> action[0], g2h_fence.reason);
> + mutex_unlock(&ct->lock);
> goto retry;
> }
> if (g2h_fence.fail) {
> @@ -921,7 +931,12 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
> ret = -EIO;
> }
>
> - return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret;
> + if (ret > 0)
> + ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
> +
> + mutex_unlock(&ct->lock);
> +
> + return ret;
> }
Reviewed-by: Badal Nilawar <badal.nilawar@intel.com>
Regards,
Badal
>
> /**
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2 2/4] drm/xe/ct: fix xa_store() error checking
2024-10-01 8:43 ` [PATCH v2 2/4] drm/xe/ct: fix xa_store() error checking Matthew Auld
@ 2024-10-01 13:48 ` Nilawar, Badal
0 siblings, 0 replies; 6+ messages in thread
From: Nilawar, Badal @ 2024-10-01 13:48 UTC (permalink / raw)
To: Matthew Auld, intel-xe; +Cc: Matthew Brost, stable
On 01-10-2024 14:13, Matthew Auld wrote:
> Looks like we are meant to use xa_err() to extract the error encoded in
> the ptr.
>
> Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Badal Nilawar <badal.nilawar@intel.com>
> Cc: <stable@vger.kernel.org> # v6.8+
> ---
> drivers/gpu/drm/xe/xe_guc_ct.c | 23 ++++++++---------------
> 1 file changed, 8 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index 44263b3cd8c7..d3de2e6d690f 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -667,16 +667,12 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
> num_g2h = 1;
>
> if (g2h_fence_needs_alloc(g2h_fence)) {
> - void *ptr;
> -
> g2h_fence->seqno = next_ct_seqno(ct, true);
> - ptr = xa_store(&ct->fence_lookup,
> - g2h_fence->seqno,
> - g2h_fence, GFP_ATOMIC);
> - if (IS_ERR(ptr)) {
> - ret = PTR_ERR(ptr);
> + ret = xa_err(xa_store(&ct->fence_lookup,
> + g2h_fence->seqno, g2h_fence,
> + GFP_ATOMIC));
> + if (ret)
> goto out;
> - }
> }
>
> seqno = g2h_fence->seqno;
> @@ -879,14 +875,11 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
> retry_same_fence:
> ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
> if (unlikely(ret == -ENOMEM)) {
> - void *ptr;
> -
> /* Retry allocation /w GFP_KERNEL */
> - ptr = xa_store(&ct->fence_lookup,
> - g2h_fence.seqno,
> - &g2h_fence, GFP_KERNEL);
> - if (IS_ERR(ptr))
> - return PTR_ERR(ptr);
> + ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno,
> + &g2h_fence, GFP_KERNEL));
> + if (ret)
> + return ret;
Looks good to me.
Reviewed-by: Badal Nilawar <badal.nilawar@intel.com>
Regards,
Badal
>
> goto retry_same_fence;
> } else if (unlikely(ret)) {
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2 3/4] drm/xe/guc_submit: fix xa_store() error checking
2024-10-01 8:43 ` [PATCH v2 3/4] drm/xe/guc_submit: " Matthew Auld
@ 2024-10-01 13:50 ` Nilawar, Badal
0 siblings, 0 replies; 6+ messages in thread
From: Nilawar, Badal @ 2024-10-01 13:50 UTC (permalink / raw)
To: Matthew Auld, intel-xe; +Cc: Matthew Brost, stable
On 01-10-2024 14:13, Matthew Auld wrote:
> Looks like we are meant to use xa_err() to extract the error encoded in
> the ptr.
>
> Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Badal Nilawar <badal.nilawar@intel.com>
> Cc: <stable@vger.kernel.org> # v6.8+
> ---
> drivers/gpu/drm/xe/xe_guc_submit.c | 9 +++------
> 1 file changed, 3 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 80062e1d3f66..8a5c21a87977 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -393,7 +393,6 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
> static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
> {
> int ret;
> - void *ptr;
> int i;
>
> /*
> @@ -413,12 +412,10 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
> q->guc->id = ret;
>
> for (i = 0; i < q->width; ++i) {
> - ptr = xa_store(&guc->submission_state.exec_queue_lookup,
> - q->guc->id + i, q, GFP_NOWAIT);
> - if (IS_ERR(ptr)) {
> - ret = PTR_ERR(ptr);
> + ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
> + q->guc->id + i, q, GFP_NOWAIT));
> + if (ret)
> goto err_release;
> - }
> }
Looks good to me.
Reviewed-by: Badal Nilawar <badal.nilawar@intel.com>
Regards,
Badal
>
> return 0;
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2024-10-01 13:50 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-10-01 8:43 [PATCH v2 1/4] drm/xe/ct: prevent UAF in send_recv() Matthew Auld
2024-10-01 8:43 ` [PATCH v2 2/4] drm/xe/ct: fix xa_store() error checking Matthew Auld
2024-10-01 13:48 ` Nilawar, Badal
2024-10-01 8:43 ` [PATCH v2 3/4] drm/xe/guc_submit: " Matthew Auld
2024-10-01 13:50 ` Nilawar, Badal
2024-10-01 13:22 ` [PATCH v2 1/4] drm/xe/ct: prevent UAF in send_recv() Nilawar, Badal
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox