* [PATCH] drm/xe: Add wait for completion after gt force reset
@ 2023-12-14 10:06 Karthik Poosa
2023-12-14 11:42 ` Nilawar, Badal
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Karthik Poosa @ 2023-12-14 10:06 UTC (permalink / raw)
To: intel-gfx; +Cc: Karthik Poosa
Wait for gt reset to complete before returning from force_reset
sysfs call. Without this igt test freq_reset_multiple fails
sporadically in case xe_guc_pc is not started.
Testcase: igt@xe_guc_pc@freq_reset_multiple
Signed-off-by: Karthik Poosa <karthik.poosa@intel.com>
---
drivers/gpu/drm/xe/xe_gt.c | 3 +++
drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++
drivers/gpu/drm/xe/xe_gt_types.h | 3 +++
3 files changed, 16 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index dfd9cf01a5d5..eb7552b6dfa5 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -65,6 +65,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
gt->tile = tile;
gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
+ init_completion(>->reset_done);
return gt;
}
@@ -647,6 +648,8 @@ static int gt_reset(struct xe_gt *gt)
xe_device_mem_access_put(gt_to_xe(gt));
XE_WARN_ON(err);
+ complete(>->reset_done);
+
xe_gt_info(gt, "reset done\n");
return 0;
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index c4b67cf09f8f..49b30937a28b 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -23,6 +23,8 @@
#include "xe_uc_debugfs.h"
#include "xe_wa.h"
+#define XE_GT_RESET_TIMEOUT_MS (msecs_to_jiffies(5*1000))
+
static struct xe_gt *node_to_gt(struct drm_info_node *node)
{
return node->info_ent->data;
@@ -58,9 +60,17 @@ static int hw_engines(struct seq_file *m, void *data)
static int force_reset(struct seq_file *m, void *data)
{
struct xe_gt *gt = node_to_gt(m->private);
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned long timeout;
xe_gt_reset_async(gt);
+ timeout = wait_for_completion_timeout(>->reset_done, XE_GT_RESET_TIMEOUT_MS);
+ if (timeout == 0) {
+ drm_err(&xe->drm, "gt reset timed out");
+ return -ETIMEDOUT;
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index f74684660475..6f2fb9e3cfea 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -358,6 +358,9 @@ struct xe_gt {
/** @oob: bitmap with active OOB workaroudns */
unsigned long *oob;
} wa_active;
+
+ /** @reset_done : Completion of GT reset */
+ struct completion reset_done;
};
#endif
--
2.25.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH] drm/xe: Add wait for completion after gt force reset
2023-12-14 10:06 [PATCH] drm/xe: Add wait for completion after gt force reset Karthik Poosa
@ 2023-12-14 11:42 ` Nilawar, Badal
2023-12-14 11:56 ` Jani Nikula
2023-12-14 13:40 ` ✗ Fi.CI.BUILD: failure for " Patchwork
2 siblings, 0 replies; 9+ messages in thread
From: Nilawar, Badal @ 2023-12-14 11:42 UTC (permalink / raw)
To: Karthik Poosa, intel-gfx
I think title should be make sysfs gt force reset synchronous.
On 14-12-2023 15:36, Karthik Poosa wrote:
> Wait for gt reset to complete before returning from force_reset
> sysfs call. Without this igt test freq_reset_multiple fails
> sporadically in case xe_guc_pc is not started.
>
> Testcase: igt@xe_guc_pc@freq_reset_multiple
> Signed-off-by: Karthik Poosa <karthik.poosa@intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt.c | 3 +++
> drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++
> drivers/gpu/drm/xe/xe_gt_types.h | 3 +++
> 3 files changed, 16 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index dfd9cf01a5d5..eb7552b6dfa5 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -65,6 +65,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
>
> gt->tile = tile;
> gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
> + init_completion(>->reset_done);
>
> return gt;
> }
> @@ -647,6 +648,8 @@ static int gt_reset(struct xe_gt *gt)
> xe_device_mem_access_put(gt_to_xe(gt));
> XE_WARN_ON(err);
>
> + complete(>->reset_done);
> +
> xe_gt_info(gt, "reset done\n");
>
> return 0;
> diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> index c4b67cf09f8f..49b30937a28b 100644
> --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
> +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> @@ -23,6 +23,8 @@
> #include "xe_uc_debugfs.h"
> #include "xe_wa.h"
>
> +#define XE_GT_RESET_TIMEOUT_MS (msecs_to_jiffies(5*1000))
> +
> static struct xe_gt *node_to_gt(struct drm_info_node *node)
> {
> return node->info_ent->data;
> @@ -58,9 +60,17 @@ static int hw_engines(struct seq_file *m, void *data)
> static int force_reset(struct seq_file *m, void *data)
> {
> struct xe_gt *gt = node_to_gt(m->private);
> + struct xe_device *xe = gt_to_xe(gt);
> + unsigned long timeout;
>
This may not work when multiple processes tries gt reset simultaneously.
Check for reset in progress should be here.
Regards,
Badal
> xe_gt_reset_async(gt);
>
> + timeout = wait_for_completion_timeout(>->reset_done, XE_GT_RESET_TIMEOUT_MS);
> + if (timeout == 0) {
> + drm_err(&xe->drm, "gt reset timed out");
> + return -ETIMEDOUT;
> + }
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> index f74684660475..6f2fb9e3cfea 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -358,6 +358,9 @@ struct xe_gt {
> /** @oob: bitmap with active OOB workaroudns */
> unsigned long *oob;
> } wa_active;
> +
> + /** @reset_done : Completion of GT reset */
> + struct completion reset_done;
> };
>
> #endif
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH] drm/xe: Add wait for completion after gt force reset
2023-12-14 10:06 [PATCH] drm/xe: Add wait for completion after gt force reset Karthik Poosa
2023-12-14 11:42 ` Nilawar, Badal
@ 2023-12-14 11:56 ` Jani Nikula
2023-12-14 13:40 ` ✗ Fi.CI.BUILD: failure for " Patchwork
2 siblings, 0 replies; 9+ messages in thread
From: Jani Nikula @ 2023-12-14 11:56 UTC (permalink / raw)
To: Karthik Poosa, intel-gfx; +Cc: Karthik Poosa
On Thu, 14 Dec 2023, Karthik Poosa <karthik.poosa@intel.com> wrote:
> Wait for gt reset to complete before returning from force_reset
> sysfs call. Without this igt test freq_reset_multiple fails
> sporadically in case xe_guc_pc is not started.
Please send xe changes to intel-xe mailing list.
Thanks,
Jani.
>
> Testcase: igt@xe_guc_pc@freq_reset_multiple
> Signed-off-by: Karthik Poosa <karthik.poosa@intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt.c | 3 +++
> drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++
> drivers/gpu/drm/xe/xe_gt_types.h | 3 +++
> 3 files changed, 16 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index dfd9cf01a5d5..eb7552b6dfa5 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -65,6 +65,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
>
> gt->tile = tile;
> gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
> + init_completion(>->reset_done);
>
> return gt;
> }
> @@ -647,6 +648,8 @@ static int gt_reset(struct xe_gt *gt)
> xe_device_mem_access_put(gt_to_xe(gt));
> XE_WARN_ON(err);
>
> + complete(>->reset_done);
> +
> xe_gt_info(gt, "reset done\n");
>
> return 0;
> diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> index c4b67cf09f8f..49b30937a28b 100644
> --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
> +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> @@ -23,6 +23,8 @@
> #include "xe_uc_debugfs.h"
> #include "xe_wa.h"
>
> +#define XE_GT_RESET_TIMEOUT_MS (msecs_to_jiffies(5*1000))
> +
> static struct xe_gt *node_to_gt(struct drm_info_node *node)
> {
> return node->info_ent->data;
> @@ -58,9 +60,17 @@ static int hw_engines(struct seq_file *m, void *data)
> static int force_reset(struct seq_file *m, void *data)
> {
> struct xe_gt *gt = node_to_gt(m->private);
> + struct xe_device *xe = gt_to_xe(gt);
> + unsigned long timeout;
>
> xe_gt_reset_async(gt);
>
> + timeout = wait_for_completion_timeout(>->reset_done, XE_GT_RESET_TIMEOUT_MS);
> + if (timeout == 0) {
> + drm_err(&xe->drm, "gt reset timed out");
> + return -ETIMEDOUT;
> + }
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> index f74684660475..6f2fb9e3cfea 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -358,6 +358,9 @@ struct xe_gt {
> /** @oob: bitmap with active OOB workaroudns */
> unsigned long *oob;
> } wa_active;
> +
> + /** @reset_done : Completion of GT reset */
> + struct completion reset_done;
> };
>
> #endif
--
Jani Nikula, Intel
^ permalink raw reply [flat|nested] 9+ messages in thread* ✗ Fi.CI.BUILD: failure for drm/xe: Add wait for completion after gt force reset
2023-12-14 10:06 [PATCH] drm/xe: Add wait for completion after gt force reset Karthik Poosa
2023-12-14 11:42 ` Nilawar, Badal
2023-12-14 11:56 ` Jani Nikula
@ 2023-12-14 13:40 ` Patchwork
2 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2023-12-14 13:40 UTC (permalink / raw)
To: Karthik Poosa; +Cc: intel-gfx
== Series Details ==
Series: drm/xe: Add wait for completion after gt force reset
URL : https://patchwork.freedesktop.org/series/127796/
State : failure
== Summary ==
Error: patch https://patchwork.freedesktop.org/api/1.0/series/127796/revisions/1/mbox/ not applied
Applying: drm/xe: Add wait for completion after gt force reset
error: sha1 information is lacking or useless (drivers/gpu/drm/xe/xe_gt.c).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 drm/xe: Add wait for completion after gt force reset
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
Build failed, no error log produced
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH] drm/xe: Add wait for completion after gt force reset
@ 2023-12-15 5:15 Karthik Poosa
2023-12-15 16:43 ` Rodrigo Vivi
0 siblings, 1 reply; 9+ messages in thread
From: Karthik Poosa @ 2023-12-15 5:15 UTC (permalink / raw)
To: intel-xe; +Cc: Karthik Poosa
Wait for gt reset to complete before returning from force_reset
sysfs call. Without this igt test freq_reset_multiple fails
sporadically in case xe_guc_pc is not started.
Testcase: igt@xe_guc_pc@freq_reset_multiple
Signed-off-by: Karthik Poosa <karthik.poosa@intel.com>
---
drivers/gpu/drm/xe/xe_gt.c | 3 +++
drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++
drivers/gpu/drm/xe/xe_gt_types.h | 3 +++
3 files changed, 16 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index dfd9cf01a5d5..eb7552b6dfa5 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -65,6 +65,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
gt->tile = tile;
gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
+ init_completion(>->reset_done);
return gt;
}
@@ -647,6 +648,8 @@ static int gt_reset(struct xe_gt *gt)
xe_device_mem_access_put(gt_to_xe(gt));
XE_WARN_ON(err);
+ complete(>->reset_done);
+
xe_gt_info(gt, "reset done\n");
return 0;
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index c4b67cf09f8f..49b30937a28b 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -23,6 +23,8 @@
#include "xe_uc_debugfs.h"
#include "xe_wa.h"
+#define XE_GT_RESET_TIMEOUT_MS (msecs_to_jiffies(5*1000))
+
static struct xe_gt *node_to_gt(struct drm_info_node *node)
{
return node->info_ent->data;
@@ -58,9 +60,17 @@ static int hw_engines(struct seq_file *m, void *data)
static int force_reset(struct seq_file *m, void *data)
{
struct xe_gt *gt = node_to_gt(m->private);
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned long timeout;
xe_gt_reset_async(gt);
+ timeout = wait_for_completion_timeout(>->reset_done, XE_GT_RESET_TIMEOUT_MS);
+ if (timeout == 0) {
+ drm_err(&xe->drm, "gt reset timed out");
+ return -ETIMEDOUT;
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index f74684660475..6f2fb9e3cfea 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -358,6 +358,9 @@ struct xe_gt {
/** @oob: bitmap with active OOB workaroudns */
unsigned long *oob;
} wa_active;
+
+ /** @reset_done : Completion of GT reset */
+ struct completion reset_done;
};
#endif
--
2.25.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH] drm/xe: Add wait for completion after gt force reset
2023-12-15 5:15 [PATCH] " Karthik Poosa
@ 2023-12-15 16:43 ` Rodrigo Vivi
2023-12-15 17:32 ` Rodrigo Vivi
0 siblings, 1 reply; 9+ messages in thread
From: Rodrigo Vivi @ 2023-12-15 16:43 UTC (permalink / raw)
To: Karthik Poosa; +Cc: intel-xe
On Fri, Dec 15, 2023 at 10:45:41AM +0530, Karthik Poosa wrote:
> Wait for gt reset to complete before returning from force_reset
> sysfs call. Without this igt test freq_reset_multiple fails
> sporadically in case xe_guc_pc is not started.
\o/ I knew I was missing something there. Thanks for finding that out.
>
> Testcase: igt@xe_guc_pc@freq_reset_multiple
> Signed-off-by: Karthik Poosa <karthik.poosa@intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt.c | 3 +++
> drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++
> drivers/gpu/drm/xe/xe_gt_types.h | 3 +++
> 3 files changed, 16 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index dfd9cf01a5d5..eb7552b6dfa5 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -65,6 +65,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
>
> gt->tile = tile;
> gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
> + init_completion(>->reset_done);
>
> return gt;
> }
> @@ -647,6 +648,8 @@ static int gt_reset(struct xe_gt *gt)
> xe_device_mem_access_put(gt_to_xe(gt));
> XE_WARN_ON(err);
>
> + complete(>->reset_done);
> +
> xe_gt_info(gt, "reset done\n");
>
> return 0;
> diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> index c4b67cf09f8f..49b30937a28b 100644
> --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
> +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> @@ -23,6 +23,8 @@
> #include "xe_uc_debugfs.h"
> #include "xe_wa.h"
>
> +#define XE_GT_RESET_TIMEOUT_MS (msecs_to_jiffies(5*1000))
5s seems to much, but anyway, this is a debugfs function, so, no
if 5s fail, then we do have bigger problems.
perhaps we could get the timeout as input?
anyway, it is better to have protected like this than the current
racy one that we have.
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> +
> static struct xe_gt *node_to_gt(struct drm_info_node *node)
> {
> return node->info_ent->data;
> @@ -58,9 +60,17 @@ static int hw_engines(struct seq_file *m, void *data)
> static int force_reset(struct seq_file *m, void *data)
> {
> struct xe_gt *gt = node_to_gt(m->private);
> + struct xe_device *xe = gt_to_xe(gt);
> + unsigned long timeout;
>
> xe_gt_reset_async(gt);
>
> + timeout = wait_for_completion_timeout(>->reset_done, XE_GT_RESET_TIMEOUT_MS);
> + if (timeout == 0) {
> + drm_err(&xe->drm, "gt reset timed out");
> + return -ETIMEDOUT;
> + }
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> index f74684660475..6f2fb9e3cfea 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -358,6 +358,9 @@ struct xe_gt {
> /** @oob: bitmap with active OOB workaroudns */
> unsigned long *oob;
> } wa_active;
> +
> + /** @reset_done : Completion of GT reset */
> + struct completion reset_done;
> };
>
> #endif
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH] drm/xe: Add wait for completion after gt force reset
2023-12-15 16:43 ` Rodrigo Vivi
@ 2023-12-15 17:32 ` Rodrigo Vivi
2023-12-20 12:51 ` Gupta, Anshuman
0 siblings, 1 reply; 9+ messages in thread
From: Rodrigo Vivi @ 2023-12-15 17:32 UTC (permalink / raw)
To: Karthik Poosa; +Cc: intel-xe
On Fri, Dec 15, 2023 at 11:43:04AM -0500, Rodrigo Vivi wrote:
> On Fri, Dec 15, 2023 at 10:45:41AM +0530, Karthik Poosa wrote:
> > Wait for gt reset to complete before returning from force_reset
> > sysfs call. Without this igt test freq_reset_multiple fails
> > sporadically in case xe_guc_pc is not started.
>
> \o/ I knew I was missing something there. Thanks for finding that out.
>
> >
> > Testcase: igt@xe_guc_pc@freq_reset_multiple
> > Signed-off-by: Karthik Poosa <karthik.poosa@intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_gt.c | 3 +++
> > drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++
> > drivers/gpu/drm/xe/xe_gt_types.h | 3 +++
> > 3 files changed, 16 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> > index dfd9cf01a5d5..eb7552b6dfa5 100644
> > --- a/drivers/gpu/drm/xe/xe_gt.c
> > +++ b/drivers/gpu/drm/xe/xe_gt.c
> > @@ -65,6 +65,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
> >
> > gt->tile = tile;
> > gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
> > + init_completion(>->reset_done);
> >
> > return gt;
> > }
> > @@ -647,6 +648,8 @@ static int gt_reset(struct xe_gt *gt)
> > xe_device_mem_access_put(gt_to_xe(gt));
> > XE_WARN_ON(err);
> >
> > + complete(>->reset_done);
> > +
> > xe_gt_info(gt, "reset done\n");
> >
> > return 0;
> > diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> > index c4b67cf09f8f..49b30937a28b 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> > @@ -23,6 +23,8 @@
> > #include "xe_uc_debugfs.h"
> > #include "xe_wa.h"
> >
> > +#define XE_GT_RESET_TIMEOUT_MS (msecs_to_jiffies(5*1000))
>
> 5s seems to much, but anyway, this is a debugfs function, so, no
> if 5s fail, then we do have bigger problems.
>
> perhaps we could get the timeout as input?
>
> anyway, it is better to have protected like this than the current
> racy one that we have.
>
> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
ops, let me tune down my over excitement here.
We need to address the feedback from Badal that was given at intel-gfx ml.
>
> > +
> > static struct xe_gt *node_to_gt(struct drm_info_node *node)
> > {
> > return node->info_ent->data;
> > @@ -58,9 +60,17 @@ static int hw_engines(struct seq_file *m, void *data)
> > static int force_reset(struct seq_file *m, void *data)
> > {
> > struct xe_gt *gt = node_to_gt(m->private);
> > + struct xe_device *xe = gt_to_xe(gt);
> > + unsigned long timeout;
> >
> > xe_gt_reset_async(gt);
> >
> > + timeout = wait_for_completion_timeout(>->reset_done, XE_GT_RESET_TIMEOUT_MS);
> > + if (timeout == 0) {
> > + drm_err(&xe->drm, "gt reset timed out");
> > + return -ETIMEDOUT;
> > + }
> > +
> > return 0;
> > }
> >
> > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> > index f74684660475..6f2fb9e3cfea 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > @@ -358,6 +358,9 @@ struct xe_gt {
> > /** @oob: bitmap with active OOB workaroudns */
> > unsigned long *oob;
> > } wa_active;
> > +
> > + /** @reset_done : Completion of GT reset */
> > + struct completion reset_done;
> > };
> >
> > #endif
> > --
> > 2.25.1
> >
^ permalink raw reply [flat|nested] 9+ messages in thread* RE: [PATCH] drm/xe: Add wait for completion after gt force reset
2023-12-15 17:32 ` Rodrigo Vivi
@ 2023-12-20 12:51 ` Gupta, Anshuman
2023-12-20 15:20 ` Rodrigo Vivi
0 siblings, 1 reply; 9+ messages in thread
From: Gupta, Anshuman @ 2023-12-20 12:51 UTC (permalink / raw)
To: Vivi, Rodrigo, Poosa, Karthik; +Cc: intel-xe@lists.freedesktop.org
> -----Original Message-----
> From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of Rodrigo
> Vivi
> Sent: Friday, December 15, 2023 11:02 PM
> To: Poosa, Karthik <karthik.poosa@intel.com>
> Cc: intel-xe@lists.freedesktop.org
> Subject: Re: [PATCH] drm/xe: Add wait for completion after gt force reset
>
> On Fri, Dec 15, 2023 at 11:43:04AM -0500, Rodrigo Vivi wrote:
> > On Fri, Dec 15, 2023 at 10:45:41AM +0530, Karthik Poosa wrote:
> > > Wait for gt reset to complete before returning from force_reset
> > > sysfs call. Without this igt test freq_reset_multiple fails
> > > sporadically in case xe_guc_pc is not started.
> >
> > \o/ I knew I was missing something there. Thanks for finding that out.
> >
> > >
> > > Testcase: igt@xe_guc_pc@freq_reset_multiple
> > > Signed-off-by: Karthik Poosa <karthik.poosa@intel.com>
> > > ---
> > > drivers/gpu/drm/xe/xe_gt.c | 3 +++
> > > drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++
> > > drivers/gpu/drm/xe/xe_gt_types.h | 3 +++
> > > 3 files changed, 16 insertions(+)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> > > index dfd9cf01a5d5..eb7552b6dfa5 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt.c
> > > +++ b/drivers/gpu/drm/xe/xe_gt.c
> > > @@ -65,6 +65,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
> > >
> > > gt->tile = tile;
> > > gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
> > > + init_completion(>->reset_done);
> > >
> > > return gt;
> > > }
> > > @@ -647,6 +648,8 @@ static int gt_reset(struct xe_gt *gt)
> > > xe_device_mem_access_put(gt_to_xe(gt));
> > > XE_WARN_ON(err);
> > >
> > > + complete(>->reset_done);
> > > +
> > > xe_gt_info(gt, "reset done\n");
> > >
> > > return 0;
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c
> > > b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> > > index c4b67cf09f8f..49b30937a28b 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
> > > +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> > > @@ -23,6 +23,8 @@
> > > #include "xe_uc_debugfs.h"
> > > #include "xe_wa.h"
> > >
> > > +#define XE_GT_RESET_TIMEOUT_MS (msecs_to_jiffies(5*1000))
> >
> > 5s seems to much, but anyway, this is a debugfs function, so, no if 5s
> > fail, then we do have bigger problems.
> >
> > perhaps we could get the timeout as input?
How should we get that, I guess debugfs or modparam, sysfs will require a uapi user ?
Thanks,
Anshuman.
> >
> > anyway, it is better to have protected like this than the current racy
> > one that we have.
> >
> > Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
>
> ops, let me tune down my over excitement here.
> We need to address the feedback from Badal that was given at intel-gfx ml.
>
> >
> > > +
> > > static struct xe_gt *node_to_gt(struct drm_info_node *node) {
> > > return node->info_ent->data;
> > > @@ -58,9 +60,17 @@ static int hw_engines(struct seq_file *m, void
> > > *data) static int force_reset(struct seq_file *m, void *data) {
> > > struct xe_gt *gt = node_to_gt(m->private);
> > > + struct xe_device *xe = gt_to_xe(gt);
> > > + unsigned long timeout;
> > >
> > > xe_gt_reset_async(gt);
> > >
> > > + timeout = wait_for_completion_timeout(>->reset_done,
> XE_GT_RESET_TIMEOUT_MS);
> > > + if (timeout == 0) {
> > > + drm_err(&xe->drm, "gt reset timed out");
> > > + return -ETIMEDOUT;
> > > + }
> > > +
> > > return 0;
> > > }
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h
> > > b/drivers/gpu/drm/xe/xe_gt_types.h
> > > index f74684660475..6f2fb9e3cfea 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > > @@ -358,6 +358,9 @@ struct xe_gt {
> > > /** @oob: bitmap with active OOB workaroudns */
> > > unsigned long *oob;
> > > } wa_active;
> > > +
> > > + /** @reset_done : Completion of GT reset */
> > > + struct completion reset_done;
> > > };
> > >
> > > #endif
> > > --
> > > 2.25.1
> > >
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH] drm/xe: Add wait for completion after gt force reset
2023-12-20 12:51 ` Gupta, Anshuman
@ 2023-12-20 15:20 ` Rodrigo Vivi
0 siblings, 0 replies; 9+ messages in thread
From: Rodrigo Vivi @ 2023-12-20 15:20 UTC (permalink / raw)
To: Gupta, Anshuman; +Cc: Poosa, Karthik, intel-xe@lists.freedesktop.org
On Wed, Dec 20, 2023 at 07:51:31AM -0500, Gupta, Anshuman wrote:
>
>
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces@lists.freedesktop.org> On Behalf Of Rodrigo
> > Vivi
> > Sent: Friday, December 15, 2023 11:02 PM
> > To: Poosa, Karthik <karthik.poosa@intel.com>
> > Cc: intel-xe@lists.freedesktop.org
> > Subject: Re: [PATCH] drm/xe: Add wait for completion after gt force reset
> >
> > On Fri, Dec 15, 2023 at 11:43:04AM -0500, Rodrigo Vivi wrote:
> > > On Fri, Dec 15, 2023 at 10:45:41AM +0530, Karthik Poosa wrote:
> > > > Wait for gt reset to complete before returning from force_reset
> > > > sysfs call. Without this igt test freq_reset_multiple fails
> > > > sporadically in case xe_guc_pc is not started.
> > >
> > > \o/ I knew I was missing something there. Thanks for finding that out.
> > >
> > > >
> > > > Testcase: igt@xe_guc_pc@freq_reset_multiple
> > > > Signed-off-by: Karthik Poosa <karthik.poosa@intel.com>
> > > > ---
> > > > drivers/gpu/drm/xe/xe_gt.c | 3 +++
> > > > drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++
> > > > drivers/gpu/drm/xe/xe_gt_types.h | 3 +++
> > > > 3 files changed, 16 insertions(+)
> > > >
> > > > diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> > > > index dfd9cf01a5d5..eb7552b6dfa5 100644
> > > > --- a/drivers/gpu/drm/xe/xe_gt.c
> > > > +++ b/drivers/gpu/drm/xe/xe_gt.c
> > > > @@ -65,6 +65,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
> > > >
> > > > gt->tile = tile;
> > > > gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
> > > > + init_completion(>->reset_done);
> > > >
> > > > return gt;
> > > > }
> > > > @@ -647,6 +648,8 @@ static int gt_reset(struct xe_gt *gt)
> > > > xe_device_mem_access_put(gt_to_xe(gt));
> > > > XE_WARN_ON(err);
> > > >
> > > > + complete(>->reset_done);
> > > > +
> > > > xe_gt_info(gt, "reset done\n");
> > > >
> > > > return 0;
> > > > diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c
> > > > b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> > > > index c4b67cf09f8f..49b30937a28b 100644
> > > > --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
> > > > +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> > > > @@ -23,6 +23,8 @@
> > > > #include "xe_uc_debugfs.h"
> > > > #include "xe_wa.h"
> > > >
> > > > +#define XE_GT_RESET_TIMEOUT_MS (msecs_to_jiffies(5*1000))
> > >
> > > 5s seems to much, but anyway, this is a debugfs function, so, no if 5s
> > > fail, then we do have bigger problems.
> > >
> > > perhaps we could get the timeout as input?
> How should we get that, I guess debugfs or modparam, sysfs will require a uapi user ?
what I had in mind was simply a input of this force_reset debugfs file.
nothing special nor nothing extra.
> Thanks,
> Anshuman.
> > >
> > > anyway, it is better to have protected like this than the current racy
> > > one that we have.
> > >
> > > Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> >
> > ops, let me tune down my over excitement here.
> > We need to address the feedback from Badal that was given at intel-gfx ml.
> >
> > >
> > > > +
> > > > static struct xe_gt *node_to_gt(struct drm_info_node *node) {
> > > > return node->info_ent->data;
> > > > @@ -58,9 +60,17 @@ static int hw_engines(struct seq_file *m, void
> > > > *data) static int force_reset(struct seq_file *m, void *data) {
> > > > struct xe_gt *gt = node_to_gt(m->private);
> > > > + struct xe_device *xe = gt_to_xe(gt);
> > > > + unsigned long timeout;
> > > >
> > > > xe_gt_reset_async(gt);
> > > >
> > > > + timeout = wait_for_completion_timeout(>->reset_done,
> > XE_GT_RESET_TIMEOUT_MS);
> > > > + if (timeout == 0) {
> > > > + drm_err(&xe->drm, "gt reset timed out");
> > > > + return -ETIMEDOUT;
> > > > + }
> > > > +
> > > > return 0;
> > > > }
> > > >
> > > > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h
> > > > b/drivers/gpu/drm/xe/xe_gt_types.h
> > > > index f74684660475..6f2fb9e3cfea 100644
> > > > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > > > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > > > @@ -358,6 +358,9 @@ struct xe_gt {
> > > > /** @oob: bitmap with active OOB workaroudns */
> > > > unsigned long *oob;
> > > > } wa_active;
> > > > +
> > > > + /** @reset_done : Completion of GT reset */
> > > > + struct completion reset_done;
> > > > };
> > > >
> > > > #endif
> > > > --
> > > > 2.25.1
> > > >
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2023-12-20 15:20 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-12-14 10:06 [PATCH] drm/xe: Add wait for completion after gt force reset Karthik Poosa
2023-12-14 11:42 ` Nilawar, Badal
2023-12-14 11:56 ` Jani Nikula
2023-12-14 13:40 ` ✗ Fi.CI.BUILD: failure for " Patchwork
-- strict thread matches above, loose matches on Subject: below --
2023-12-15 5:15 [PATCH] " Karthik Poosa
2023-12-15 16:43 ` Rodrigo Vivi
2023-12-15 17:32 ` Rodrigo Vivi
2023-12-20 12:51 ` Gupta, Anshuman
2023-12-20 15:20 ` Rodrigo Vivi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.