From: Rodrigo Vivi <rodrigo.vivi@intel.com>
To: Bommu Krishnaiah <krishnaiah.bommu@intel.com>,
Francois Dugast <francois.dugast@intel.com>,
Matthew Brost <matthew.brost@intel.com>,
"Oak Zeng" <oak.zeng@intel.com>
Cc: intel-xe@lists.freedesktop.org
Subject: Re: [Intel-xe] [PATCH v2] RFC: drm/xe: Return correct error code for xe_wait_user_fence_ioctl
Date: Tue, 28 Nov 2023 16:25:14 -0500 [thread overview]
Message-ID: <ZWZaukdzs0chi5LK@intel.com> (raw)
In-Reply-To: <20231122042142.6648-1-krishnaiah.bommu@intel.com>
On Wed, Nov 22, 2023 at 09:51:42AM +0530, Bommu Krishnaiah wrote:
> return correct error code if exec_queue is reset/engine is hung
> remove the num_engines/instances members from drm_xe_wait_user_fence structure
> and add a exec_queue_id member
>
> v2: Addressed the review comments
>
> Need to validated the changes
>
> Signed-off-by: Bommu Krishnaiah <krishnaiah.bommu@intel.com>
> Cc: Oak Zeng <oak.zeng@intel.com>
> Cc: Kempczynski Zbigniew <Zbigniew.Kempczynski@intel.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
This patch here needs to replace my patch in Francois' series:
https://lore.kernel.org/all/20231122143833.7-12-francois.dugast@intel.com/
I finally saw some sense in some engine information in there.
Brost, thoughts?
> ---
> drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 +
> drivers/gpu/drm/xe/xe_execlist.c | 7 +++
> drivers/gpu/drm/xe/xe_guc_submit.c | 13 +++++
> drivers/gpu/drm/xe/xe_wait_user_fence.c | 60 +++++-------------------
> include/uapi/drm/xe_drm.h | 13 +----
> 5 files changed, 37 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index 5ba47a5cfdbd..84ccf7242247 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -206,6 +206,8 @@ struct xe_exec_queue_ops {
> * signalled when this function is called.
> */
> void (*resume)(struct xe_exec_queue *q);
> + /** @reset_status: check exec queue reset status */
> + bool (*reset_status)(struct xe_exec_queue *q);
> };
>
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
> index e8754adfc52a..3b390f047108 100644
> --- a/drivers/gpu/drm/xe/xe_execlist.c
> +++ b/drivers/gpu/drm/xe/xe_execlist.c
> @@ -446,6 +446,12 @@ static void execlist_exec_queue_resume(struct xe_exec_queue *q)
> /* NIY */
> }
>
> +static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
> +{
> + /* NIY */
> + return false;
> +}
> +
> static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
> .init = execlist_exec_queue_init,
> .kill = execlist_exec_queue_kill,
> @@ -457,6 +463,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
> .suspend = execlist_exec_queue_suspend,
> .suspend_wait = execlist_exec_queue_suspend_wait,
> .resume = execlist_exec_queue_resume,
> + .reset_status = execlist_exec_queue_reset_status,
> };
>
> int xe_execlist_init(struct xe_gt *gt)
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 9e9e925c7353..e13792e49a67 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -852,6 +852,10 @@ static void simple_error_capture(struct xe_exec_queue *q)
> static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
> {
> struct xe_guc *guc = exec_queue_to_guc(q);
> + struct xe_device *xe = guc_to_xe(guc);
> +
> + /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */
> + wake_up_all(&xe->ufence_wq);
>
> if (xe_exec_queue_is_lr(q))
> queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
> @@ -1392,6 +1396,14 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q)
> guc_exec_queue_add_msg(q, msg, RESUME);
> }
>
> +static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
> +{
> + if (exec_queue_registered(q))
> + return exec_queue_reset(q);
probably just rename the original function and use that directly.
> +
> + return false;
> +}
> +
> /*
> * All of these functions are an abstraction layer which other parts of XE can
> * use to trap into the GuC backend. All of these functions, aside from init,
> @@ -1409,6 +1421,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
> .suspend = guc_exec_queue_suspend,
> .suspend_wait = guc_exec_queue_suspend_wait,
> .resume = guc_exec_queue_resume,
> + .reset_status = guc_exec_queue_reset_status,
> };
>
> static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
> diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
> index 4d5c2555ce41..97af879ee923 100644
> --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
> +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
> @@ -13,6 +13,7 @@
> #include "xe_device.h"
> #include "xe_gt.h"
> #include "xe_macros.h"
> +#include "xe_exec_queue.h"
>
> static int do_compare(u64 addr, u64 value, u64 mask, u16 op)
> {
> @@ -58,27 +59,6 @@ static const enum xe_engine_class user_to_xe_engine_class[] = {
> [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
> };
>
> -static int check_hw_engines(struct xe_device *xe,
> - struct drm_xe_engine_class_instance *eci,
> - int num_engines)
> -{
> - int i;
> -
> - for (i = 0; i < num_engines; ++i) {
> - enum xe_engine_class user_class =
> - user_to_xe_engine_class[eci[i].engine_class];
> -
> - if (eci[i].gt_id >= xe->info.tile_count)
> - return -EINVAL;
> -
> - if (!xe_gt_hw_engine(xe_device_get_gt(xe, eci[i].gt_id),
> - user_class, eci[i].engine_instance, true))
> - return -EINVAL;
> - }
> -
> - return 0;
> -}
> -
> #define VALID_FLAGS (DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP | \
> DRM_XE_UFENCE_WAIT_FLAG_ABSTIME)
> #define MAX_OP DRM_XE_UFENCE_WAIT_OP_LTE
> @@ -130,14 +110,12 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file)
> {
> struct xe_device *xe = to_xe_device(dev);
> + struct xe_file *xef = to_xe_file(file);
> DEFINE_WAIT_FUNC(w_wait, woken_wake_function);
> struct drm_xe_wait_user_fence *args = data;
> - struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
> - struct drm_xe_engine_class_instance __user *user_eci =
> - u64_to_user_ptr(args->instances);
> + struct xe_exec_queue *q = NULL;
> u64 addr = args->addr;
> int err;
> - bool no_engines = args->flags & DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP;
> long timeout;
> ktime_t start;
>
> @@ -151,35 +129,17 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
> if (XE_IOCTL_DBG(xe, args->op > MAX_OP))
> return -EINVAL;
>
> - if (XE_IOCTL_DBG(xe, no_engines &&
> - (args->num_engines || args->instances)))
> - return -EINVAL;
> -
> - if (XE_IOCTL_DBG(xe, !no_engines && !args->num_engines))
> - return -EINVAL;
> -
> if (XE_IOCTL_DBG(xe, addr & 0x7))
> return -EINVAL;
>
> - if (XE_IOCTL_DBG(xe, args->num_engines > XE_HW_ENGINE_MAX_INSTANCE))
> - return -EINVAL;
> -
> - if (!no_engines) {
> - err = copy_from_user(eci, user_eci,
> - sizeof(struct drm_xe_engine_class_instance) *
> - args->num_engines);
> - if (XE_IOCTL_DBG(xe, err))
> - return -EFAULT;
> -
> - if (XE_IOCTL_DBG(xe, check_hw_engines(xe, eci,
> - args->num_engines)))
> - return -EINVAL;
> - }
> -
> timeout = to_jiffies_timeout(xe, args);
>
> start = ktime_get();
>
> + q = xe_exec_queue_lookup(xef, args->exec_queue_id);
> + if (XE_IOCTL_DBG(xe, !q))
> + return -ENOENT;
> +
> /*
> * FIXME: Very simple implementation at the moment, single wait queue
> * for everything. Could be optimized to have a wait queue for every
> @@ -202,6 +162,12 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
> break;
> }
>
> + if (q->ops->reset_status(q)) {
> + drm_info(&xe->drm, "exec gueue reset detected\n");
> + err = -EIO;
> + break;
> + }
> +
> timeout = wait_woken(&w_wait, TASK_INTERRUPTIBLE, timeout);
> }
> remove_wait_queue(&xe->ufence_wq, &w_wait);
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 88f3aca02b08..ae2457dc30ad 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -962,17 +962,8 @@ struct drm_xe_wait_user_fence {
> */
> __s64 timeout;
>
> - /**
> - * @num_engines: number of engine instances to wait on, must be zero
> - * when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set
> - */
> - __u64 num_engines;
> -
> - /**
> - * @instances: user pointer to array of drm_xe_engine_class_instance to
> - * wait on, must be NULL when DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP set
> - */
> - __u64 instances;
> + /** @exec_queue_id: exec_queue_id returned from xe_exec_queue_create_ioctl */
> + __u32 exec_queue_id;
>
> /** @reserved: Reserved */
> __u64 reserved[2];
> --
> 2.25.1
>
next prev parent reply other threads:[~2023-11-28 21:25 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-22 4:21 [Intel-xe] [PATCH v2] RFC: drm/xe: Return correct error code for xe_wait_user_fence_ioctl Bommu Krishnaiah
2023-11-22 8:23 ` [Intel-xe] ✓ CI.Patch_applied: success for RFC: drm/xe: Return correct error code for xe_wait_user_fence_ioctl (rev2) Patchwork
2023-11-22 8:24 ` [Intel-xe] ✗ CI.checkpatch: warning " Patchwork
2023-11-22 8:25 ` [Intel-xe] ✓ CI.KUnit: success " Patchwork
2023-11-22 8:32 ` [Intel-xe] ✓ CI.Build: " Patchwork
2023-11-22 8:32 ` [Intel-xe] ✗ CI.Hooks: failure " Patchwork
2023-11-22 8:34 ` [Intel-xe] ✓ CI.checksparse: success " Patchwork
2023-11-22 9:12 ` [Intel-xe] ✗ CI.BAT: failure " Patchwork
2023-11-24 18:18 ` [Intel-xe] [PATCH v2] RFC: drm/xe: Return correct error code for xe_wait_user_fence_ioctl Zeng, Oak
2023-11-28 9:30 ` Bommu, Krishnaiah
2023-11-28 21:25 ` Rodrigo Vivi [this message]
2023-11-29 9:11 ` Matthew Brost
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZWZaukdzs0chi5LK@intel.com \
--to=rodrigo.vivi@intel.com \
--cc=francois.dugast@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=krishnaiah.bommu@intel.com \
--cc=matthew.brost@intel.com \
--cc=oak.zeng@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.