Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "Dixit, Ashutosh" <ashutosh.dixit@intel.com>
To: Matthew Brost <matthew.brost@intel.com>
Cc: intel-xe@lists.freedesktop.org
Subject: Re: [PATCH 4/7] drm/xe/oa: Signal output fences
Date: Tue, 17 Sep 2024 15:18:03 -0700	[thread overview]
Message-ID: <85ed5h7wmc.wl-ashutosh.dixit@intel.com> (raw)
In-Reply-To: <20240830221618.2103948-5-ashutosh.dixit@intel.com>

On Fri, 30 Aug 2024 15:16:15 -0700, Ashutosh Dixit wrote:
>

Hi Matt,

> Introduce 'struct xe_oa_fence' which includes the dma_fence used to signal
> output fences in the xe_sync array. The fences are signaled
> asynchronously. When there are no output fences to signal, the OA
> configuration wait is synchronously re-introduced into the ioctl.
>
> v2: Don't wait in the work, use callback + delayed work (Matt B)
>     Use a single, not a per-fence spinlock (Matt Brost)

Want to check with you about this "single, not per-fence spinlock" issue
below.

> v3: Move ofence alloc before job submission (Matt)
>     Assert, don't fail, from dma_fence_add_callback (Matt)
>     Additional dma_fence_get for dma_fence_wait (Matt)
>     Change dma_fence_wait to non-interruptible (Matt)
> v4: Introduce last_fence to prevent uaf if stream is closed with
>     pending OA config jobs
>
> Suggested-by: Matthew Brost <matthew.brost@intel.com>
> Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_oa.c       | 117 ++++++++++++++++++++++++++-----
>  drivers/gpu/drm/xe/xe_oa_types.h |   6 ++
>  2 files changed, 106 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> index b4b68019d35b7..56f95fd431bf9 100644
> --- a/drivers/gpu/drm/xe/xe_oa.c
> +++ b/drivers/gpu/drm/xe/xe_oa.c
> @@ -100,6 +100,15 @@ struct xe_oa_config_bo {
>	struct xe_bb *bb;
>  };
>
> +struct xe_oa_fence {
> +	/* @base: dma fence base */
> +	struct dma_fence base;
> +	/* @work: work to signal @base */
> +	struct delayed_work work;
> +	/* @cb: callback to schedule @work */
> +	struct dma_fence_cb cb;
> +};
> +
>  #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x
>
>  static const struct xe_oa_format oa_formats[] = {
> @@ -172,10 +181,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se
>	return oa_config;
>  }
>
> -static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo)
> +static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence)
>  {
>	xe_oa_config_put(oa_bo->oa_config);
> -	xe_bb_free(oa_bo->bb, NULL);
> +	xe_bb_free(oa_bo->bb, last_fence);
>	kfree(oa_bo);
>  }
>
> @@ -648,7 +657,8 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
>
>	xe_oa_config_put(stream->oa_config);
>	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
> -		free_oa_config_bo(oa_bo);
> +		free_oa_config_bo(oa_bo, stream->last_fence);
> +	dma_fence_put(stream->last_fence);
>  }
>
>  static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
> @@ -945,40 +955,112 @@ xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_c
>	return oa_bo;
>  }
>
> +static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence)
> +{
> +	dma_fence_put(stream->last_fence);
> +	stream->last_fence = dma_fence_get(fence);
> +}
> +
> +static void xe_oa_fence_work_fn(struct work_struct *w)
> +{
> +	struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work);
> +
> +	/* Signal fence to indicate new OA configuration is active */
> +	dma_fence_signal(&ofence->base);
> +	dma_fence_put(&ofence->base);
> +}
> +
> +static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
> +{
> +	/* Additional empirical delay needed for NOA programming after registers are written */
> +#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
> +
> +	struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb);
> +
> +	INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn);
> +	queue_delayed_work(system_unbound_wq, &ofence->work,
> +			   usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US));
> +	dma_fence_put(fence);
> +}
> +
> +static const char *xe_oa_get_driver_name(struct dma_fence *fence)
> +{
> +	return "xe_oa";
> +}
> +
> +static const char *xe_oa_get_timeline_name(struct dma_fence *fence)
> +{
> +	return "unbound";
> +}
> +
> +static const struct dma_fence_ops xe_oa_fence_ops = {
> +	.get_driver_name = xe_oa_get_driver_name,
> +	.get_timeline_name = xe_oa_get_timeline_name,
> +};
> +
>  static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config)
>  {
>  #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
>	struct xe_oa_config_bo *oa_bo;
> -	int err = 0, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
> +	struct xe_oa_fence *ofence;
> +	int i, err, num_signal = 0;
>	struct dma_fence *fence;
> -	long timeout;
>
> -	/* Emit OA configuration batch */
> +	ofence = kzalloc(sizeof(*ofence), GFP_KERNEL);
> +	if (!ofence) {
> +		err = -ENOMEM;
> +		goto exit;
> +	}
> +
>	oa_bo = xe_oa_alloc_config_buffer(stream, config);
>	if (IS_ERR(oa_bo)) {
>		err = PTR_ERR(oa_bo);
>		goto exit;
>	}
>
> +	/* Emit OA configuration batch */
>	fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb);
>	if (IS_ERR(fence)) {
>		err = PTR_ERR(fence);
>		goto exit;
>	}
>
> -	/* Wait till all previous batches have executed */
> -	timeout = dma_fence_wait_timeout(fence, false, 5 * HZ);
> -	dma_fence_put(fence);
> -	if (timeout < 0)
> -		err = timeout;
> -	else if (!timeout)
> -		err = -ETIME;
> -	if (err)
> -		drm_dbg(&stream->oa->xe->drm, "dma_fence_wait_timeout err %d\n", err);
> +	/* Point of no return: initialize and set fence to signal */
> +	dma_fence_init(&ofence->base, &xe_oa_fence_ops, &stream->oa_fence_lock, 0, 0);

About stream->oa_fence_lock. This was added in response to this comment
from you in v1 of this patch:

	/* No per fence spin lock, global OA one for user_signal */

here:
	https://patchwork.freedesktop.org/patch/607602/?series=137058&rev=1#comment_1104327

However, I just realized that, the way it is implemented, there is a uaf
here: lock memory can be freed before the lock is accessed in
dma_fence_signal() in xe_oa_fence_work_fn() (when the oa stream is
destroyed).

Of course we can implement some reference counting etc. to solve the uaf,
but the simplest way to solve it seems to be to go back to the per-fence
lock, unless doing so is incorrect? Looking at dma_fence_init calls, I do
see per fence locks being used occasionally (xe_preempt_fence, vgem_fence
etc.).

So just want to check with you if it's ok to go back to the per fence lock,
or we should solve it some other way.

Thanks.
--
Ashutosh

>
> -	/* Additional empirical delay needed for NOA programming after registers are written */
> -	usleep_range(us, 2 * us);
> +	for (i = 0; i < stream->num_syncs; i++) {
> +		if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL)
> +			num_signal++;
> +		xe_sync_entry_signal(&stream->syncs[i], &ofence->base);
> +	}
> +
> +	/* Additional dma_fence_get in case we dma_fence_wait */
> +	if (!num_signal)
> +		dma_fence_get(&ofence->base);
> +
> +	/* Update last fence too before adding callback */
> +	xe_oa_update_last_fence(stream, fence);
> +
> +	/* Add job fence callback to schedule work to signal ofence->base */
> +	err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb);
> +	xe_gt_assert(stream->gt, !err || err == -ENOENT);
> +	if (err == -ENOENT)
> +		xe_oa_config_cb(fence, &ofence->cb);
> +
> +	/* If nothing needs to be signaled we wait synchronously */
> +	if (!num_signal) {
> +		dma_fence_wait(&ofence->base, false);
> +		dma_fence_put(&ofence->base);
> +	}
> +
> +	/* Done with syncs */
> +	for (i = 0; i < stream->num_syncs; i++)
> +		xe_sync_entry_cleanup(&stream->syncs[i]);
> +	kfree(stream->syncs);
> +
> +	return 0;
>  exit:
> +	kfree(ofence);
>	return err;
>  }
>
> @@ -1479,6 +1561,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
>		goto err_free_oa_buf;
>	}
>
> +	spin_lock_init(&stream->oa_fence_lock);
>	ret = xe_oa_enable_metric_set(stream);
>	if (ret) {
>		drm_dbg(&stream->oa->xe->drm, "Unable to enable metric set\n");
> diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
> index 99f4b2d4bdcf6..935f98772a948 100644
> --- a/drivers/gpu/drm/xe/xe_oa_types.h
> +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> @@ -239,6 +239,12 @@ struct xe_oa_stream {
>	/** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */
>	u32 no_preempt;
>
> +	/** @last_fence: fence to use in stream destroy when needed */
> +	struct dma_fence *last_fence;
> +
> +	/** @oa_fence_lock: Lock for struct xe_oa_fence */
> +	spinlock_t oa_fence_lock;
> +
>	/** @num_syncs: size of @syncs array */
>	u32 num_syncs;
>
> --
> 2.41.0
>

  parent reply	other threads:[~2024-09-17 22:18 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-30 22:16 [PATCH v5 0/7] drm/xe/oa: xe_syncs for OA Ashutosh Dixit
2024-08-30 22:16 ` [PATCH 1/7] drm/xe/oa: Separate batch submission from waiting for completion Ashutosh Dixit
2024-08-30 22:16 ` [PATCH 2/7] drm/xe/oa/uapi: Define and parse OA sync properties Ashutosh Dixit
2024-08-30 22:16 ` [PATCH 3/7] drm/xe/oa: Add input fence dependencies Ashutosh Dixit
2024-09-18 11:59   ` Souza, Jose
2024-09-18 19:56     ` Dixit, Ashutosh
2024-08-30 22:16 ` [PATCH 4/7] drm/xe/oa: Signal output fences Ashutosh Dixit
2024-08-30 22:15   ` Dixit, Ashutosh
2024-08-30 22:45   ` Matthew Brost
2024-08-30 22:58     ` Dixit, Ashutosh
2024-09-17 22:18   ` Dixit, Ashutosh [this message]
2024-09-17 23:38     ` Matthew Brost
2024-09-18 19:59       ` Dixit, Ashutosh
2024-08-30 22:16 ` [PATCH 5/7] drm/xe/oa: Move functions up so they can be reused for config ioctl Ashutosh Dixit
2024-08-30 22:16 ` [PATCH 6/7] drm/xe/oa: Add syncs support to OA " Ashutosh Dixit
2024-08-30 22:16 ` [PATCH 7/7] drm/xe/oa: Allow only certain property changes from config Ashutosh Dixit
2024-08-30 22:21 ` ✓ CI.Patch_applied: success for drm/xe/oa: xe_syncs for OA (rev5) Patchwork
2024-08-30 22:22 ` ✓ CI.checkpatch: " Patchwork
2024-08-30 22:23 ` ✓ CI.KUnit: " Patchwork
2024-08-30 22:35 ` ✓ CI.Build: " Patchwork
2024-08-30 22:37 ` ✓ CI.Hooks: " Patchwork
2024-08-30 22:38 ` ✓ CI.checksparse: " Patchwork
2024-08-30 22:58 ` ✓ CI.BAT: " Patchwork
2024-08-31 11:25 ` ✗ CI.FULL: failure " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2024-10-22 20:03 [PATCH v7 0/7] drm/xe/oa: xe_syncs for OA Ashutosh Dixit
2024-10-22 20:03 ` [PATCH 4/7] drm/xe/oa: Signal output fences Ashutosh Dixit
2024-09-18 19:53 [PATCH v6 0/7] drm/xe/oa: xe_syncs for OA Ashutosh Dixit
2024-09-18 19:53 ` [PATCH 4/7] drm/xe/oa: Signal output fences Ashutosh Dixit
2024-08-28  1:50 [PATCH v4 0/7] drm/xe/oa: xe_syncs for OA Ashutosh Dixit
2024-08-28  1:50 ` [PATCH 4/7] drm/xe/oa: Signal output fences Ashutosh Dixit
2024-08-21 15:28 [PATCH v3 0/7] drm/xe/oa: xe_syncs for OA Ashutosh Dixit
2024-08-21 15:28 ` [PATCH 4/7] drm/xe/oa: Signal output fences Ashutosh Dixit
2024-08-21 15:49   ` Matthew Brost
2024-08-20  0:58 [PATCH v2 0/7] drm/xe/oa: xe_syncs for OA Ashutosh Dixit
2024-08-20  0:58 ` [PATCH 4/7] drm/xe/oa: Signal output fences Ashutosh Dixit
2024-08-20 19:23   ` Matthew Brost
2024-08-21 15:20     ` Dixit, Ashutosh
2024-08-21 16:02       ` Matthew Brost

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=85ed5h7wmc.wl-ashutosh.dixit@intel.com \
    --to=ashutosh.dixit@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox