Intel-XE Archive on lore.kernel.org
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
To: Matthew Auld <matthew.auld@intel.com>
Cc: intel-xe@lists.freedesktop.org
Subject: Re: [RFC 20/20] drm/xe: Mega Kill of mem_access
Date: Tue, 9 Jan 2024 17:34:10 -0500	[thread overview]
Message-ID: <ZZ3J4gIZNpOIEzH7@intel.com> (raw)
In-Reply-To: <c91759f7-5115-4172-bba0-72ea0263abfc@intel.com>

On Tue, Jan 09, 2024 at 06:27:13PM +0000, Matthew Auld wrote:
> On 09/01/2024 17:39, Rodrigo Vivi wrote:
> > On Tue, Jan 09, 2024 at 11:41:35AM +0000, Matthew Auld wrote:
> > > On 28/12/2023 02:12, Rodrigo Vivi wrote:
> > > > All of these remaining cases should already be protected
> > > > by the outer-bound runtime_pm calls.
> > > > 
> > > > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > > > ---
> > > >    drivers/gpu/drm/xe/display/xe_fb_pin.c |  7 +--
> > > >    drivers/gpu/drm/xe/tests/xe_bo.c       |  8 ----
> > > >    drivers/gpu/drm/xe/tests/xe_mocs.c     |  4 --
> > > >    drivers/gpu/drm/xe/xe_bo.c             |  5 ---
> > > >    drivers/gpu/drm/xe/xe_device.c         | 59 --------------------------
> > > >    drivers/gpu/drm/xe/xe_device.h         |  7 ---
> > > >    drivers/gpu/drm/xe/xe_device_types.h   |  9 ----
> > > >    drivers/gpu/drm/xe/xe_ggtt.c           |  6 ---
> > > >    drivers/gpu/drm/xe/xe_gsc.c            |  3 --
> > > >    drivers/gpu/drm/xe/xe_gt.c             | 17 --------
> > > >    drivers/gpu/drm/xe/xe_huc_debugfs.c    |  2 -
> > > >    drivers/gpu/drm/xe/xe_pat.c            | 10 -----
> > > >    drivers/gpu/drm/xe/xe_pm.c             | 27 ------------
> > > >    drivers/gpu/drm/xe/xe_query.c          |  4 --
> > > >    drivers/gpu/drm/xe/xe_tile.c           | 10 ++---
> > > >    drivers/gpu/drm/xe/xe_vm.c             |  7 ---
> > > >    16 files changed, 5 insertions(+), 180 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> > > > index 722c84a566073..077294ec50ece 100644
> > > > --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
> > > > +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
> > > > @@ -190,10 +190,9 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
> > > >    	/* TODO: Consider sharing framebuffer mapping?
> > > >    	 * embed i915_vma inside intel_framebuffer
> > > >    	 */
> > > > -	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
> > > >    	ret = mutex_lock_interruptible(&ggtt->lock);
> > > >    	if (ret)
> > > > -		goto out;
> > > > +		return ret;
> > > >    	align = XE_PAGE_SIZE;
> > > >    	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
> > > > @@ -241,8 +240,6 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
> > > >    	xe_ggtt_invalidate(ggtt);
> > > >    out_unlock:
> > > >    	mutex_unlock(&ggtt->lock);
> > > > -out:
> > > > -	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
> > > >    	return ret;
> > > >    }
> > > > @@ -381,4 +378,4 @@ struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb)
> > > >    void intel_dpt_destroy(struct i915_address_space *vm)
> > > >    {
> > > >    	return;
> > > > -}
> > > > \ No newline at end of file
> > > > +}
> > > > diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
> > > > index 412b2e7ce40cb..97b10e597f0ad 100644
> > > > --- a/drivers/gpu/drm/xe/tests/xe_bo.c
> > > > +++ b/drivers/gpu/drm/xe/tests/xe_bo.c
> > > > @@ -164,8 +164,6 @@ static int ccs_test_run_device(struct xe_device *xe)
> > > >    		return 0;
> > > >    	}
> > > > -	xe_device_mem_access_get(xe);
> > > > -
> > > >    	for_each_tile(tile, xe, id) {
> > > >    		/* For igfx run only for primary tile */
> > > >    		if (!IS_DGFX(xe) && id > 0)
> > > > @@ -173,8 +171,6 @@ static int ccs_test_run_device(struct xe_device *xe)
> > > >    		ccs_test_run_tile(xe, tile, test);
> > > >    	}
> > > > -	xe_device_mem_access_put(xe);
> > > > -
> > > >    	return 0;
> > > >    }
> > > > @@ -336,13 +332,9 @@ static int evict_test_run_device(struct xe_device *xe)
> > > >    		return 0;
> > > >    	}
> > > > -	xe_device_mem_access_get(xe);
> > > > -
> > > >    	for_each_tile(tile, xe, id)
> > > >    		evict_test_run_tile(xe, tile, test);
> > > > -	xe_device_mem_access_put(xe);
> > > > -
> > > >    	return 0;
> > > >    }
> > > > diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
> > > > index 7dd34f94e8094..a12e7e2bb5861 100644
> > > > --- a/drivers/gpu/drm/xe/tests/xe_mocs.c
> > > > +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
> > > > @@ -45,7 +45,6 @@ static void read_l3cc_table(struct xe_gt *gt,
> > > >    	struct kunit *test = xe_cur_kunit();
> > > > -	xe_device_mem_access_get(gt_to_xe(gt));
> > > >    	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> > > >    	KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
> > > >    	mocs_dbg(&gt_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries);
> > > > @@ -65,7 +64,6 @@ static void read_l3cc_table(struct xe_gt *gt,
> > > >    				   XELP_LNCFCMOCS(i).addr);
> > > >    	}
> > > >    	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> > > > -	xe_device_mem_access_put(gt_to_xe(gt));
> > > >    }
> > > >    static void read_mocs_table(struct xe_gt *gt,
> > > > @@ -80,7 +78,6 @@ static void read_mocs_table(struct xe_gt *gt,
> > > >    	struct kunit *test = xe_cur_kunit();
> > > > -	xe_device_mem_access_get(gt_to_xe(gt));
> > > >    	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> > > >    	KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
> > > >    	mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries);
> > > > @@ -100,7 +97,6 @@ static void read_mocs_table(struct xe_gt *gt,
> > > >    				   XELP_GLOBAL_MOCS(i).addr);
> > > >    	}
> > > >    	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> > > > -	xe_device_mem_access_put(gt_to_xe(gt));
> > > >    }
> > > >    static int mocs_kernel_test_run_device(struct xe_device *xe)
> > > > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> > > > index 8e4a3b1f6b938..056c65c2675d8 100644
> > > > --- a/drivers/gpu/drm/xe/xe_bo.c
> > > > +++ b/drivers/gpu/drm/xe/xe_bo.c
> > > > @@ -715,7 +715,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
> > > >    	xe_assert(xe, migrate);
> > > >    	trace_xe_bo_move(bo);
> > > > -	xe_device_mem_access_get(xe);
> > > >    	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
> > > >    		/*
> > > > @@ -739,7 +738,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
> > > >    				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
> > > >    					ret = -EINVAL;
> > > > -					xe_device_mem_access_put(xe);
> > > >    					goto out;
> > > >    				}
> > > > @@ -757,7 +755,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
> > > >    						new_mem, handle_system_ccs);
> > > >    		if (IS_ERR(fence)) {
> > > >    			ret = PTR_ERR(fence);
> > > > -			xe_device_mem_access_put(xe);
> > > >    			goto out;
> > > >    		}
> > > >    		if (!move_lacks_source) {
> > > > @@ -782,8 +779,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
> > > >    		dma_fence_put(fence);
> > > >    	}
> > > > -	xe_device_mem_access_put(xe);
> > > > -
> > > >    out:
> > > >    	return ret;
> > > > diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> > > > index c1c19264a58b4..cb08a4369bb9e 100644
> > > > --- a/drivers/gpu/drm/xe/xe_device.c
> > > > +++ b/drivers/gpu/drm/xe/xe_device.c
> > > > @@ -44,12 +44,6 @@
> > > >    #include "xe_wait_user_fence.h"
> > > >    #include "xe_hwmon.h"
> > > > -#ifdef CONFIG_LOCKDEP
> > > > -struct lockdep_map xe_device_mem_access_lockdep_map = {
> > > > -	.name = "xe_device_mem_access_lockdep_map"
> > > > -};
> > > > -#endif
> > > 
> > > Did you mean to drop this? IMO we should for sure keep the lockdep
> > > annotations. Otherwise it is going to be really hard to validate the locking
> > > design and have reasonable confidence that we don't have deadlocks lurking,
> > > or that new users won't come along sprinkling rpm gets in the wrong place.
> > 
> > Well, the whole goal of this series is to actually avoid sprinkling RPM calls at all.
> 
> I mean new users are bound to appear, and they might add such calls in the
> wrong place. Lockdep would hopefully catch such things for us.

we should actually catch that during reviews and push back ;)
But I do see your point.
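
To illustrate the shape this series is aiming for, a rough sketch only: the
ioctl handler and the inner helper below are hypothetical, and only the
xe_pm_runtime_get/put names come from earlier patches in the series.

/*
 * Rough sketch: the outer bound (here a hypothetical ioctl entry point)
 * takes the runtime-PM reference, so the inner paths it reaches (BO moves,
 * GGTT updates, ...) no longer need their own mem_access get/put.
 */
static int xe_example_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	int ret;

	xe_pm_runtime_get(xe);			/* outer-bound wake */
	ret = xe_example_do_work(xe, data);	/* hypothetical inner work */
	xe_pm_runtime_put(xe);

	return ret;
}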

> 
> > We should only protect the outer bounds. I'm afraid that if we move this to the outer
> > bounds we would start getting false positives, no?!
> 
> What kind of false positives? With this series the sync rpm get should be
> the outermost thing for the most part, and so the locking dependencies should
> be minimal. If we drop the annotations we get no help from lockdep to tell
> us if the rpm resume and suspend callbacks are grabbing locks that are
> already held when calling the sync rpm get.

Yeah, you are right. I honestly didn't think it through that deeply and was just
afraid of some lock inversions.
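
For reference, the kind of annotation being discussed, as a minimal sketch with
illustrative names (only the lockdep_map-with-.name pattern is taken from the
code this patch removes): a map that is briefly "acquired" around the
synchronous get and held across the resume work, so lockdep complains if the
resume path takes a lock that a caller of the sync get already holds.

#include <linux/lockdep.h>
#include <linux/pm_runtime.h>

static struct lockdep_map xe_pm_runtime_lockdep_map = {
	.name = "xe_pm_runtime_lockdep_map",
};

static void xe_pm_runtime_get_sketch(struct device *dev)
{
	/*
	 * Fake acquire/release: records every lock currently held by the
	 * caller as ordered before anything taken in the resume callback.
	 */
	lock_map_acquire(&xe_pm_runtime_lockdep_map);
	lock_map_release(&xe_pm_runtime_lockdep_map);

	pm_runtime_get_sync(dev);
}

static int xe_pm_runtime_resume_sketch(struct device *dev)
{
	int err = 0;

	lock_map_acquire(&xe_pm_runtime_lockdep_map);
	/*
	 * ... real resume work goes here; any lock taken in this span that
	 * is also held around the sync get produces a lockdep splat instead
	 * of a rare runtime deadlock ...
	 */
	lock_map_release(&xe_pm_runtime_lockdep_map);

	return err;
}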

Thread overview: 46+ messages
2023-12-28  2:12 [RFC 00/20] First attempt to kill mem_access Rodrigo Vivi
2023-12-28  2:12 ` [RFC 01/20] drm/xe: Document Xe PM component Rodrigo Vivi
2023-12-28  2:12 ` [RFC 02/20] drm/xe: Fix display runtime_pm handling Rodrigo Vivi
2023-12-28  2:12 ` [RFC 03/20] drm/xe: Create a xe_pm_runtime_resume_and_get variant for display Rodrigo Vivi
2023-12-28  2:12 ` [RFC 04/20] drm/xe: Convert xe_pm_runtime_{get, put} to void and protect from recursion Rodrigo Vivi
2023-12-28  2:12 ` [RFC 05/20] drm/xe: Prepare display for D3Cold Rodrigo Vivi
2023-12-28  2:12 ` [RFC 06/20] drm/xe: Convert mem_access assertion towards the runtime_pm state Rodrigo Vivi
2024-01-09 11:06   ` Matthew Auld
2024-01-09 17:50     ` Rodrigo Vivi
2023-12-28  2:12 ` [RFC 07/20] drm/xe: Runtime PM wake on every IOCTL Rodrigo Vivi
2024-01-02 11:30   ` Gupta, Anshuman
2024-01-09 17:57     ` Rodrigo Vivi
2023-12-28  2:12 ` [RFC 08/20] drm/xe: Runtime PM wake on every exec Rodrigo Vivi
2024-01-09 11:24   ` Matthew Auld
2024-01-09 17:41     ` Rodrigo Vivi
2024-01-09 18:40       ` Matthew Auld
2023-12-28  2:12 ` [RFC 09/20] drm/xe: Runtime PM wake on every sysfs call Rodrigo Vivi
2023-12-28  2:12 ` [RFC 10/20] drm/xe: Sort some xe_pm_runtime related functions Rodrigo Vivi
2024-01-09 11:26   ` Matthew Auld
2023-12-28  2:12 ` [RFC 11/20] drm/xe: Ensure device is awake before removing it Rodrigo Vivi
2023-12-28  2:12 ` [RFC 12/20] drm/xe: Remove mem_access from guc_pc calls Rodrigo Vivi
2023-12-28  2:12 ` [RFC 13/20] drm/xe: Runtime PM wake on every debugfs call Rodrigo Vivi
2023-12-28  2:12 ` [RFC 14/20] drm/xe: Replace dma_buf mem_access per direct xe_pm_runtime calls Rodrigo Vivi
2023-12-28  2:12 ` [RFC 15/20] drm/xe: Allow GuC CT fast path and worker regardless of runtime_pm Rodrigo Vivi
2024-01-09 12:09   ` Matthew Auld
2023-12-28  2:12 ` [RFC 16/20] drm/xe: Remove mem_access calls from migration Rodrigo Vivi
2024-01-09 12:33   ` Matthew Auld
2024-01-09 17:58     ` Rodrigo Vivi
2024-01-09 18:49       ` Matthew Auld
2024-01-09 22:40         ` Rodrigo Vivi
2024-01-11 14:17           ` Matthew Brost
2023-12-28  2:12 ` [RFC 17/20] drm/xe: Removing extra mem_access protection from runtime pm Rodrigo Vivi
2023-12-28  2:12 ` [RFC 18/20] drm/xe: Convert hwmon from mem_access to xe_pm_runtime calls Rodrigo Vivi
2023-12-28  2:12 ` [RFC 19/20] drm/xe: Remove unused runtime pm helper Rodrigo Vivi
2023-12-28  2:12 ` [RFC 20/20] drm/xe: Mega Kill of mem_access Rodrigo Vivi
2024-01-09 11:41   ` Matthew Auld
2024-01-09 17:39     ` Rodrigo Vivi
2024-01-09 18:27       ` Matthew Auld
2024-01-09 22:34         ` Rodrigo Vivi [this message]
2024-01-04  5:40 ` ✓ CI.Patch_applied: success for First attempt to kill mem_access Patchwork
2024-01-04  5:40 ` ✗ CI.checkpatch: warning " Patchwork
2024-01-04  5:41 ` ✗ CI.KUnit: failure " Patchwork
2024-01-10  5:21 ` [RFC 00/20] " Matthew Brost
2024-01-10 14:06   ` Rodrigo Vivi
2024-01-10 14:08     ` Vivi, Rodrigo
2024-01-10 14:33     ` Matthew Brost
