From: Sultan Alsawaf <sultan@kerneltoast.com>
To: unlisted-recipients:; (no To-header on input)
Cc: Sultan Alsawaf <sultan@kerneltoast.com>,
stable@vger.kernel.org, Jani Nikula <jani.nikula@linux.intel.com>,
Joonas Lahtinen <joonas.lahtinen@linux.intel.com>,
Rodrigo Vivi <rodrigo.vivi@intel.com>,
David Airlie <airlied@linux.ie>, Daniel Vetter <daniel@ffwll.ch>,
Chris Wilson <chris@chris-wilson.co.uk>,
Matthew Auld <matthew.auld@intel.com>,
intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
linux-kernel@vger.kernel.org
Subject: [PATCH v4] drm/i915: Synchronize active and retire callbacks
Date: Mon, 6 Apr 2020 23:40:06 -0700 [thread overview]
Message-ID: <20200407064007.7599-1-sultan@kerneltoast.com> (raw)
In-Reply-To: <20200404024156.GA10382@sultan-box.localdomain>
From: Sultan Alsawaf <sultan@kerneltoast.com>
Active and retire callbacks can run simultaneously, causing panics and
mayhem. The most notable case is with the intel_context_pin/unpin race
that causes ring and page table corruption. In 5.4, this race is more
noticeable because intel_ring_unpin() sets ring->vaddr to NULL and
causes a clean NULL-pointer-dereference panic, but in newer kernels this
race goes unnoticed.
Here is an example of a crash caused by this race on 5.4:
BUG: unable to handle page fault for address: 0000000000003448
RIP: 0010:gen8_emit_flush_render+0x163/0x190
Call Trace:
execlists_request_alloc+0x25/0x40
__i915_request_create+0x1f4/0x2c0
i915_request_create+0x71/0xc0
i915_gem_do_execbuffer+0xb98/0x1a80
? preempt_count_add+0x68/0xa0
? _raw_spin_lock+0x13/0x30
? _raw_spin_unlock+0x16/0x30
i915_gem_execbuffer2_ioctl+0x1de/0x3c0
? i915_gem_busy_ioctl+0x7f/0x1d0
? i915_gem_execbuffer_ioctl+0x2d0/0x2d0
drm_ioctl_kernel+0xb2/0x100
drm_ioctl+0x209/0x360
? i915_gem_execbuffer_ioctl+0x2d0/0x2d0
ksys_ioctl+0x87/0xc0
__x64_sys_ioctl+0x16/0x20
do_syscall_64+0x4e/0x150
entry_SYSCALL_64_after_hwframe+0x44/0xa9
Protect the active and retire callbacks with their own lock to prevent
them from running at the same time as one another.
Fixes: 12c255b5dad1 ("drm/i915: Provide an i915_active.acquire callback")
Cc: <stable@vger.kernel.org>
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
drivers/gpu/drm/i915/i915_active.c | 52 ++++++++++++++++++++----
drivers/gpu/drm/i915/i915_active.h | 10 ++---
drivers/gpu/drm/i915/i915_active_types.h | 2 +
3 files changed, 50 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index b0a499753526..5802233f71ec 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -147,8 +147,22 @@ __active_retire(struct i915_active *ref)
spin_unlock_irqrestore(&ref->tree_lock, flags);
/* After the final retire, the entire struct may be freed */
- if (ref->retire)
- ref->retire(ref);
+ if (ref->retire) {
+ if (ref->active) {
+ bool freed = false;
+
+ /* Don't race with the active callback, and avoid UaF */
+ down_write(&ref->rwsem);
+ ref->freed = &freed;
+ ref->retire(ref);
+ if (!freed) {
+ ref->freed = NULL;
+ up_write(&ref->rwsem);
+ }
+ } else {
+ ref->retire(ref);
+ }
+ }
/* ... except if you wait on it, you must manage your own references! */
wake_up_var(ref);
@@ -278,7 +292,8 @@ void __i915_active_init(struct i915_active *ref,
int (*active)(struct i915_active *ref),
void (*retire)(struct i915_active *ref),
struct lock_class_key *mkey,
- struct lock_class_key *wkey)
+ struct lock_class_key *wkey,
+ struct lock_class_key *rkey)
{
unsigned long bits;
@@ -287,8 +302,13 @@ void __i915_active_init(struct i915_active *ref,
ref->flags = 0;
ref->active = active;
ref->retire = ptr_unpack_bits(retire, &bits, 2);
- if (bits & I915_ACTIVE_MAY_SLEEP)
+ ref->freed = NULL;
+ if (ref->active && ref->retire) {
+ __init_rwsem(&ref->rwsem, "i915_active.rwsem", rkey);
ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
+ } else if (bits & I915_ACTIVE_MAY_SLEEP) {
+ ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
+ }
spin_lock_init(&ref->tree_lock);
ref->tree = RB_ROOT;
@@ -417,8 +437,20 @@ int i915_active_acquire(struct i915_active *ref)
return err;
if (likely(!i915_active_acquire_if_busy(ref))) {
- if (ref->active)
- err = ref->active(ref);
+ if (ref->active) {
+ if (ref->retire) {
+ /*
+ * This can be a recursive call, and the mutex
+ * above already protects from concurrent active
+ * callbacks, so a read lock fits best.
+ */
+ down_read(&ref->rwsem);
+ err = ref->active(ref);
+ up_read(&ref->rwsem);
+ } else {
+ err = ref->active(ref);
+ }
+ }
if (!err) {
spin_lock_irq(&ref->tree_lock); /* __active_retire() */
debug_active_activate(ref);
@@ -502,16 +534,20 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
return err;
}
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
+ if (ref->freed) {
+ *ref->freed = true;
+ up_write(&ref->rwsem);
+ }
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
debug_active_fini(ref);
GEM_BUG_ON(atomic_read(&ref->count));
GEM_BUG_ON(work_pending(&ref->work));
GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
mutex_destroy(&ref->mutex);
-}
#endif
+}
static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 51e1e854ca55..b684b1fdcc02 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -153,14 +153,16 @@ void __i915_active_init(struct i915_active *ref,
int (*active)(struct i915_active *ref),
void (*retire)(struct i915_active *ref),
struct lock_class_key *mkey,
- struct lock_class_key *wkey);
+ struct lock_class_key *wkey,
+ struct lock_class_key *rkey);
/* Specialise each class of i915_active to avoid impossible lockdep cycles. */
#define i915_active_init(ref, active, retire) do { \
static struct lock_class_key __mkey; \
static struct lock_class_key __wkey; \
+ static struct lock_class_key __rkey; \
\
- __i915_active_init(ref, active, retire, &__mkey, &__wkey); \
+ __i915_active_init(ref, active, retire, &__mkey, &__wkey, &__rkey); \
} while (0)
int i915_active_ref(struct i915_active *ref,
@@ -200,11 +202,7 @@ i915_active_is_idle(const struct i915_active *ref)
return !atomic_read(&ref->count);
}
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref);
-#else
-static inline void i915_active_fini(struct i915_active *ref) { }
-#endif
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 6360c3e4b765..aaee2548cb19 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -32,6 +32,8 @@ struct active_node;
struct i915_active {
atomic_t count;
struct mutex mutex;
+ struct rw_semaphore rwsem;
+ bool *freed;
spinlock_t tree_lock;
struct active_node *cache;
--
2.26.0
next prev parent reply other threads:[~2020-04-07 6:40 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-03 1:13 [PATCH] drm/i915: Fix use-after-free due to intel_context_pin/unpin race Sultan Alsawaf
2020-04-03 4:29 ` [PATCH v2] " Sultan Alsawaf
2020-04-03 22:35 ` [PATCH v3] drm/i915: Synchronize active and retire callbacks Sultan Alsawaf
2020-04-04 2:41 ` Sultan Alsawaf
2020-04-04 2:41 ` Sultan Alsawaf
2020-04-04 2:41 ` [Intel-gfx] " Sultan Alsawaf
2020-04-07 6:40 ` Sultan Alsawaf [this message]
2020-04-14 6:13 ` [PATCH v4] " Sultan Alsawaf
2020-04-14 6:13 ` Sultan Alsawaf
2020-04-14 6:13 ` [Intel-gfx] " Sultan Alsawaf
2020-04-14 8:23 ` Chris Wilson
2020-04-14 8:23 ` Chris Wilson
2020-04-14 8:23 ` [Intel-gfx] " Chris Wilson
2020-04-14 14:43 ` Sultan Alsawaf
2020-04-14 14:43 ` Sultan Alsawaf
2020-04-14 14:43 ` [Intel-gfx] " Sultan Alsawaf
2020-04-20 5:24 ` Sultan Alsawaf
2020-04-20 5:24 ` Sultan Alsawaf
2020-04-20 5:24 ` [Intel-gfx] " Sultan Alsawaf
2020-04-20 8:21 ` Joonas Lahtinen
2020-04-20 8:21 ` Joonas Lahtinen
2020-04-20 8:21 ` [Intel-gfx] " Joonas Lahtinen
2020-04-20 16:15 ` Sultan Alsawaf
2020-04-20 16:15 ` Sultan Alsawaf
2020-04-20 16:15 ` [Intel-gfx] " Sultan Alsawaf
2020-04-21 6:51 ` Joonas Lahtinen
2020-04-21 6:51 ` Joonas Lahtinen
2020-04-21 6:51 ` [Intel-gfx] " Joonas Lahtinen
2020-04-21 15:54 ` Sultan Alsawaf
2020-04-21 15:54 ` Sultan Alsawaf
2020-04-21 15:54 ` [Intel-gfx] " Sultan Alsawaf
2020-04-15 0:30 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2020-04-15 0:47 ` [Intel-gfx] ✗ Fi.CI.DOCS: " Patchwork
2020-04-15 0:50 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2020-04-15 3:29 ` [drm/i915] 6dc0b234a6: BUG:sleeping_function_called_from_invalid_context_at_kernel/locking/mutex.c kernel test robot
2020-04-15 3:29 ` kernel test robot
2020-04-15 3:29 ` [Intel-gfx] " kernel test robot
2020-04-15 3:29 ` kernel test robot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200407064007.7599-1-sultan@kerneltoast.com \
--to=sultan@kerneltoast.com \
--cc=airlied@linux.ie \
--cc=chris@chris-wilson.co.uk \
--cc=daniel@ffwll.ch \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-gfx@lists.freedesktop.org \
--cc=jani.nikula@linux.intel.com \
--cc=joonas.lahtinen@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=matthew.auld@intel.com \
--cc=rodrigo.vivi@intel.com \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.