All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Qing Wang <wangqing7171@gmail.com>
Cc: acme@kernel.org, adrian.hunter@intel.com,
	alexander.shishkin@linux.intel.com, irogers@google.com,
	james.clark@linaro.org, jolsa@kernel.org,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	mark.rutland@arm.com, mingo@redhat.com, namhyung@kernel.org,
	syzbot+196a82fd904572696b3c@syzkaller.appspotmail.com,
	yuhaocheng035@gmail.com
Subject: Re: [PATCH v3] perf/core: Fix refcount bug and potential UAF in perf_mmap
Date: Thu, 26 Mar 2026 12:28:21 +0100	[thread overview]
Message-ID: <20260326112821.GK3738786@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20260326031806.876931-1-wangqing7171@gmail.com>

On Thu, Mar 26, 2026 at 11:18:06AM +0800, Qing Wang wrote:
> On Wed, 25 Mar 2026 at 23:17, Peter Zijlstra <peterz@infradead.org> wrote:
> > Argh,. why is this hidden in this old thread :/
> > 
> > On Wed, Mar 25, 2026 at 06:20:53PM +0800, yuhaocheng035@gmail.com wrote:
> > 
> > > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > > index 2c35acc2722b..a3228c587de1 100644
> > > --- a/kernel/events/core.c
> > > +++ b/kernel/events/core.c
> > > @@ -6730,9 +6730,10 @@ static void perf_pmu_output_stop(struct perf_event *event);
> > >   * the buffer here, where we still have a VM context. This means we need
> > >   * to detach all events redirecting to us.
> > >   */
> > > -static void perf_mmap_close(struct vm_area_struct *vma)
> > > +static void __perf_mmap_close(struct vm_area_struct *vma, struct perf_event *event,
> > > +			      bool holds_event_mmap_lock)
> > >  {
> > > -	struct perf_event *event = vma->vm_file->private_data;
> > > +	struct perf_event *iter_event;
> > >  	mapped_f unmapped = get_mapped(event, event_unmapped);
> > >  	struct perf_buffer *rb = ring_buffer_get(event);
> > >  	struct user_struct *mmap_user = rb->mmap_user;
> > > @@ -6772,11 +6773,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
> > >  	if (refcount_dec_and_test(&rb->mmap_count))
> > >  		detach_rest = true;
> > >  
> > > -	if (!refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
> > > +	if ((!holds_event_mmap_lock &&
> > > +	     !refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) ||
> > > +	    (holds_event_mmap_lock && !refcount_dec_and_test(&event->mmap_count)))
> > >  		goto out_put;
> > 
> > *groan*, this is horrible.
> > 
> > Let me have a poke to see if there isn't a saner variant around.
> 
> I think it's ok to move perf_mmap_close() outside the mutex lock, like this:
> 
> https://lore.kernel.org/all/20260325153240.GK3739106@noisy.programming.kicks-ass.net/T/#m0f82e8ecdfdfce4acd5121bcb799e864cf05ebf9
> 
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 1f5699b339ec..e5ce03ce926d 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7485,9 +7485,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
>  		 */
>  		ret = map_range(event->rb, vma);
>  		if (ret)
> -			perf_mmap_close(vma);
> +			goto out_close;
>  	}
> +	return 0;
>  
> +out_close:
> +	perf_mmap_close(vma);
>  	return ret;
>  }
> 
> What do you think?

Well, that will just re-introduce the original problem. As you were told
there.

What about something like this?

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1f5699b339ec..0bb1d8b83bc9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7010,6 +7010,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb);
 
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
@@ -7025,8 +7026,6 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	mapped_f unmapped = get_mapped(event, event_unmapped);
 	struct perf_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
-	int mmap_locked = rb->mmap_locked;
-	unsigned long size = perf_data_size(rb);
 	bool detach_rest = false;
 
 	/* FIXIES vs perf_pmu_unregister() */
@@ -7121,11 +7120,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	 * Aside from that, this buffer is 'fully' detached and unmapped,
 	 * undo the VM accounting.
 	 */
-
-	atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
-			&mmap_user->locked_vm);
-	atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
-	free_uid(mmap_user);
+	perf_mmap_unaccount(vma, rb);
 
 out_put:
 	ring_buffer_put(rb); /* could be last */
@@ -7265,6 +7260,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long
 	atomic64_add(extra, &vma->vm_mm->pinned_vm);
 }
 
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb)
+{
+	struct user_struct *user = rb->mmap_user;
+
+	atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked,
+			&user->locked_vm);
+	atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm);
+}
+
 static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
 			unsigned long nr_pages)
 {
@@ -7327,8 +7331,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
 	if (!rb)
 		return -ENOMEM;
 
-	refcount_set(&rb->mmap_count, 1);
-	rb->mmap_user = get_current_user();
 	rb->mmap_locked = extra;
 
 	ring_buffer_attach(event, rb);
@@ -7484,10 +7486,43 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		 * vmops::close().
 		 */
 		ret = map_range(event->rb, vma);
-		if (ret)
-			perf_mmap_close(vma);
+		if (likely(!ret))
+			return 0;
+
+		/* Error path */
+
+		/*
+		 * If this is the first mmap(), then event->mmap_count should
+		 * be stable at 1. It is only modified by:
+		 * perf_mmap_{open,close}() and perf_mmap().
+		 *
+		 * The former are not possible because this mmap() hasn't been
+		 * successful yet, and the latter is serialized by
+		 * event->mmap_mutex which we still hold (note that mmap_lock
+		 * is not strictly sufficient here, because the event fd can
+		 * be passed to another process through trivial means like
+		 * fork(), leading to concurrent mmap() from different mm).
+		 *
+		 * Make sure to remove event->rb before releasing
+		 * event->mmap_mutex, such that any concurrent mmap() will not
+		 * attempt to use this failed buffer.
+		 */
+		if (refcount_read(&event->mmap_count) == 1) {
+			/*
+			 * Minimal perf_mmap_close(); there can't be AUX or
+			 * other events on account of this being the first.
+			 */
+			mapped = get_mapped(event, event_unmapped);
+			if (mapped)
+				mapped(event, vma->vm_mm);
+			perf_mmap_unaccount(vma, event->rb);
+			ring_buffer_attach(event, NULL);	/* drops last rb->refcount */
+			refcount_set(&event->mmap_count, 0);
+			return ret;
+		}
 	}
 
+	perf_mmap_close(vma);
 	return ret;
 }
 
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index d9cc57083091..c03c4f2eea57 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
 	struct perf_buffer *rb;
 
 	rb = container_of(rcu_head, struct perf_buffer, rcu_head);
+	free_uid(rb->mmap_user);
 	rb_free(rb);
 }
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 3e7de2661417..9fe92161715e 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
 		rb->paused = 1;
 
 	mutex_init(&rb->aux_mutex);
+	rb->mmap_user = get_current_user();
+	refcount_set(&rb->mmap_count, 1);
 }
 
 void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)

  reply	other threads:[~2026-03-26 11:28 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-09  8:25 [PATCH] perf: Fix deadlock in perf_mmap() Qing Wang
2026-03-09 18:59 ` Ian Rogers
2026-03-10  3:37   ` Qing Wang
2026-03-10  4:45     ` Ian Rogers
2026-03-24 18:38       ` Ian Rogers
2026-03-25  6:58         ` Haocheng Yu
2026-03-25 10:20           ` [PATCH v3] perf/core: Fix refcount bug and potential UAF in perf_mmap yuhaocheng035
2026-03-25 15:08             ` Ian Rogers
2026-03-25 15:17             ` Peter Zijlstra
2026-03-25 15:32               ` Peter Zijlstra
2026-03-26  3:18               ` Qing Wang
2026-03-26 11:28                 ` Peter Zijlstra [this message]
2026-03-27 12:29                   ` [PATCH v4] " yuhaocheng035
2026-03-27 12:31                     ` Haocheng Yu
2026-03-27 12:34                     ` Peter Zijlstra
2026-05-05 10:50                   ` [tip: perf/core] perf/core: Fix deadlock in perf_mmap() failure path tip-bot2 for Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260326112821.GK3738786@noisy.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=irogers@google.com \
    --cc=james.clark@linaro.org \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=syzbot+196a82fd904572696b3c@syzkaller.appspotmail.com \
    --cc=wangqing7171@gmail.com \
    --cc=yuhaocheng035@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.