All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Yang Jihong <yangjihong1@huawei.com>
Cc: mingo@redhat.com, acme@kernel.org, mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@kernel.org,
	namhyung@kernel.org, linux-perf-users@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2] perf/core: Fix data race between perf_event_set_output and perf_mmap_close
Date: Tue, 5 Jul 2022 15:07:26 +0200	[thread overview]
Message-ID: <YsQ3jm2GR38SW7uD@worktop.programming.kicks-ass.net> (raw)
In-Reply-To: <YsMGixSL4CDPTTZs@worktop.programming.kicks-ass.net>

On Mon, Jul 04, 2022 at 05:26:04PM +0200, Peter Zijlstra wrote:
> On Mon, Jul 04, 2022 at 08:00:06PM +0800, Yang Jihong wrote:
> > Data race exists between perf_event_set_output and perf_mmap_close.
> > The scenario is as follows:
> > 
> >                   CPU1                                                       CPU2
> >                                                                     perf_mmap_close(event2)
> >                                                                       if (atomic_dec_and_test(&event2->rb->mmap_count)  // mmap_count 1 -> 0
> >                                                                         detach_rest = true;
> > ioctl(event1, PERF_EVENT_IOC_SET_OUTPUT, event2)
> >   perf_event_set_output(event1, event2)
> >                                                                       if (!detach_rest)
> >                                                                         goto out_put;
> >                                                                       list_for_each_entry_rcu(event, &event2->rb->event_list, rb_entry)
> >                                                                         ring_buffer_attach(event, NULL)
> >                                                                       // because event1 has not been added to event2->rb->event_list,
> >                                                                       // event1->rb is not set to NULL in these loops
> > 
> >     ring_buffer_attach(event1, event2->rb)
> >       list_add_rcu(&event1->rb_entry, &event2->rb->event_list)
> > 
> > The above data race causes a problem, that is, event1->rb is not NULL, but event1->rb->mmap_count is 0.
> > If the perf_mmap interface is invoked for the fd of event1, the kernel keeps in the perf_mmap infinite loop:
> > 
> > again:
> >         mutex_lock(&event->mmap_mutex);
> >         if (event->rb) {
> > <SNIP>
> >                 if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
> >                         /*
> >                          * Raced against perf_mmap_close() through
> >                          * perf_event_set_output(). Try again, hope for better
> >                          * luck.
> >                          */
> >                         mutex_unlock(&event->mmap_mutex);
> >                         goto again;
> >                 }
> > <SNIP>
> 
> Too tired, must look again tomorrow, little feeback below.

With brain more awake I ended up with the below. Does that work?

---
 kernel/events/core.c | 45 +++++++++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4d8c335a07db..c9d32d4d2e20 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6262,10 +6262,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 		if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
 			/*
-			 * Raced against perf_mmap_close() through
-			 * perf_event_set_output(). Try again, hope for better
-			 * luck.
+			 * Raced against perf_mmap_close(); remove the
+			 * event and try again.
 			 */
+			ring_buffer_attach(event, NULL);
 			mutex_unlock(&event->mmap_mutex);
 			goto again;
 		}
@@ -11840,14 +11840,25 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	goto out;
 }
 
+static void mutex_lock_double(struct mutex *a, struct mutex *b)
+{
+	if (b < a)
+		swap(a, b);
+
+	mutex_lock(a);
+	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+}
+
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
 	struct perf_buffer *rb = NULL;
 	int ret = -EINVAL;
 
-	if (!output_event)
+	if (!output_event) {
+		mutex_lock(&event->mmap_mutex);
 		goto set;
+	}
 
 	/* don't allow circular references */
 	if (event == output_event)
@@ -11885,8 +11896,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 	    event->pmu != output_event->pmu)
 		goto out;
 
+	/*
+	 * Hold both mmap_mutex to serialize against perf_mmap_close().  Since
+	 * output_event is already on rb->event_list, and the list iteration
+	 * restarts after every removal, it is guaranteed this new event is
+	 * observed *OR* if output_event is already removed, it's guaranteed we
+	 * observe !rb->mmap_count.
+	 */
+	mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex);
 set:
-	mutex_lock(&event->mmap_mutex);
 	/* Can't redirect output if we've got an active mmap() */
 	if (atomic_read(&event->mmap_count))
 		goto unlock;
@@ -11896,6 +11914,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 		rb = ring_buffer_get(output_event);
 		if (!rb)
 			goto unlock;
+
+		/* did we race against perf_mmap_close() */
+		if (!atomic_read(&rb->mmap_count)) {
+			ring_buffer_put(rb);
+			goto unlock;
+		}
 	}
 
 	ring_buffer_attach(event, rb);
@@ -11903,20 +11927,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 	ret = 0;
 unlock:
 	mutex_unlock(&event->mmap_mutex);
+	if (output_event)
+		mutex_unlock(&output_event->mmap_mutex);
 
 out:
 	return ret;
 }
 
-static void mutex_lock_double(struct mutex *a, struct mutex *b)
-{
-	if (b < a)
-		swap(a, b);
-
-	mutex_lock(a);
-	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
-}
-
 static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
 {
 	bool nmi_safe = false;

  parent reply	other threads:[~2022-07-05 13:44 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-04 12:00 [PATCH v2] perf/core: Fix data race between perf_event_set_output and perf_mmap_close Yang Jihong
2022-07-04 15:26 ` Peter Zijlstra
2022-07-05  2:07   ` Yang Jihong
2022-07-05 13:07   ` Peter Zijlstra [this message]
2022-07-06 12:29     ` Yang Jihong
2022-07-09  2:00       ` Yang Jihong
2022-07-14 11:35     ` [tip: perf/urgent] perf/core: Fix data race between perf_event_set_output() and perf_mmap_close() tip-bot2 for Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YsQ3jm2GR38SW7uD@worktop.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=yangjihong1@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.