Re: [PATCH v2] perf/core: Fix data race between perf_event_set_output and perf_mmap_close

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Peter Zijlstra <peterz@infradead.org>
To: Yang Jihong <yangjihong1@huawei.com>
Cc: mingo@redhat.com, acme@kernel.org, mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@kernel.org,
	namhyung@kernel.org, linux-perf-users@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2] perf/core: Fix data race between perf_event_set_output and perf_mmap_close
Date: Tue, 5 Jul 2022 15:07:26 +0200	[thread overview]
Message-ID: <YsQ3jm2GR38SW7uD@worktop.programming.kicks-ass.net> (raw)
In-Reply-To: <YsMGixSL4CDPTTZs@worktop.programming.kicks-ass.net>

On Mon, Jul 04, 2022 at 05:26:04PM +0200, Peter Zijlstra wrote:
> On Mon, Jul 04, 2022 at 08:00:06PM +0800, Yang Jihong wrote:
> > Data race exists between perf_event_set_output and perf_mmap_close.
> > The scenario is as follows:
> > 
> >                   CPU1                                                       CPU2
> >                                                                     perf_mmap_close(event2)
> >                                                                       if (atomic_dec_and_test(&event2->rb->mmap_count)  // mmap_count 1 -> 0
> >                                                                         detach_rest = true;
> > ioctl(event1, PERF_EVENT_IOC_SET_OUTPUT, event2)
> >   perf_event_set_output(event1, event2)
> >                                                                       if (!detach_rest)
> >                                                                         goto out_put;
> >                                                                       list_for_each_entry_rcu(event, &event2->rb->event_list, rb_entry)
> >                                                                         ring_buffer_attach(event, NULL)
> >                                                                       // because event1 has not been added to event2->rb->event_list,
> >                                                                       // event1->rb is not set to NULL in these loops
> > 
> >     ring_buffer_attach(event1, event2->rb)
> >       list_add_rcu(&event1->rb_entry, &event2->rb->event_list)
> > 
> > The above data race causes a problem, that is, event1->rb is not NULL, but event1->rb->mmap_count is 0.
> > If the perf_mmap interface is invoked for the fd of event1, the kernel keeps in the perf_mmap infinite loop:
> > 
> > again:
> >         mutex_lock(&event->mmap_mutex);
> >         if (event->rb) {
> > <SNIP>
> >                 if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
> >                         /*
> >                          * Raced against perf_mmap_close() through
> >                          * perf_event_set_output(). Try again, hope for better
> >                          * luck.
> >                          */
> >                         mutex_unlock(&event->mmap_mutex);
> >                         goto again;
> >                 }
> > <SNIP>
> 
> Too tired, must look again tomorrow, little feeback below.

With brain more awake I ended up with the below. Does that work?

---
 kernel/events/core.c | 45 +++++++++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4d8c335a07db..c9d32d4d2e20 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6262,10 +6262,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 		if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
 			/*
-			 * Raced against perf_mmap_close() through
-			 * perf_event_set_output(). Try again, hope for better
-			 * luck.
+			 * Raced against perf_mmap_close(); remove the
+			 * event and try again.
 			 */
+			ring_buffer_attach(event, NULL);
 			mutex_unlock(&event->mmap_mutex);
 			goto again;
 		}
@@ -11840,14 +11840,25 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	goto out;
 }
 
+static void mutex_lock_double(struct mutex *a, struct mutex *b)
+{
+	if (b < a)
+		swap(a, b);
+
+	mutex_lock(a);
+	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+}
+
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
 	struct perf_buffer *rb = NULL;
 	int ret = -EINVAL;
 
-	if (!output_event)
+	if (!output_event) {
+		mutex_lock(&event->mmap_mutex);
 		goto set;
+	}
 
 	/* don't allow circular references */
 	if (event == output_event)
@@ -11885,8 +11896,15 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 	    event->pmu != output_event->pmu)
 		goto out;
 
+	/*
+	 * Hold both mmap_mutex to serialize against perf_mmap_close().  Since
+	 * output_event is already on rb->event_list, and the list iteration
+	 * restarts after every removal, it is guaranteed this new event is
+	 * observed *OR* if output_event is already removed, it's guaranteed we
+	 * observe !rb->mmap_count.
+	 */
+	mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex);
 set:
-	mutex_lock(&event->mmap_mutex);
 	/* Can't redirect output if we've got an active mmap() */
 	if (atomic_read(&event->mmap_count))
 		goto unlock;
@@ -11896,6 +11914,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 		rb = ring_buffer_get(output_event);
 		if (!rb)
 			goto unlock;
+
+		/* did we race against perf_mmap_close() */
+		if (!atomic_read(&rb->mmap_count)) {
+			ring_buffer_put(rb);
+			goto unlock;
+		}
 	}
 
 	ring_buffer_attach(event, rb);
@@ -11903,20 +11927,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 	ret = 0;
 unlock:
 	mutex_unlock(&event->mmap_mutex);
+	if (output_event)
+		mutex_unlock(&output_event->mmap_mutex);
 
 out:
 	return ret;
 }
 
-static void mutex_lock_double(struct mutex *a, struct mutex *b)
-{
-	if (b < a)
-		swap(a, b);
-
-	mutex_lock(a);
-	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
-}
-
 static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
 {
 	bool nmi_safe = false;

next prev parent reply	other threads:[~2022-07-05 13:44 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-04 12:00 [PATCH v2] perf/core: Fix data race between perf_event_set_output and perf_mmap_close Yang Jihong
2022-07-04 15:26 ` Peter Zijlstra
2022-07-05  2:07   ` Yang Jihong
2022-07-05 13:07   ` Peter Zijlstra [this message]
2022-07-06 12:29     ` Yang Jihong
2022-07-09  2:00       ` Yang Jihong
2022-07-14 11:35     ` [tip: perf/urgent] perf/core: Fix data race between perf_event_set_output() and perf_mmap_close() tip-bot2 for Peter Zijlstra

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:4d8c335a07d dfblob:c9d32d4d2e2 )
 OR (
bs:"Re: [PATCH v2] perf/core: Fix data race between perf_event_set_output and perf_mmap_close" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YsQ3jm2GR38SW7uD@worktop.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=yangjihong1@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox