All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Stephane Eranian <eranian@google.com>
Cc: Andres Freund <andres@anarazel.de>,
	"eranian@gmail.com" <eranian@gmail.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Jiri Olsa <jolsa@redhat.com>, Ingo Molnar <mingo@elte.hu>,
	Anton Blanchard <anton@ozlabs.org>,
	Namhyung Kim <namhyung@kernel.org>
Subject: Re: perf/jit doesn't cope well with mprotect() to jit containing pages
Date: Fri, 27 Jan 2017 14:07:02 +0100	[thread overview]
Message-ID: <20170127130702.GI6515@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <CABPqkBQq-33ky_uZ4UgaFZaA-4=uuR2cRrAzcT5DabCZ_H30FA@mail.gmail.com>

On Thu, Jan 26, 2017 at 03:16:08PM -0800, Stephane Eranian wrote:
> One solution would be for the kernel to report actual mmaps and not resulting
> VMA layouts. Is that case you would have your 4 mmaps each reporting 4kb.
> That means the perf hook in the mmap code would have to be placed somewhere
> else. I don't know how feasible this is. I will let Peter comment on this. But
> hopefully by now, I have described the problem clearly enough that we can work
> out a solution.

There's a number of problems with that; first and foremost is that it
changes existing behaviour.

Second is that there is no clear definition of what an actual mmap is;
suppose something like:

	ptr = mmap(NULL, 2*4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	mprotect(ptr, 4096, PROT_EXEC);

What should be reported?


Like I said yesterday, the existing heuristic in perf exists because we
do not track munmap() and therefore cannot tell the difference between:

	ptr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	munmap(ptr);
	ptr = mmap(NULL, 2*4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);

and:
	ptr1 = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	ptr2 = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);

where these two get merged.

Something like the (compile tested only) below might be sufficient to
disambiguate things. It would need a corresponding tools/perf patch of
course, but I'm not too familiar with that code anymore.

---
 include/linux/perf_event.h      |  2 ++
 include/uapi/linux/perf_event.h | 14 ++++++++-
 kernel/events/core.c            | 66 ++++++++++++++++++++++++++++++++++++++++-
 mm/memory.c                     |  2 ++
 4 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 78ed8105e64d..957a5f2d8e56 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1104,6 +1104,8 @@ static inline u64 __perf_event_count(struct perf_event *event)
 }
 
 extern void perf_event_mmap(struct vm_area_struct *vma);
+extern void perf_event_munmap(struct vm_area_struct *vma);
+
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c66a485a24ac..7571b7b43951 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -344,7 +344,8 @@ struct perf_event_attr {
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
 				write_backward :  1, /* Write ring buffer from end to beginning */
-				__reserved_1   : 36;
+				munmap         :  1, /* include munmap data */
+				__reserved_1   : 35;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -862,6 +863,17 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_SWITCH_CPU_WIDE		= 15,
 
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u64				addr;
+	 *	u64				len;
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_MUNMAP			= 16,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 110b38a58493..256ab94e8baf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -374,6 +374,7 @@ static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
+static atomic_t nr_munmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
@@ -3850,7 +3851,7 @@ static bool is_sb_event(struct perf_event *event)
 	if (event->attach_state & PERF_ATTACH_TASK)
 		return false;
 
-	if (attr->mmap || attr->mmap_data || attr->mmap2 ||
+	if (attr->mmap || attr->mmap_data || attr->mmap2 || attr->munmap ||
 	    attr->comm || attr->comm_exec ||
 	    attr->task ||
 	    attr->context_switch)
@@ -3906,6 +3907,8 @@ static void unaccount_event(struct perf_event *event)
 		dec = true;
 	if (event->attr.mmap || event->attr.mmap_data)
 		atomic_dec(&nr_mmap_events);
+	if (event->attr.munmap)
+		atomic_dec(&nr_munmap_events);
 	if (event->attr.comm)
 		atomic_dec(&nr_comm_events);
 	if (event->attr.task)
@@ -6831,6 +6834,65 @@ void perf_event_mmap(struct vm_area_struct *vma)
 	perf_event_mmap_event(&mmap_event);
 }
 
+struct perf_munmap_event {
+	struct perf_event_header	header;
+	u64				start;
+	u64				len;
+};
+
+static void perf_event_munmap_output(struct perf_event *event, void *data)
+{
+	struct perf_munmap_event *munmap_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int size = munmap_event->header.size;
+	int ret;
+
+	if (!event->attr.munmap)
+		return;
+
+	if (!event->attr.mmap)
+		return; /* no munmap without mmap */
+
+	if (!event->attr.mmap_data &&
+	    (munmap_event->header.misc & PERF_RECORD_MISC_MMAP_DATA))
+		return; /* no data munmap without mmap_data */
+
+	perf_event_header__init_id(&munmap_event->header, &sample, event);
+	ret = perf_output_begin(&handle, event, munmap_event->header.size);
+	if (ret)
+		goto out;
+
+	perf_output_put(&handle, *munmap_event);
+	perf_event__output_id_sample(event, &handle, &sample);
+	perf_output_end(&handle);
+out:
+	munmap_event->header.size = size;
+}
+
+void perf_event_munmap(struct vm_area_struct *vma)
+{
+	struct perf_munmap_event munmap_event;
+
+	if (!atomic_read(&nr_munmap_events))
+		return;
+
+	munmap_event = (struct perf_munmap_event){
+		.header = {
+			.type = PERF_RECORD_MUNMAP,
+			.misc = PERF_RECORD_MISC_USER,
+			.size = sizeof(munmap_event),
+		},
+		.start = vma->vm_start,
+		.len   = vma->vm_end - vma->vm_start,
+	};
+
+	if (!(vma->vm_flags & VM_EXEC))
+		munmap_event.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
+
+	perf_iterate_sb(perf_event_munmap_output, &munmap_event, NULL);
+}
+
 void perf_event_aux_event(struct perf_event *event, unsigned long head,
 			  unsigned long size, u64 flags)
 {
@@ -9067,6 +9129,8 @@ static void account_event(struct perf_event *event)
 		inc = true;
 	if (event->attr.mmap || event->attr.mmap_data)
 		atomic_inc(&nr_mmap_events);
+	if (event->attr.munmap)
+		atomic_inc(&nr_munmap_events);
 	if (event->attr.comm)
 		atomic_inc(&nr_comm_events);
 	if (event->attr.task)
diff --git a/mm/memory.c b/mm/memory.c
index 6bf2b471e30c..b89355229608 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -64,6 +64,7 @@
 #include <linux/debugfs.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/dax.h>
+#include <linux/perf_event.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -1312,6 +1313,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 	if (end <= vma->vm_start)
 		return;
 
+	perf_event_munmap(vma);
 	if (vma->vm_file)
 		uprobe_munmap(vma, start, end);
 

  reply	other threads:[~2017-01-27 13:08 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-10  5:02 perf/jit doesn't cope well with mprotect() to jit containing pages Andres Freund
2016-12-12  8:49 ` Peter Zijlstra
2016-12-12  9:01   ` Andres Freund
2016-12-12  9:28     ` Peter Zijlstra
2017-01-26  1:25       ` Andres Freund
2017-01-26 22:15   ` Peter Zijlstra
2017-01-26 23:04     ` Andres Freund
2017-01-30 11:52     ` [tip:perf/core] perf/core: Fix PERF_RECORD_MMAP2 prot/flags for anonymous memory tip-bot for Peter Zijlstra
2017-01-26 20:32 ` perf/jit doesn't cope well with mprotect() to jit containing pages Stephane Eranian
2017-01-26 21:00   ` Andres Freund
2017-01-26 21:17     ` Stephane Eranian
2017-01-26 21:22       ` Andres Freund
2017-01-26 21:34         ` Stephane Eranian
2017-01-26 21:51           ` Andres Freund
2017-01-26 22:19     ` Peter Zijlstra
2017-01-26 22:26       ` Stephane Eranian
2017-01-26 22:38         ` Andres Freund
2017-01-26 22:51           ` Stephane Eranian
2017-01-26 23:09             ` Andres Freund
2017-01-26 23:16               ` Stephane Eranian
2017-01-27 13:07                 ` Peter Zijlstra [this message]
2017-01-27 15:43                   ` Arnaldo Carvalho de Melo
2017-01-27 17:35                     ` Stephane Eranian
2017-01-27 17:38                     ` [PATCH] handle munmap records in tools/perf was: " Arnaldo Carvalho de Melo
2017-01-27 17:46                       ` Stephane Eranian
2017-01-27 18:05                         ` Arnaldo Carvalho de Melo
2017-01-27 18:10                           ` Stephane Eranian
2017-01-27 19:18                             ` Arnaldo Carvalho de Melo
2017-01-27 19:26                               ` Stephane Eranian

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170127130702.GI6515@twins.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@kernel.org \
    --cc=andres@anarazel.de \
    --cc=anton@ozlabs.org \
    --cc=eranian@gmail.com \
    --cc=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=namhyung@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.