From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linuxfoundation.org>,
Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Arnaldo Carvalho de Melo <acme@redhat.com>,
Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
Kees Cook <kees@kernel.org>
Subject: [patch V2 RESEND 4/6] perf/core: Split out AUX buffer allocation
Date: Mon, 11 Aug 2025 14:36:35 +0200 (CEST) [thread overview]
Message-ID: <20250811123609.667172352@linutronix.de> (raw)
In-Reply-To: 20250811123458.050061356@linutronix.de
The code logic in perf_mmap() is incomprehensible and has been a source of
subtle bugs in the past. It makes it impossible to convert the atomic_t
reference counts to refcount_t.
There is not much shared between the ringbuffer and AUX buffer
allocation code anymore, since the mlock limit calculation and the
accounting have been split out into helper functions.
Move the AUX buffer allocation code out and integrate the call with a
momentary workaround to allow skipping the remaining ringbuffer related
code completely. That workaround will be removed once the ringbuffer
allocation is moved to its own function as well.
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
V2: Fix up inverted condition and add the dropped flags setup back - Lorenzo
Fixup subject line to match the content
---
kernel/events/core.c | 137 +++++++++++++++++++++++++++++----------------------
1 file changed, 78 insertions(+), 59 deletions(-)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6970,12 +6970,79 @@ static void perf_mmap_account(struct vm_
atomic64_add(extra, &vma->vm_mm->pinned_vm);
}
+/*
+ * Map the AUX area of @event's ring buffer into @vma.
+ *
+ * @vma:	the VMA being mapped; supplies vm_pgoff and vm_flags
+ * @event:	the perf event; the caller has verified that event->rb is set
+ * @nr_pages:	number of AUX pages requested by this mapping
+ *
+ * The caller in perf_mmap() invokes this with event->rb->aux_mutex held.
+ *
+ * The first mapping allocates the AUX buffer. Subsequent mappings must use
+ * the same offset and size and only take an additional reference on it.
+ * Every successful mapping, whether it allocates or attaches, is accounted
+ * via perf_mmap_account() and takes a reference on event->mmap_count,
+ * matching what the open coded variant in perf_mmap() did at its unlock
+ * label.
+ *
+ * Returns 0 on success, -EINVAL when the request does not match the user
+ * page or an already existing AUX mapping or when rb->mmap_count already
+ * dropped to zero, -EPERM when perf_mmap_calc_limits() rejects the
+ * request, or the error code of rb_alloc_aux().
+ */
+static int perf_mmap_aux(struct vm_area_struct *vma, struct perf_event *event,
+			 unsigned long nr_pages)
+{
+	long user_extra = nr_pages, extra = 0;
+	struct perf_buffer *rb = event->rb;
+	u64 aux_offset, aux_size;
+	int ret, rb_flags = 0;
+
+	/*
+	 * AUX area mapping: if rb->aux_nr_pages != 0, it's already
+	 * mapped, all subsequent mappings should have the same size
+	 * and offset. Must be above the normal perf buffer.
+	 */
+	aux_offset = READ_ONCE(rb->user_page->aux_offset);
+	aux_size = READ_ONCE(rb->user_page->aux_size);
+
+	if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
+		return -EINVAL;
+
+	if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
+		return -EINVAL;
+
+	/* Already mapped with a different offset */
+	if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
+		return -EINVAL;
+
+	if (aux_size != nr_pages * PAGE_SIZE)
+		return -EINVAL;
+
+	/* Already mapped with a different size */
+	if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
+		return -EINVAL;
+
+	if (!is_power_of_2(nr_pages))
+		return -EINVAL;
+
+	/* If this succeeds, subsequent failures have to undo it */
+	if (!atomic_inc_not_zero(&rb->mmap_count))
+		return -EINVAL;
+
+	if (rb_has_aux(rb)) {
+		/* If mapped, attach to it */
+		atomic_inc(&rb->aux_mmap_count);
+	} else {
+		if (!perf_mmap_calc_limits(vma, &user_extra, &extra)) {
+			atomic_dec(&rb->mmap_count);
+			return -EPERM;
+		}
+
+		if (vma->vm_flags & VM_WRITE)
+			rb_flags |= RING_BUFFER_WRITABLE;
+
+		ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
+				   event->attr.aux_watermark, rb_flags);
+		if (ret) {
+			atomic_dec(&rb->mmap_count);
+			return ret;
+		}
+
+		atomic_set(&rb->aux_mmap_count, 1);
+		rb->aux_mmap_locked = extra;
+	}
+
+	/*
+	 * Common success path for both attach and fresh allocation. The
+	 * open coded variant ran through the unlock label in perf_mmap()
+	 * for both cases, so the attach path must not skip the accounting
+	 * and the event->mmap_count reference.
+	 */
+	perf_mmap_account(vma, user_extra, extra);
+	atomic_inc(&event->mmap_count);
+	return 0;
+}
+
+
static int perf_mmap(struct file *file, struct vm_area_struct *vma)
{
struct perf_event *event = file->private_data;
unsigned long vma_size, nr_pages;
long user_extra = 0, extra = 0;
- struct mutex *aux_mutex = NULL;
struct perf_buffer *rb = NULL;
int ret, flags = 0;
mapped_f mapped;
@@ -7055,51 +7122,15 @@ static int perf_mmap(struct file *file,
}
} else {
- /*
- * AUX area mapping: if rb->aux_nr_pages != 0, it's already
- * mapped, all subsequent mappings should have the same size
- * and offset. Must be above the normal perf buffer.
- */
- u64 aux_offset, aux_size;
-
- rb = event->rb;
- if (!rb)
- goto aux_unlock;
-
- aux_mutex = &rb->aux_mutex;
- mutex_lock(aux_mutex);
-
- aux_offset = READ_ONCE(rb->user_page->aux_offset);
- aux_size = READ_ONCE(rb->user_page->aux_size);
-
- if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
- goto aux_unlock;
-
- if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
- goto aux_unlock;
-
- /* already mapped with a different offset */
- if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
- goto aux_unlock;
-
- if (aux_size != nr_pages * PAGE_SIZE)
- goto aux_unlock;
-
- /* already mapped with a different size */
- if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
- goto aux_unlock;
-
- if (!is_power_of_2(nr_pages))
- goto aux_unlock;
-
- if (!atomic_inc_not_zero(&rb->mmap_count))
- goto aux_unlock;
-
- if (rb_has_aux(rb)) {
- atomic_inc(&rb->aux_mmap_count);
- ret = 0;
- goto unlock;
+ if (!event->rb) {
+ ret = -EINVAL;
+ } else {
+ scoped_guard(mutex, &event->rb->aux_mutex)
+ ret = perf_mmap_aux(vma, event, nr_pages);
}
+ // Temporary workaround to split out AUX handling first
+ mutex_unlock(&event->mmap_mutex);
+ goto out;
}
if (!perf_mmap_calc_limits(vma, &user_extra, &extra)) {
@@ -7132,28 +7163,16 @@ static int perf_mmap(struct file *file,
perf_event_init_userpage(event);
perf_event_update_userpage(event);
ret = 0;
- } else {
- ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
- event->attr.aux_watermark, flags);
- if (!ret) {
- atomic_set(&rb->aux_mmap_count, 1);
- rb->aux_mmap_locked = extra;
- }
}
-
unlock:
if (!ret) {
perf_mmap_account(vma, user_extra, extra);
atomic_inc(&event->mmap_count);
- } else if (rb) {
- /* AUX allocation failed */
- atomic_dec(&rb->mmap_count);
}
-aux_unlock:
- if (aux_mutex)
- mutex_unlock(aux_mutex);
mutex_unlock(&event->mmap_mutex);
+// Temporary until RB allocation is split out.
+out:
if (ret)
return ret;
next prev parent reply other threads:[~2025-08-11 12:36 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-11 12:36 [patch V2 RESEND 0/6] perf: Convert mmap() related reference counts to refcount_t Thomas Gleixner
2025-08-11 12:36 ` [patch V2 RESEND 1/6] perf/core: Remove redundant condition for AUX buffer size Thomas Gleixner
2025-08-11 12:55 ` Lorenzo Stoakes
2025-08-11 12:36 ` [patch V2 RESEND 2/6] perf/core: Split out mlock limit handling Thomas Gleixner
2025-08-11 12:57 ` Lorenzo Stoakes
2025-08-11 12:36 ` [patch V2 RESEND 3/6] perf/core: Split out VM accounting Thomas Gleixner
2025-08-11 12:58 ` Lorenzo Stoakes
2025-08-11 12:36 ` Thomas Gleixner [this message]
2025-08-11 13:21 ` [patch V2 RESEND 4/6] perf/core: Split out AUX buffer allocation Lorenzo Stoakes
2025-08-12 10:06 ` Peter Zijlstra
2025-08-12 11:08 ` Lorenzo Stoakes
2025-08-11 12:36 ` [patch V2 RESEND 5/6] perf/core: Split the ringbuffer mmap() and allocation code out Thomas Gleixner
2025-08-11 13:56 ` Lorenzo Stoakes
2025-08-11 14:10 ` Lorenzo Stoakes
2025-08-11 12:36 ` [patch V2 RESEND 6/6] perf/core: Convert mmap() refcounts to refcount_t Thomas Gleixner
2025-08-11 14:12 ` Lorenzo Stoakes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250811123609.667172352@linutronix.de \
--to=tglx@linutronix.de \
--cc=acme@redhat.com \
--cc=kees@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=torvalds@linuxfoundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox