public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: Dave Hansen <dave.hansen@linux.intel.com>,
	Andy Lutomirski <luto@amacapital.net>,
	Thomas Gleixner <tglx@linutronix.de>,
	"H . Peter Anvin" <hpa@zytor.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Borislav Petkov <bp@alien8.de>,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: [PATCH 11/24] x86/mm/kaiser: Map virtually-addressed performance monitoring buffers
Date: Mon, 27 Nov 2017 11:49:10 +0100	[thread overview]
Message-ID: <20171127104923.14378-12-mingo@kernel.org> (raw)
In-Reply-To: <20171127104923.14378-1-mingo@kernel.org>

From: Hugh Dickins <hughd@google.com>

The BTS and PEBS buffers both have their virtual addresses
programmed into the hardware.  This means that any access to them
is performed via the page tables.  The times that the hardware
accesses these are entirely dependent on how the performance
monitoring hardware events are set up.  In other words, there is
no way for the kernel to tell when the hardware might access
these buffers.

To avoid perf crashes, place 'debug_store' in the user-mapped
per-CPU area instead of dynamically allocating it.  Also use the
page allocator plus kaiser_add_mapping() to keep the BTS and PEBS
buffers user-mapped (that is, present in the user mapping, though
visible only to kernel and hardware).  The PEBS fixup buffer does
not need this treatment.

The need for a user-mapped struct debug_store showed up even before
any deliberate perf testing: a couple of kernel paging oopses on
Westmere implicated the debug_store offset of the per-CPU area.

[Dave] Added explicit _PAGE_GLOBAL
[Dave] Removed KAISER #ifdefs by moving kmalloc() to plain page allocator
[Dave] Reworded the commit message a bit to be consistent with other patches

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: daniel.gruss@iaik.tugraz.at
Cc: keescook@google.com
Cc: linux-mm@kvack.org
Cc: michael.schwarz@iaik.tugraz.at
Cc: moritz.lipp@iaik.tugraz.at
Cc: richard.fellner@student.tugraz.at
Link: https://lkml.kernel.org/r/20171123003500.7EC0DB4E@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/ds.c | 49 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 3674a4b6f8bd..b5cf473e443a 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bitops.h>
 #include <linux/types.h>
+#include <linux/kaiser.h>
 #include <linux/slab.h>
 
 #include <asm/perf_event.h>
@@ -8,6 +9,9 @@
 
 #include "../perf_event.h"
 
+static
+DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
 
@@ -279,6 +283,31 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
+static void *dsalloc(size_t size, gfp_t flags, int node)
+{
+	unsigned int order = get_order(size);
+	struct page *page;
+	unsigned long addr;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	if (!page)
+		return NULL;
+	addr = (unsigned long)page_address(page);
+	if (kaiser_add_mapping(addr, size, __PAGE_KERNEL | _PAGE_GLOBAL) < 0) {
+		__free_pages(page, order);
+		addr = 0;
+	}
+	return (void *)addr;
+}
+
+static void dsfree(const void *buffer, size_t size)
+{
+	if (!buffer)
+		return;
+	kaiser_remove_mapping((unsigned long)buffer, size);
+	free_pages((unsigned long)buffer, get_order(size));
+}
+
 static int alloc_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -289,7 +318,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -300,7 +329,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree(buffer, x86_pmu.pebs_buffer_size);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
@@ -326,7 +355,8 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	dsfree((void *)(unsigned long)ds->pebs_buffer_base,
+			x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
 }
 
@@ -340,7 +370,7 @@ static int alloc_bts_buffer(int cpu)
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
@@ -366,19 +396,15 @@ static void release_bts_buffer(int cpu)
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
 
 	return 0;
@@ -392,7 +418,6 @@ static void release_ds_buffer(int cpu)
 		return;
 
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)
-- 
2.14.1

  parent reply	other threads:[~2017-11-27 10:55 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-27 10:48 [PATCH 00/24] x86/mm: Add KAISER support Ingo Molnar
2017-11-27 10:49 ` [PATCH 01/24] x86/mm/kaiser: Disable global pages by default with KAISER Ingo Molnar
2017-11-27 10:49 ` [PATCH 02/24] x86/mm/kaiser: Prepare the x86/entry assembly code for entry/exit CR3 switching Ingo Molnar
2017-11-27 17:31   ` Peter Zijlstra
2017-11-27 17:33     ` Thomas Gleixner
2017-11-27 21:00       ` Peter Zijlstra
2017-11-27 10:49 ` [PATCH 03/24] x86/mm/kaiser: Introduce user-mapped per-CPU areas Ingo Molnar
2017-11-27 10:49 ` [PATCH 04/24] x86/mm/kaiser: Unmap kernel mappings from userspace page tables, core patch Ingo Molnar
2017-11-27 15:39   ` Peter Zijlstra
2017-11-27 17:04     ` Borislav Petkov
2017-11-27 19:17     ` Dave Hansen
2017-11-28 10:34   ` Peter Zijlstra
2017-11-27 10:49 ` [PATCH 05/24] x86/mm/kaiser: Allow NX poison to be set in p4d/pgd Ingo Molnar
2017-11-27 10:49 ` [PATCH 06/24] x86/mm/kaiser: Make sure the static PGDs are 8k in size Ingo Molnar
2017-11-27 10:49 ` [PATCH 07/24] x86/mm/kaiser: Map the CPU entry area Ingo Molnar
2017-11-27 10:49 ` [PATCH 08/24] x86/mm/kaiser: Map the dynamically-allocated LDTs Ingo Molnar
2017-11-29 22:03   ` [08/24] " Guenter Roeck
2017-11-27 10:49 ` [PATCH 09/24] x86/mm/kaiser: Map the espfix structures Ingo Molnar
2017-11-27 10:49 ` [PATCH 10/24] x86/mm/kaiser: Map the entry stack variables Ingo Molnar
2017-11-27 17:22   ` Peter Zijlstra
2017-11-27 17:32     ` Thomas Gleixner
2017-11-27 21:00       ` Peter Zijlstra
2017-11-27 17:29   ` Peter Zijlstra
2017-11-27 17:32     ` Thomas Gleixner
2017-11-27 10:49 ` Ingo Molnar [this message]
2017-11-27 10:49 ` [PATCH 12/24] x86/mm: Move the CR3 construction functions to tlbflush.h Ingo Molnar
2017-11-27 10:49 ` [PATCH 13/24] x86/mm: Remove hard-coded ASID limit checks Ingo Molnar
2017-11-27 10:49 ` [PATCH 14/24] x86/mm: Put MMU-to-h/w ASID translation in one place Ingo Molnar
2017-11-27 10:49 ` [PATCH 15/24] x86/mm: Allow flushing for future ASID switches Ingo Molnar
2017-11-28  5:16   ` Andy Lutomirski
2017-11-28  7:32     ` Dave Hansen
2017-11-28 16:39     ` Peter Zijlstra
2017-11-28 16:48       ` Peter Zijlstra
2017-11-28 18:13       ` Dave Hansen
2017-11-28 19:05         ` Peter Zijlstra
2017-11-28 19:36           ` Peter Zijlstra
2017-11-28 20:34           ` Andy Lutomirski
2017-11-28 20:39             ` Peter Zijlstra
2017-11-28 20:45             ` Peter Zijlstra
2017-11-30 15:40     ` Peter Zijlstra
2017-11-30 15:42       ` Andy Lutomirski
2017-11-30 15:44   ` Peter Zijlstra
2017-11-30 15:51     ` Dave Hansen
2017-11-30 16:18       ` Peter Zijlstra
2017-11-30 18:44         ` Dave Hansen
2017-11-30 18:48           ` Andy Lutomirski
2017-11-30 18:53             ` Dave Hansen
2017-11-30 20:01             ` Peter Zijlstra
2017-11-30 21:51               ` Andy Lutomirski
2017-11-30 18:55           ` Peter Zijlstra
2017-11-30 19:00             ` Dave Hansen
2017-11-30 19:20               ` Peter Zijlstra
2017-11-27 10:49 ` [PATCH 16/24] x86/mm/kaiser: Use PCID feature to make user and kernel switches faster Ingo Molnar
2017-11-28  5:22   ` Andy Lutomirski
2017-11-28  7:52     ` Dave Hansen
2017-11-27 10:49 ` [PATCH 17/24] x86/mm/kaiser: Disable native VSYSCALL Ingo Molnar
2017-11-27 10:49 ` [PATCH 18/24] x86/mm/kaiser: Add Kconfig Ingo Molnar
2017-11-27 10:49 ` [PATCH 19/24] x86/mm/kaiser: Respect disabled CPU features Ingo Molnar
2017-11-27 10:49 ` [PATCH 20/24] x86/mm/kaiser: Simplify disabling of global pages Ingo Molnar
2017-11-27 10:49 ` [PATCH 21/24] x86/mm/dump_pagetables: Check Kaiser shadow page table for WX pages Ingo Molnar
2017-11-27 10:49 ` [PATCH 22/24] x86/mm/debug_pagetables: Allow dumping current pagetables Ingo Molnar
2017-11-27 10:49 ` [PATCH 23/24] x86/mm/kaiser: Add boot time disable switch Ingo Molnar
2017-11-27 10:49 ` [PATCH 24/24] x86/mm/kaiser: Use the other page_table_lock pattern Ingo Molnar
2017-11-27 13:51 ` [PATCH 00/24] x86/mm: Add KAISER support Borislav Petkov
2017-11-27 13:57   ` Thomas Gleixner
2017-11-27 13:59     ` Borislav Petkov
2017-11-27 14:03       ` Ingo Molnar
2017-11-27 14:08         ` Ingo Molnar
2017-11-27 19:43 ` Linus Torvalds
2017-11-27 20:01   ` Linus Torvalds

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171127104923.14378-12-mingo@kernel.org \
    --to=mingo@kernel.org \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox