public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Linus Torvalds <torvalds@linux-foundation.org>,
	Andy Lutomirski <luto@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Dave Hansen <dave.hansen@intel.com>,
	Borislav Petkov <bpetkov@suse.de>,
	Greg KH <gregkh@linuxfoundation.org>,
	keescook@google.com, hughd@google.com,
	Brian Gerst <brgerst@gmail.com>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Denys Vlasenko <dvlasenk@redhat.com>,
	Rik van Riel <riel@redhat.com>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>,
	David Laight <David.Laight@aculab.com>,
	Eduardo Valentin <eduval@amazon.com>,
	aliguori@amazon.com, Will Deacon <will.deacon@arm.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	daniel.gruss@iaik.tugraz.at,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Ingo Molnar <mingo@kernel.org>, Borislav Petkov <bp@alien8.de>,
	"H. Peter Anvin" <hpa@zytor.com>
Subject: [patch V181 39/54] x86/events/intel/ds: Map debug buffers in cpu_entry_area
Date: Wed, 20 Dec 2017 22:35:42 +0100	[thread overview]
Message-ID: <20171220215443.827023196@linutronix.de> (raw)
In-Reply-To: 20171220213503.672610178@linutronix.de

[-- Attachment #1: 0044-x86-events-intel-ds-Map-debug-buffers-in-fixmap.patch --]
[-- Type: text/plain, Size: 7825 bytes --]

From: Hugh Dickins <hughd@google.com>

The BTS and PEBS buffers both have their virtual addresses programmed into
the hardware.  This means that any access to them is performed via the page
tables.  The times that the hardware accesses these are entirely dependent
on how the performance monitoring hardware events are set up.  In other
words, there is no way for the kernel to tell when the hardware might
access these buffers.

To avoid perf crashes, place 'debug_store' in the cpu_entry_area, allocate
pages for the BTS and PEBS buffers, and map those pages into the
cpu_entry_area as well.

The PEBS fixup buffer does not need this treatment.

[ tglx: Got rid of the kaiser_add_mapping() cruft ]

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: keescook@google.com
---
 arch/x86/events/intel/ds.c   |  125 +++++++++++++++++++++++++++----------------
 arch/x86/events/perf_event.h |    2 
 2 files changed, 82 insertions(+), 45 deletions(-)

--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -3,6 +3,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 
@@ -280,17 +281,52 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
-static int alloc_pebs_buffer(int cpu)
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	phys_addr_t pa;
+	size_t msz = 0;
+
+	pa = virt_to_phys(addr);
+	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, pa, prot);
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+	size_t msz = 0;
+
+	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, 0, PAGE_NONE);
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+	unsigned int order = get_order(size);
 	int node = cpu_to_node(cpu);
-	int max;
-	void *buffer, *ibuffer;
+	struct page *page;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+	if (buffer)
+		free_pages((unsigned long)buffer, get_order(size));
+}
+
+static int alloc_pebs_buffer(int cpu)
+{
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	size_t bsiz = x86_pmu.pebs_buffer_size;
+	int max, node = cpu_to_node(cpu);
+	void *buffer, *ibuffer, *cea;
 
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -301,25 +337,27 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree_pages(buffer, bsiz);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
-
-	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
-	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_pebs_vaddr = buffer;
+	/* Update the cpu entry area mapping */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds->pebs_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
 	ds->pebs_index = ds->pebs_buffer_base;
-	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-		max * x86_pmu.pebs_record_size;
-
+	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 	return 0;
 }
 
 static void release_pebs_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.pebs)
 		return;
@@ -327,73 +365,70 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	/* Clear the cpu entry area mapping */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
+	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+	hwev->ds_pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	int node = cpu_to_node(cpu);
-	int max, thresh;
-	void *buffer;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *buffer, *cea;
+	int max;
 
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
 	}
-
-	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-	thresh = max / 16;
-
-	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_bts_vaddr = buffer;
+	/* Update the cpu entry area mapping */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds->bts_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 	ds->bts_index = ds->bts_buffer_base;
-	ds->bts_absolute_maximum = ds->bts_buffer_base +
-		max * BTS_RECORD_SIZE;
-	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-		thresh * BTS_RECORD_SIZE;
-
+	max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+	ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
 	return 0;
 }
 
 static void release_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	/* Clear the cpu entry area mapping */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds_clear_cea(cea, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
+	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+	hwev->ds_bts_vaddr = NULL;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
-
 	return 0;
 }
 
 static void release_ds_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -199,6 +199,8 @@ struct cpu_hw_events {
 	 * Intel DebugStore bits
 	 */
 	struct debug_store	*ds;
+	void			*ds_pebs_vaddr;
+	void			*ds_bts_vaddr;
 	u64			pebs_enabled;
 	int			n_pebs;
 	int			n_large_pebs;

  parent reply	other threads:[~2017-12-20 22:03 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-20 21:35 [patch V181 00/54] x86/pti: Final XMAS release Thomas Gleixner
2017-12-20 21:35 ` [patch V181 01/54] x86/Kconfig: Limit NR_CPUS on 32bit to a sane amount Thomas Gleixner
2017-12-20 21:35 ` [patch V181 02/54] x86/mm/dump_pagetables: Check PAGE_PRESENT for real Thomas Gleixner
2017-12-20 21:35 ` [patch V181 03/54] x86/mm/dump_pagetables: Make the address hints correct and readable Thomas Gleixner
2017-12-20 21:35 ` [patch V181 04/54] x86/vsyscall/64: Explicitly set _PAGE_USER in the pagetable hierarchy Thomas Gleixner
2017-12-20 21:35 ` [patch V181 05/54] x86/vsyscall/64: Warn and fail vsyscall emulation in NATIVE mode Thomas Gleixner
2017-12-20 21:35 ` [patch V181 06/54] arch: Allow arch_dup_mmap() to fail Thomas Gleixner
2017-12-20 21:35 ` [patch V181 07/54] x86/ldt: Rework locking Thomas Gleixner
2017-12-20 21:35 ` [patch V181 08/54] x86/ldt: Prevent ldt inheritance on exec Thomas Gleixner
2017-12-20 21:35 ` [patch V181 09/54] x86/mm/64: Improve the memory map documentation Thomas Gleixner
2017-12-20 21:35 ` [patch V181 10/54] x86/doc: Remove obvious weirdness Thomas Gleixner
2017-12-20 21:35 ` [patch V181 11/54] x86/entry: Remove SYSENTER_stack naming Thomas Gleixner
2017-12-20 21:35 ` [patch V181 12/54] x86/uv: Use the right tlbflush API Thomas Gleixner
2017-12-20 21:35 ` [patch V181 13/54] x86/microcode: Dont abuse the tlbflush interface Thomas Gleixner
2017-12-20 21:35 ` [patch V181 14/54] x86/mm: Use __flush_tlb_one() for kernel memory Thomas Gleixner
2017-12-20 21:35 ` [patch V181 15/54] x86/mm: Remove superfluous barriers Thomas Gleixner
2017-12-20 21:35 ` [patch V181 16/54] x86/mm: Clarify which functions are supposed to flush what Thomas Gleixner
2017-12-20 21:35 ` [patch V181 17/54] x86/mm: Move the CR3 construction functions to tlbflush.h Thomas Gleixner
2017-12-20 21:35 ` [patch V181 18/54] x86/mm: Remove hard-coded ASID limit checks Thomas Gleixner
2017-12-20 21:35 ` [patch V181 19/54] x86/mm: Put MMU to hardware ASID translation in one place Thomas Gleixner
2017-12-20 21:35 ` [patch V181 20/54] x86/mm: Create asm/invpcid.h Thomas Gleixner
2017-12-20 21:35 ` [patch V181 21/54] x86/cpu_entry_area: Move it to a separate unit Thomas Gleixner
2017-12-20 22:29   ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 22/54] x86/cpu_entry_area: Move it out of fixmap Thomas Gleixner
2017-12-22  2:46   ` [V181,22/54] " Andrei Vagin
2017-12-22 13:05     ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 23/54] init: Invoke init_espfix_bsp() from mm_init() Thomas Gleixner
2017-12-20 21:35 ` [patch V181 24/54] x86/cpufeatures: Add X86_BUG_CPU_INSECURE Thomas Gleixner
2017-12-20 21:35 ` [patch V181 25/54] x86/mm/pti: Disable global pages if PAGE_TABLE_ISOLATION=y Thomas Gleixner
2017-12-20 21:35 ` [patch V181 26/54] x86/mm/pti: Prepare the x86/entry assembly code for entry/exit CR3 switching Thomas Gleixner
2017-12-20 21:35 ` [patch V181 27/54] x86/mm/pti: Add infrastructure for page table isolation Thomas Gleixner
2017-12-20 21:35 ` [patch V181 28/54] x86/mm/pti: Add mapping helper functions Thomas Gleixner
2017-12-20 21:35 ` [patch V181 29/54] x86/mm/pti: Allow NX poison to be set in p4d/pgd Thomas Gleixner
2017-12-20 21:35 ` [patch V181 30/54] x86/mm/pti: Allocate a separate user PGD Thomas Gleixner
2017-12-20 21:35 ` [patch V181 31/54] x86/mm/pti: Populate " Thomas Gleixner
2017-12-20 21:35 ` [patch V181 32/54] x86/mm/pti: Add functions to clone kernel PMDs Thomas Gleixner
2017-12-20 21:35 ` [patch V181 33/54] x86/mm/pti: Force entry through trampoline when PTI active Thomas Gleixner
2017-12-20 21:35 ` [patch V181 34/54] x86/mm/pti: Share cpu_entry_area with user space page tables Thomas Gleixner
2017-12-20 21:35 ` [patch V181 35/54] x86/entry: Align entry text section to PMD boundary Thomas Gleixner
2018-05-17 15:58   ` Josh Poimboeuf
2018-05-18 10:38     ` Thomas Gleixner
2017-12-20 21:35 ` [patch V181 36/54] x86/mm/pti: Share entry text PMD Thomas Gleixner
2017-12-20 21:35 ` [patch V181 37/54] x86/mm/pti: Map ESPFIX into user space Thomas Gleixner
2017-12-20 21:35 ` [patch V181 38/54] x86/cpu_entry_area: Add debugstore entries to cpu_entry_area Thomas Gleixner
2017-12-20 21:35 ` Thomas Gleixner [this message]
2017-12-20 21:35 ` [patch V181 40/54] x86/mm/64: Make a full PGD-entry size hole in the memory map Thomas Gleixner
2017-12-20 21:35 ` [patch V181 41/54] x86/pti: Put the LDT in its own PGD if PTI is on Thomas Gleixner
2017-12-20 21:35 ` [patch V181 42/54] x86/pti: Map the vsyscall page if needed Thomas Gleixner
2017-12-20 21:35 ` [patch V181 43/54] x86/mm: Allow flushing for future ASID switches Thomas Gleixner
2017-12-20 21:35 ` [patch V181 44/54] x86/mm: Abstract switching CR3 Thomas Gleixner
2017-12-20 21:35 ` [patch V181 45/54] x86/mm: Use/Fix PCID to optimize user/kernel switches Thomas Gleixner
2017-12-20 21:35 ` [patch V181 46/54] x86/mm: Optimize RESTORE_CR3 Thomas Gleixner
2017-12-20 21:35 ` [patch V181 47/54] x86/mm: Use INVPCID for __native_flush_tlb_single() Thomas Gleixner
2017-12-20 21:35 ` [patch V181 48/54] x86/mm: Clarify the whole ASID/kernel PCID/user PCID naming Thomas Gleixner
2017-12-20 21:35 ` [patch V181 49/54] x86/dumpstack: Indicate in Oops whether pti is configured and enabled Thomas Gleixner
2017-12-20 22:03   ` Jiri Kosina
2017-12-20 21:35 ` [patch V181 50/54] x86/mm/pti: Add Kconfig Thomas Gleixner
2017-12-20 21:35 ` [patch V181 51/54] x86/mm/dump_pagetables: Add page table directory Thomas Gleixner
2017-12-20 21:35 ` [patch V181 52/54] x86/mm/dump_pagetables: Check user space page table for WX pages Thomas Gleixner
2017-12-20 21:35 ` [patch V181 53/54] x86/mm/dump_pagetables: Allow dumping current pagetables Thomas Gleixner
2017-12-20 21:35 ` [patch V181 54/54] x86/ldt: Make the LDT mapping RO Thomas Gleixner
2017-12-20 23:48 ` [patch V181 00/54] x86/pti: Final XMAS release Thomas Gleixner
2017-12-21 12:57 ` Kirill A. Shutemov
2017-12-21 16:26   ` Kirill A. Shutemov
2017-12-21 18:39     ` Thomas Gleixner
2017-12-21 15:57 ` Boris Ostrovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171220215443.827023196@linutronix.de \
    --to=tglx@linutronix.de \
    --cc=David.Laight@aculab.com \
    --cc=aliguori@amazon.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=bp@alien8.de \
    --cc=bpetkov@suse.de \
    --cc=brgerst@gmail.com \
    --cc=daniel.gruss@iaik.tugraz.at \
    --cc=dave.hansen@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dvlasenk@redhat.com \
    --cc=eduval@amazon.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=jgross@suse.com \
    --cc=jpoimboe@redhat.com \
    --cc=keescook@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=vbabka@suse.cz \
    --cc=will.deacon@arm.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox