From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.linuxfoundation.org ([140.211.169.12]:56946 "EHLO mail.linuxfoundation.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750929AbdL3OxN (ORCPT ); Sat, 30 Dec 2017 09:53:13 -0500 Subject: Patch "x86/events/intel/ds: Map debug buffers in cpu_entry_area" has been added to the 4.14-stable tree To: hughd@google.com, David.Laight@aculab.com, boris.ostrovsky@oracle.com, bp@alien8.de, brgerst@gmail.com, dave.hansen@linux.intel.com, dvlasenk@redhat.com, eduval@amazon.com, gregkh@linuxfoundation.org, hpa@zytor.com, jgross@suse.com, jpoimboe@redhat.com, luto@kernel.org, mingo@kernel.org, peterz@infradead.org, tglx@linutronix.de, torvalds@linux-foundation.org, will.deacon@arm.com Cc: , From: Date: Sat, 30 Dec 2017 15:53:10 +0100 Message-ID: <151464559018710@kroah.com> MIME-Version: 1.0 Content-Type: text/plain; charset=ANSI_X3.4-1968 Content-Transfer-Encoding: 8bit Sender: stable-owner@vger.kernel.org List-ID: This is a note to let you know that I've just added the patch titled x86/events/intel/ds: Map debug buffers in cpu_entry_area to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: x86-events-intel-ds-map-debug-buffers-in-cpu_entry_area.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let know about it. >>From c1961a4631daef4aeabee8e368b1b13e8f173c91 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 4 Dec 2017 15:07:50 +0100 Subject: x86/events/intel/ds: Map debug buffers in cpu_entry_area From: Hugh Dickins commit c1961a4631daef4aeabee8e368b1b13e8f173c91 upstream. The BTS and PEBS buffers both have their virtual addresses programmed into the hardware. This means that any access to them is performed via the page tables. The times that the hardware accesses these are entirely dependent on how the performance monitoring hardware events are set up. In other words, there is no way for the kernel to tell when the hardware might access these buffers. To avoid perf crashes, place 'debug_store' allocate pages and map them into the cpu_entry_area. The PEBS fixup buffer does not need this treatment. [ tglx: Got rid of the kaiser_add_mapping() complication ] Signed-off-by: Hugh Dickins Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: keescook@google.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/ds.c | 125 +++++++++++++++++++++++++++---------------- arch/x86/events/perf_event.h | 2 2 files changed, 82 insertions(+), 45 deletions(-) --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -280,17 +281,52 @@ void fini_debug_store_on_cpu(int cpu) static DEFINE_PER_CPU(void *, insn_buffer); -static int alloc_pebs_buffer(int cpu) +static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + phys_addr_t pa; + size_t msz = 0; + + pa = virt_to_phys(addr); + for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, pa, prot); +} + +static void ds_clear_cea(void *cea, size_t size) +{ + size_t msz = 0; + + for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, 0, PAGE_NONE); +} + +static void *dsalloc_pages(size_t size, gfp_t flags, int cpu) +{ + unsigned int order = get_order(size); int node = cpu_to_node(cpu); - int max; - void *buffer, *ibuffer; + struct page *page; + + page = __alloc_pages_node(node, flags | __GFP_ZERO, order); + return page ? page_address(page) : NULL; +} + +static void dsfree_pages(const void *buffer, size_t size) +{ + if (buffer) + free_pages((unsigned long)buffer, get_order(size)); +} + +static int alloc_pebs_buffer(int cpu) +{ + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + size_t bsiz = x86_pmu.pebs_buffer_size; + int max, node = cpu_to_node(cpu); + void *buffer, *ibuffer, *cea; if (!x86_pmu.pebs) return 0; - buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); + buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); if (unlikely(!buffer)) return -ENOMEM; @@ -301,25 +337,27 @@ static int alloc_pebs_buffer(int cpu) if (x86_pmu.intel_cap.pebs_format < 2) { ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); if (!ibuffer) { - kfree(buffer); + dsfree_pages(buffer, bsiz); return -ENOMEM; } per_cpu(insn_buffer, cpu) = ibuffer; } - - max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size; - - ds->pebs_buffer_base = (u64)(unsigned long)buffer; + hwev->ds_pebs_vaddr = buffer; + /* Update the cpu entry area mapping */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds->pebs_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL); ds->pebs_index = ds->pebs_buffer_base; - ds->pebs_absolute_maximum = ds->pebs_buffer_base + - max * x86_pmu.pebs_record_size; - + max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size); + ds->pebs_absolute_maximum = ds->pebs_buffer_base + max; return 0; } static void release_pebs_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *cea; if (!ds || !x86_pmu.pebs) return; @@ -327,73 +365,70 @@ static void release_pebs_buffer(int cpu) kfree(per_cpu(insn_buffer, cpu)); per_cpu(insn_buffer, cpu) = NULL; - kfree((void *)(unsigned long)ds->pebs_buffer_base); + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds_clear_cea(cea, x86_pmu.pebs_buffer_size); ds->pebs_buffer_base = 0; + dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size); + hwev->ds_pebs_vaddr = NULL; } static int alloc_bts_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - int node = cpu_to_node(cpu); - int max, thresh; - void *buffer; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *buffer, *cea; + int max; if (!x86_pmu.bts) return 0; - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu); if (unlikely(!buffer)) { WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); return -ENOMEM; } - - max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; - thresh = max / 16; - - ds->bts_buffer_base = (u64)(unsigned long)buffer; + hwev->ds_bts_vaddr = buffer; + /* Update the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds->bts_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL); ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = ds->bts_buffer_base + - max * BTS_RECORD_SIZE; - ds->bts_interrupt_threshold = ds->bts_absolute_maximum - - thresh * BTS_RECORD_SIZE; - + max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE); + ds->bts_absolute_maximum = ds->bts_buffer_base + max; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16); return 0; } static void release_bts_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *cea; if (!ds || !x86_pmu.bts) return; - kfree((void *)(unsigned long)ds->bts_buffer_base); + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds_clear_cea(cea, BTS_BUFFER_SIZE); ds->bts_buffer_base = 0; + dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE); + hwev->ds_bts_vaddr = NULL; } static int alloc_ds_buffer(int cpu) { - int node = cpu_to_node(cpu); - struct debug_store *ds; - - ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); - if (unlikely(!ds)) - return -ENOMEM; + struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store; + memset(ds, 0, sizeof(*ds)); per_cpu(cpu_hw_events, cpu).ds = ds; - return 0; } static void release_ds_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - return; - per_cpu(cpu_hw_events, cpu).ds = NULL; - kfree(ds); } void release_ds_buffers(void) --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -199,6 +199,8 @@ struct cpu_hw_events { * Intel DebugStore bits */ struct debug_store *ds; + void *ds_pebs_vaddr; + void *ds_bts_vaddr; u64 pebs_enabled; int n_pebs; int n_large_pebs; Patches currently in stable-queue which might be from hughd@google.com are queue-4.14/x86-mm-pti-allow-nx-poison-to-be-set-in-p4d-pgd.patch queue-4.14/x86-dumpstack-indicate-in-oops-whether-pti-is-configured-and-enabled.patch queue-4.14/x86-mm-clarify-the-whole-asid-kernel-pcid-user-pcid-naming.patch queue-4.14/x86-mm-abstract-switching-cr3.patch queue-4.14/x86-mm-optimize-restore_cr3.patch queue-4.14/x86-mm-pti-force-entry-through-trampoline-when-pti-active.patch queue-4.14/x86-entry-align-entry-text-section-to-pmd-boundary.patch queue-4.14/x86-cpu_entry_area-add-debugstore-entries-to-cpu_entry_area.patch queue-4.14/x86-mm-pti-share-entry-text-pmd.patch queue-4.14/x86-mm-dump_pagetables-check-user-space-page-table-for-wx-pages.patch queue-4.14/x86-mm-pti-disable-global-pages-if-page_table_isolation-y.patch queue-4.14/x86-mm-dump_pagetables-allow-dumping-current-pagetables.patch queue-4.14/x86-mm-pti-add-infrastructure-for-page-table-isolation.patch queue-4.14/x86-cpufeatures-add-x86_bug_cpu_insecure.patch queue-4.14/x86-mm-use-fix-pcid-to-optimize-user-kernel-switches.patch queue-4.14/x86-mm-use-invpcid-for-__native_flush_tlb_single.patch queue-4.14/x86-mm-pti-prepare-the-x86-entry-assembly-code-for-entry-exit-cr3-switching.patch queue-4.14/x86-mm-allow-flushing-for-future-asid-switches.patch queue-4.14/x86-mm-pti-add-kconfig.patch queue-4.14/x86-mm-pti-add-mapping-helper-functions.patch queue-4.14/x86-mm-pti-share-cpu_entry_area-with-user-space-page-tables.patch queue-4.14/x86-mm-pti-add-functions-to-clone-kernel-pmds.patch queue-4.14/x86-pti-add-the-pti-cmdline-option-and-documentation.patch queue-4.14/x86-events-intel-ds-map-debug-buffers-in-cpu_entry_area.patch queue-4.14/x86-mm-pti-allocate-a-separate-user-pgd.patch queue-4.14/x86-mm-pti-populate-user-pgd.patch queue-4.14/x86-mm-dump_pagetables-add-page-table-directory-to-the-debugfs-vfs-hierarchy.patch