stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Toshi Kani <toshi.kani@hp.com>,
	Borislav Petkov <bp@suse.de>,
	Matt Fleming <matt.fleming@intel.com>
Subject: [PATCH 3.14 04/23] x86/efi: Make efi virtual runtime map passing more robust
Date: Fri, 11 Apr 2014 09:11:53 -0700	[thread overview]
Message-ID: <20140411161200.837494322@linuxfoundation.org> (raw)
In-Reply-To: <20140411161200.236939691@linuxfoundation.org>

3.14-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Borislav Petkov <bp@suse.de>

commit b7b898ae0c0a82489511a1ce1b35f26215e6beb5 upstream.

Currently, running SetVirtualAddressMap() and passing the physical
address of the virtual map array was working only by a lucky coincidence
because the memory was present in the EFI page table too. Until Toshi
went and booted this on a big HP box - the krealloc() manner of resizing
the memmap we're doing did allocate from such physical addresses which
were not mapped anymore and boom:

http://lkml.kernel.org/r/1386806463.1791.295.camel@misato.fc.hp.com

One way to take care of that issue is to reimplement the krealloc thing
but with pages. We start with contiguous pages of order 1, i.e. 2 pages,
and when we deplete that memory (shouldn't happen all that often but you
know firmware) we realloc the next power-of-two pages.

Having the pages, it is much more handy and easy to map them into the
EFI page table with the already existing mapping code which we're using
for building the virtual mappings.

Thanks to Toshi Kani and Matt for the great debugging help.

Reported-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/efi.h     |    3 -
 arch/x86/platform/efi/efi.c    |   99 ++++++++++++++++++++++++++++++++---------
 arch/x86/platform/efi/efi_32.c |    7 ++
 arch/x86/platform/efi/efi_64.c |   32 ++++++++++++-
 4 files changed, 115 insertions(+), 26 deletions(-)

--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -130,7 +130,8 @@ extern void efi_memory_uc(u64 addr, unsi
 extern void __init efi_map_region(efi_memory_desc_t *md);
 extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
 extern void efi_sync_low_kernel_mappings(void);
-extern void efi_setup_page_tables(void);
+extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
 extern void __init old_map_region(efi_memory_desc_t *md);
 extern void __init runtime_code_page_mkexec(void);
 extern void __init efi_runtime_mkexec(void);
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -939,14 +939,36 @@ static void __init efi_map_regions_fixed
 
 }
 
+static void *realloc_pages(void *old_memmap, int old_shift)
+{
+	void *ret;
+
+	ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
+	if (!ret)
+		goto out;
+
+	/*
+	 * A first-time allocation doesn't have anything to copy.
+	 */
+	if (!old_memmap)
+		return ret;
+
+	memcpy(ret, old_memmap, PAGE_SIZE << old_shift);
+
+out:
+	free_pages((unsigned long)old_memmap, old_shift);
+	return ret;
+}
+
 /*
- * Map efi memory ranges for runtime serivce and update new_memmap with virtual
- * addresses.
+ * Map the efi memory ranges of the runtime services and update new_mmap with
+ * virtual addresses.
  */
-static void * __init efi_map_regions(int *count)
+static void * __init efi_map_regions(int *count, int *pg_shift)
 {
+	void *p, *new_memmap = NULL;
+	unsigned long left = 0;
 	efi_memory_desc_t *md;
-	void *p, *tmp, *new_memmap = NULL;
 
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
@@ -961,20 +983,23 @@ static void * __init efi_map_regions(int
 		efi_map_region(md);
 		get_systab_virt_addr(md);
 
-		tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size,
-			       GFP_KERNEL);
-		if (!tmp)
-			goto out;
-		new_memmap = tmp;
+		if (left < memmap.desc_size) {
+			new_memmap = realloc_pages(new_memmap, *pg_shift);
+			if (!new_memmap)
+				return NULL;
+
+			left += PAGE_SIZE << *pg_shift;
+			(*pg_shift)++;
+		}
+
 		memcpy(new_memmap + (*count * memmap.desc_size), md,
 		       memmap.desc_size);
+
+		left -= memmap.desc_size;
 		(*count)++;
 	}
 
 	return new_memmap;
-out:
-	kfree(new_memmap);
-	return NULL;
 }
 
 /*
@@ -1000,9 +1025,9 @@ out:
  */
 void __init efi_enter_virtual_mode(void)
 {
-	efi_status_t status;
+	int err, count = 0, pg_shift = 0;
 	void *new_memmap = NULL;
-	int err, count = 0;
+	efi_status_t status;
 
 	efi.systab = NULL;
 
@@ -1019,20 +1044,24 @@ void __init efi_enter_virtual_mode(void)
 		efi_map_regions_fixed();
 	} else {
 		efi_merge_regions();
-		new_memmap = efi_map_regions(&count);
+		new_memmap = efi_map_regions(&count, &pg_shift);
 		if (!new_memmap) {
 			pr_err("Error reallocating memory, EFI runtime non-functional!\n");
 			return;
 		}
-	}
 
-	err = save_runtime_map();
-	if (err)
-		pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
+		err = save_runtime_map();
+		if (err)
+			pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
+	}
 
 	BUG_ON(!efi.systab);
 
-	efi_setup_page_tables();
+	if (!efi_setup) {
+		if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
+			return;
+	}
+
 	efi_sync_low_kernel_mappings();
 
 	if (!efi_setup) {
@@ -1072,7 +1101,35 @@ void __init efi_enter_virtual_mode(void)
 
 	efi_runtime_mkexec();
 
-	kfree(new_memmap);
+
+	/*
+	 * We mapped the descriptor array into the EFI pagetable above but we're
+	 * not unmapping it here. Here's why:
+	 *
+	 * We're copying select PGDs from the kernel page table to the EFI page
+	 * table and when we do so and make changes to those PGDs like unmapping
+	 * stuff from them, those changes appear in the kernel page table and we
+	 * go boom.
+	 *
+	 * From setup_real_mode():
+	 *
+	 * ...
+	 * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
+	 *
+	 * In this particular case, our allocation is in PGD 0 of the EFI page
+	 * table but we've copied that PGD from PGD[272] of the EFI page table:
+	 *
+	 *	pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
+	 *
+	 * where the direct memory mapping in kernel space is.
+	 *
+	 * new_memmap's VA comes from that direct mapping and thus clearing it,
+	 * it would get cleared in the kernel page table too.
+	 *
+	 * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
+	 */
+	if (!efi_setup)
+		free_pages((unsigned long)new_memmap, pg_shift);
 
 	/* clean DUMMY object */
 	efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -40,7 +40,12 @@
 static unsigned long efi_rt_eflags;
 
 void efi_sync_low_kernel_mappings(void) {}
-void efi_setup_page_tables(void) {}
+void __init efi_dump_pagetable(void) {}
+int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+{
+	return 0;
+}
+void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {}
 
 void __init efi_map_region(efi_memory_desc_t *md)
 {
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -137,12 +137,38 @@ void efi_sync_low_kernel_mappings(void)
 		sizeof(pgd_t) * num_pgds);
 }
 
-void efi_setup_page_tables(void)
+int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
+	pgd_t *pgd;
+
+	if (efi_enabled(EFI_OLD_MEMMAP))
+		return 0;
+
 	efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
+	pgd = __va(efi_scratch.efi_pgt);
+
+	/*
+	 * It can happen that the physical address of new_memmap lands in memory
+	 * which is not mapped in the EFI page table. Therefore we need to go
+	 * and ident-map those pages containing the map before calling
+	 * phys_efi_set_virtual_address_map().
+	 */
+	if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) {
+		pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
+		return 1;
+	}
+
+	efi_scratch.use_pgd = true;
+
+
+	return 0;
+}
+
+void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+{
+	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
 
-	if (!efi_enabled(EFI_OLD_MEMMAP))
-		efi_scratch.use_pgd = true;
+	kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages);
 }
 
 static void __init __map_region(efi_memory_desc_t *md, u64 va)



  parent reply	other threads:[~2014-04-11 16:11 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-04-11 16:11 [PATCH 3.14 00/23] 3.14.1-stable review Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.14 01/23] Revert "ALSA: hda - Increment default stream numbers for AMD HDMI controllers" Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.14 02/23] selinux: correctly label /proc inodes in use before the policy is loaded Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.14 03/23] x86, pageattr: Export page unmapping interface Greg Kroah-Hartman
2014-04-11 16:11 ` Greg Kroah-Hartman [this message]
2014-04-11 16:11 ` [PATCH 3.14 05/23] futex: avoid race between requeue and wake Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.14 06/23] xen-netback: remove pointless clause from if statement Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.14 07/23] xen-netback: worse-case estimate in xenvif_rx_action is underestimating Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.14 08/23] xen-netback: BUG_ON in xenvif_rx_action() not catching overflow Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.14 09/23] ipv6: some ipv6 statistic counters failed to disable bh Greg Kroah-Hartman
2014-04-11 16:11 ` [PATCH 3.14 10/23] netlink: dont compare the nul-termination in nla_strcmp Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 11/23] xen-netback: disable rogue vif in kthread context Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 12/23] Call efx_set_channels() before efx->type->dimension_resources() Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 13/23] net: vxlan: fix crash when interface is created with no group Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 14/23] isdnloop: Validate NUL-terminated strings from user Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 15/23] isdnloop: several buffer overflows Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 16/23] rds: prevent dereference of a NULL device in rds_iw_laddr_check Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 17/23] net/at91_ether: avoid NULL pointer dereference Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 18/23] iwlwifi: mvm: rs: fix search cycle rules Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 19/23] ARC: [nsimosci] Change .dts to use generic 8250 UART Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 20/23] ARC: [nsimosci] Unbork console Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 21/23] futex: Allow architectures to skip futex_atomic_cmpxchg_inatomic() test Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 22/23] m68k: Skip " Greg Kroah-Hartman
2014-04-11 16:12 ` [PATCH 3.14 23/23] crypto: ghash-clmulni-intel - use C implementation for setkey() Greg Kroah-Hartman
2014-04-11 21:49 ` [PATCH 3.14 00/23] 3.14.1-stable review Guenter Roeck
2014-04-12  1:58   ` Greg Kroah-Hartman
2014-04-11 23:46 ` Shuah Khan
2014-04-12  1:59   ` Greg Kroah-Hartman
2014-04-12  5:38   ` Satoru Takeuchi
2014-04-12 15:22     ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140411161200.837494322@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=bp@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matt.fleming@intel.com \
    --cc=stable@vger.kernel.org \
    --cc=toshi.kani@hp.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).