xen-devel.lists.xenproject.org archive mirror
From: Daniel Kiper <daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
To: andrew.cooper3-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org,
	ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org,
	hpa-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org,
	jbeulich-IBi9RG/b67k@public.gmane.org,
	konrad.wilk-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org,
	maxim.uvarov-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org,
	mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org,
	vgoyal-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	x86-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	kexec-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	virtualization-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org,
	xen-devel-GuqFBffKawuULHF6PoxzQEEOCMrvLtNR@public.gmane.org
Cc: Daniel Kiper <daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
Subject: [PATCH v3 01/11] kexec: introduce kexec firmware support
Date: Thu, 27 Dec 2012 03:18:50 +0100
Message-ID: <1356574740-6806-2-git-send-email-daniel.kiper@oracle.com>
In-Reply-To: <1356574740-6806-1-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>

Some kexec/kdump implementations (e.g. Xen PVOPS) cannot use the default
Linux infrastructure and require support from the firmware and/or the
hypervisor. To cope with that problem this patch introduces the kexec
firmware infrastructure, which allows a developer to use all kexec/kdump
features of a given firmware or hypervisor.
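
For illustration only (this sketch is not part of the patch): a platform
that wants the firmware code path implements the mf_kexec_*() hooks
declared in include/linux/kexec.h below and sets kexec_use_firmware
during early boot, after which kexec_load(), crash_kexec() and
kernel_kexec() are routed through the firmware_* entry points added
here. The hook bodies, the initcall name and CONFIG_KEXEC_FIRMWARE being
enabled are assumptions of this sketch, not part of the patch:

    #include <linux/init.h>
    #include <linux/kexec.h>

    int mf_kexec_prepare(struct kimage *image)
    {
            /* e.g. translate image->segment[] into firmware/hypervisor
             * load arguments
             */
            return 0;
    }

    int mf_kexec_load(struct kimage *image)
    {
            /* e.g. hand the prepared image over to the firmware */
            return 0;
    }

    void mf_kexec(struct kimage *image)
    {
            /* e.g. ask the firmware to switch to the loaded image;
             * does not return on success
             */
    }

    /* The remaining hooks (mf_kexec_cleanup(), mf_kexec_unload(),
     * mf_kexec_shutdown() and the page/address translation helpers)
     * must be provided as well.
     */

    static int __init example_fw_kexec_init(void)
    {
            kexec_use_firmware = true;      /* select the firmware path */
            return 0;
    }
    subsys_initcall(example_fw_kexec_init);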

v3 - suggestions/fixes:
   - replace kexec_ops struct by kexec firmware infrastructure
     (suggested by Eric Biederman).

v2 - suggestions/fixes:
   - add comment for kexec_ops.crash_alloc_temp_store member
     (suggested by Konrad Rzeszutek Wilk),
   - simplify kexec_ops usage
     (suggested by Konrad Rzeszutek Wilk).

Signed-off-by: Daniel Kiper <daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
---
 include/linux/kexec.h   |   26 ++-
 kernel/Makefile         |    1 +
 kernel/kexec-firmware.c |  743 +++++++++++++++++++++++++++++++++++++++++++++++
 kernel/kexec.c          |   46 +++-
 4 files changed, 809 insertions(+), 7 deletions(-)
 create mode 100644 kernel/kexec-firmware.c

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d0b8458..9568457 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -116,17 +116,34 @@ struct kimage {
 #endif
 };
 
-
-
 /* kexec interface functions */
 extern void machine_kexec(struct kimage *image);
 extern int machine_kexec_prepare(struct kimage *image);
 extern void machine_kexec_cleanup(struct kimage *image);
+extern struct page *mf_kexec_kimage_alloc_pages(gfp_t gfp_mask,
+						unsigned int order,
+						unsigned long limit);
+extern void mf_kexec_kimage_free_pages(struct page *page);
+extern unsigned long mf_kexec_page_to_pfn(struct page *page);
+extern struct page *mf_kexec_pfn_to_page(unsigned long mfn);
+extern unsigned long mf_kexec_virt_to_phys(volatile void *address);
+extern void *mf_kexec_phys_to_virt(unsigned long address);
+extern int mf_kexec_prepare(struct kimage *image);
+extern int mf_kexec_load(struct kimage *image);
+extern void mf_kexec_cleanup(struct kimage *image);
+extern void mf_kexec_unload(struct kimage *image);
+extern void mf_kexec_shutdown(void);
+extern void mf_kexec(struct kimage *image);
 extern asmlinkage long sys_kexec_load(unsigned long entry,
 					unsigned long nr_segments,
 					struct kexec_segment __user *segments,
 					unsigned long flags);
+extern long firmware_sys_kexec_load(unsigned long entry,
+					unsigned long nr_segments,
+					struct kexec_segment __user *segments,
+					unsigned long flags);
 extern int kernel_kexec(void);
+extern int firmware_kernel_kexec(void);
 #ifdef CONFIG_COMPAT
 extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
 				unsigned long nr_segments,
@@ -135,7 +152,10 @@ extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
 #endif
 extern struct page *kimage_alloc_control_pages(struct kimage *image,
 						unsigned int order);
+extern struct page *firmware_kimage_alloc_control_pages(struct kimage *image,
+							unsigned int order);
 extern void crash_kexec(struct pt_regs *);
+extern void firmware_crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
 void crash_save_cpu(struct pt_regs *regs, int cpu);
 void crash_save_vmcoreinfo(void);
@@ -168,6 +188,8 @@ unsigned long paddr_vmcoreinfo_note(void);
 #define VMCOREINFO_CONFIG(name) \
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
 
+extern bool kexec_use_firmware;
+
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
 
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c072b6..bc96b2f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -58,6 +58,7 @@ obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificat
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
+obj-$(CONFIG_KEXEC_FIRMWARE) += kexec-firmware.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
diff --git a/kernel/kexec-firmware.c b/kernel/kexec-firmware.c
new file mode 100644
index 0000000..f6ddd4c
--- /dev/null
+++ b/kernel/kexec-firmware.c
@@ -0,0 +1,743 @@
+/*
+ * Copyright (C) 2002-2004 Eric Biederman  <ebiederm-aS9lmoZGLiVWk0Htik3J/w@public.gmane.org>
+ * Copyright (C) 2012 Daniel Kiper, Oracle Corporation
+ *
+ * Most of the code here is a copy of kernel/kexec.c.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/atomic.h>
+#include <linux/errno.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * KIMAGE_NO_DEST is an impossible destination address..., for
+ * allocating pages whose destination address we do not care about.
+ */
+#define KIMAGE_NO_DEST (-1UL)
+
+static int kimage_is_destination_range(struct kimage *image,
+				       unsigned long start, unsigned long end);
+static struct page *kimage_alloc_page(struct kimage *image,
+				       gfp_t gfp_mask,
+				       unsigned long dest);
+
+static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
+	                    unsigned long nr_segments,
+                            struct kexec_segment __user *segments)
+{
+	size_t segment_bytes;
+	struct kimage *image;
+	unsigned long i;
+	int result;
+
+	/* Allocate a controlling structure */
+	result = -ENOMEM;
+	image = kzalloc(sizeof(*image), GFP_KERNEL);
+	if (!image)
+		goto out;
+
+	image->head = 0;
+	image->entry = &image->head;
+	image->last_entry = &image->head;
+	image->control_page = ~0; /* By default this does not apply */
+	image->start = entry;
+	image->type = KEXEC_TYPE_DEFAULT;
+
+	/* Initialize the list of control pages */
+	INIT_LIST_HEAD(&image->control_pages);
+
+	/* Initialize the list of destination pages */
+	INIT_LIST_HEAD(&image->dest_pages);
+
+	/* Initialize the list of unusable pages */
+	INIT_LIST_HEAD(&image->unuseable_pages);
+
+	/* Read in the segments */
+	image->nr_segments = nr_segments;
+	segment_bytes = nr_segments * sizeof(*segments);
+	result = copy_from_user(image->segment, segments, segment_bytes);
+	if (result) {
+		result = -EFAULT;
+		goto out;
+	}
+
+	/*
+	 * Verify we have good destination addresses.  The caller is
+	 * responsible for making certain we don't attempt to load
+	 * the new image into invalid or reserved areas of RAM.  This
+	 * just verifies it is an address we can use.
+	 *
+	 * Since the kernel does everything in page size chunks ensure
+	 * the destination addresses are page aligned.  Too many
+	 * special cases crop up when we don't do this.  The most
+	 * insidious is getting overlapping destination addresses
+	 * simply because addresses are changed to page size
+	 * granularity.
+	 */
+	result = -EADDRNOTAVAIL;
+	for (i = 0; i < nr_segments; i++) {
+		unsigned long mstart, mend;
+
+		mstart = image->segment[i].mem;
+		mend   = mstart + image->segment[i].memsz;
+		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
+			goto out;
+		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
+			goto out;
+	}
+
+	/* Verify our destination addresses do not overlap.
+	 * If we allowed overlapping destination addresses
+	 * through, very weird things can happen with no
+	 * easy explanation as one segment stops on another.
+	 */
+	result = -EINVAL;
+	for (i = 0; i < nr_segments; i++) {
+		unsigned long mstart, mend;
+		unsigned long j;
+
+		mstart = image->segment[i].mem;
+		mend   = mstart + image->segment[i].memsz;
+		for (j = 0; j < i; j++) {
+			unsigned long pstart, pend;
+			pstart = image->segment[j].mem;
+			pend   = pstart + image->segment[j].memsz;
+			/* Do the segments overlap ? */
+			if ((mend > pstart) && (mstart < pend))
+				goto out;
+		}
+	}
+
+	/* Ensure our buffer sizes are strictly less than
+	 * our memory sizes.  This should always be the case,
+	 * and it is easier to check up front than to be surprised
+	 * later on.
+	 */
+	result = -EINVAL;
+	for (i = 0; i < nr_segments; i++) {
+		if (image->segment[i].bufsz > image->segment[i].memsz)
+			goto out;
+	}
+
+	result = 0;
+out:
+	if (result == 0)
+		*rimage = image;
+	else
+		kfree(image);
+
+	return result;
+
+}
+
+static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
+				unsigned long nr_segments,
+				struct kexec_segment __user *segments)
+{
+	int result;
+	struct kimage *image;
+
+	/* Allocate and initialize a controlling structure */
+	image = NULL;
+	result = do_kimage_alloc(&image, entry, nr_segments, segments);
+	if (result)
+		goto out;
+
+	*rimage = image;
+
+	/*
+	 * Find a location for the control code buffer, and add it to
+	 * the vector of segments so that its pages will also be
+	 * counted as destination pages.
+	 */
+	result = -ENOMEM;
+	image->control_code_page = firmware_kimage_alloc_control_pages(image,
+					   get_order(KEXEC_CONTROL_PAGE_SIZE));
+	if (!image->control_code_page) {
+		printk(KERN_ERR "Could not allocate control_code_buffer\n");
+		goto out;
+	}
+
+	image->swap_page = firmware_kimage_alloc_control_pages(image, 0);
+	if (!image->swap_page) {
+		printk(KERN_ERR "Could not allocate swap buffer\n");
+		goto out;
+	}
+
+	result = 0;
+ out:
+	if (result == 0)
+		*rimage = image;
+	else
+		kfree(image);
+
+	return result;
+}
+
+static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
+				unsigned long nr_segments,
+				struct kexec_segment __user *segments)
+{
+	int result;
+	struct kimage *image;
+	unsigned long i;
+
+	image = NULL;
+	/* Verify we have a valid entry point */
+	if ((entry < crashk_res.start) || (entry > crashk_res.end)) {
+		result = -EADDRNOTAVAIL;
+		goto out;
+	}
+
+	/* Allocate and initialize a controlling structure */
+	result = do_kimage_alloc(&image, entry, nr_segments, segments);
+	if (result)
+		goto out;
+
+	/* Enable the special crash kernel control page
+	 * allocation policy.
+	 */
+	image->control_page = crashk_res.start;
+	image->type = KEXEC_TYPE_CRASH;
+
+	/*
+	 * Verify we have good destination addresses.  Normally
+	 * the caller is responsible for making certain we don't
+	 * attempt to load the new image into invalid or reserved
+	 * areas of RAM.  But crash kernels are preloaded into a
+	 * reserved area of ram.  We must ensure the addresses
+	 * are in the reserved area otherwise preloading the
+	 * kernel could corrupt things.
+	 */
+	result = -EADDRNOTAVAIL;
+	for (i = 0; i < nr_segments; i++) {
+		unsigned long mstart, mend;
+
+		mstart = image->segment[i].mem;
+		mend = mstart + image->segment[i].memsz - 1;
+		/* Ensure we are within the crash kernel limits */
+		if ((mstart < crashk_res.start) || (mend > crashk_res.end))
+			goto out;
+	}
+
+	/*
+	 * Find a location for the control code buffer, and add it to
+	 * the vector of segments so that its pages will also be
+	 * counted as destination pages.
+	 */
+	result = -ENOMEM;
+	image->control_code_page = firmware_kimage_alloc_control_pages(image,
+					   get_order(KEXEC_CONTROL_PAGE_SIZE));
+	if (!image->control_code_page) {
+		printk(KERN_ERR "Could not allocate control_code_buffer\n");
+		goto out;
+	}
+
+	result = 0;
+out:
+	if (result == 0)
+		*rimage = image;
+	else
+		kfree(image);
+
+	return result;
+}
+
+static int kimage_is_destination_range(struct kimage *image,
+					unsigned long start,
+					unsigned long end)
+{
+	unsigned long i;
+
+	for (i = 0; i < image->nr_segments; i++) {
+		unsigned long mstart, mend;
+
+		mstart = image->segment[i].mem;
+		mend = mstart + image->segment[i].memsz;
+		if ((end > mstart) && (start < mend))
+			return 1;
+	}
+
+	return 0;
+}
+
+static void kimage_free_page_list(struct list_head *list)
+{
+	struct list_head *pos, *next;
+
+	list_for_each_safe(pos, next, list) {
+		struct page *page;
+
+		page = list_entry(pos, struct page, lru);
+		list_del(&page->lru);
+		mf_kexec_kimage_free_pages(page);
+	}
+}
+
+static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
+							unsigned int order)
+{
+	/* Control pages are special, they are the intermediaries
+	 * that are needed while we copy the rest of the pages
+	 * to their final resting place.  As such they must
+	 * not conflict with either the destination addresses
+	 * or memory the kernel is already using.
+	 *
+	 * The only case where we really need more than one of
+	 * these are for architectures where we cannot disable
+	 * the MMU and must instead generate an identity mapped
+	 * page table for all of the memory.
+	 *
+	 * At worst this runs in O(N) of the image size.
+	 */
+	struct list_head extra_pages;
+	struct page *pages;
+	unsigned int count;
+
+	count = 1 << order;
+	INIT_LIST_HEAD(&extra_pages);
+
+	/* Loop while I can allocate a page and the page allocated
+	 * is a destination page.
+	 */
+	do {
+		unsigned long pfn, epfn, addr, eaddr;
+
+		pages = mf_kexec_kimage_alloc_pages(GFP_KERNEL, order,
+							KEXEC_CONTROL_MEMORY_LIMIT);
+		if (!pages)
+			break;
+		pfn   = mf_kexec_page_to_pfn(pages);
+		epfn  = pfn + count;
+		addr  = pfn << PAGE_SHIFT;
+		eaddr = epfn << PAGE_SHIFT;
+		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
+			      kimage_is_destination_range(image, addr, eaddr)) {
+			list_add(&pages->lru, &extra_pages);
+			pages = NULL;
+		}
+	} while (!pages);
+
+	if (pages) {
+		/* Remember the allocated page... */
+		list_add(&pages->lru, &image->control_pages);
+
+		/* Because the page is already in its destination
+		 * location we will never allocate another page at
+		 * that address.  Therefore mf_kexec_kimage_alloc_pages
+		 * will not return it (again) and we don't need
+		 * to give it an entry in image->segment[].
+		 */
+	}
+	/* Deal with the destination pages I have inadvertently allocated.
+	 *
+	 * Ideally I would convert multi-page allocations into single
+	 * page allocations, and add everything to image->dest_pages.
+	 *
+	 * For now it is simpler to just free the pages.
+	 */
+	kimage_free_page_list(&extra_pages);
+
+	return pages;
+}
+
+struct page *firmware_kimage_alloc_control_pages(struct kimage *image,
+							unsigned int order)
+{
+	return kimage_alloc_normal_control_pages(image, order);
+}
+
+static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
+{
+	if (*image->entry != 0)
+		image->entry++;
+
+	if (image->entry == image->last_entry) {
+		kimage_entry_t *ind_page;
+		struct page *page;
+
+		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
+		if (!page)
+			return -ENOMEM;
+
+		ind_page = page_address(page);
+		*image->entry = mf_kexec_virt_to_phys(ind_page) | IND_INDIRECTION;
+		image->entry = ind_page;
+		image->last_entry = ind_page +
+				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
+	}
+	*image->entry = entry;
+	image->entry++;
+	*image->entry = 0;
+
+	return 0;
+}
+
+static int kimage_set_destination(struct kimage *image,
+				   unsigned long destination)
+{
+	int result;
+
+	destination &= PAGE_MASK;
+	result = kimage_add_entry(image, destination | IND_DESTINATION);
+	if (result == 0)
+		image->destination = destination;
+
+	return result;
+}
+
+
+static int kimage_add_page(struct kimage *image, unsigned long page)
+{
+	int result;
+
+	page &= PAGE_MASK;
+	result = kimage_add_entry(image, page | IND_SOURCE);
+	if (result == 0)
+		image->destination += PAGE_SIZE;
+
+	return result;
+}
+
+
+static void kimage_free_extra_pages(struct kimage *image)
+{
+	/* Walk through and free any extra destination pages I may have */
+	kimage_free_page_list(&image->dest_pages);
+
+	/* Walk through and free any unusable pages I have cached */
+	kimage_free_page_list(&image->unuseable_pages);
+
+}
+static void kimage_terminate(struct kimage *image)
+{
+	if (*image->entry != 0)
+		image->entry++;
+
+	*image->entry = IND_DONE;
+}
+
+#define for_each_kimage_entry(image, ptr, entry) \
+	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
+		ptr = (entry & IND_INDIRECTION)? \
+			mf_kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1)
+
+static void kimage_free_entry(kimage_entry_t entry)
+{
+	struct page *page;
+
+	page = mf_kexec_pfn_to_page(entry >> PAGE_SHIFT);
+	mf_kexec_kimage_free_pages(page);
+}
+
+static void kimage_free(struct kimage *image)
+{
+	kimage_entry_t *ptr, entry;
+	kimage_entry_t ind = 0;
+
+	if (!image)
+		return;
+
+	kimage_free_extra_pages(image);
+	for_each_kimage_entry(image, ptr, entry) {
+		if (entry & IND_INDIRECTION) {
+			/* Free the previous indirection page */
+			if (ind & IND_INDIRECTION)
+				kimage_free_entry(ind);
+			/* Save this indirection page until we are
+			 * done with it.
+			 */
+			ind = entry;
+		}
+		else if (entry & IND_SOURCE)
+			kimage_free_entry(entry);
+	}
+	/* Free the final indirection page */
+	if (ind & IND_INDIRECTION)
+		kimage_free_entry(ind);
+
+	/* Handle any machine specific cleanup */
+	mf_kexec_cleanup(image);
+
+	/* Free the kexec control pages... */
+	kimage_free_page_list(&image->control_pages);
+	kfree(image);
+}
+
+static kimage_entry_t *kimage_dst_used(struct kimage *image,
+					unsigned long page)
+{
+	kimage_entry_t *ptr, entry;
+	unsigned long destination = 0;
+
+	for_each_kimage_entry(image, ptr, entry) {
+		if (entry & IND_DESTINATION)
+			destination = entry & PAGE_MASK;
+		else if (entry & IND_SOURCE) {
+			if (page == destination)
+				return ptr;
+			destination += PAGE_SIZE;
+		}
+	}
+
+	return NULL;
+}
+
+static struct page *kimage_alloc_page(struct kimage *image,
+					gfp_t gfp_mask,
+					unsigned long destination)
+{
+	/*
+	 * Here we implement safeguards to ensure that a source page
+	 * is not copied to its destination page before the data on
+	 * the destination page is no longer useful.
+	 *
+	 * To do this we maintain the invariant that a source page is
+	 * either its own destination page, or it is not a
+	 * destination page at all.
+	 *
+	 * That is slightly stronger than required, but the proof
+	 * that no problems will occur is trivial, and the
+	 * implementation is simple to verify.
+	 *
+	 * When allocating all pages normally this algorithm will run
+	 * in O(N) time, but in the worst case it will run in O(N^2)
+	 * time.   If the runtime is a problem the data structures can
+	 * be fixed.
+	 */
+	struct page *page;
+	unsigned long addr;
+
+	/*
+	 * Walk through the list of destination pages, and see if I
+	 * have a match.
+	 */
+	list_for_each_entry(page, &image->dest_pages, lru) {
+		addr = mf_kexec_page_to_pfn(page) << PAGE_SHIFT;
+		if (addr == destination) {
+			list_del(&page->lru);
+			return page;
+		}
+	}
+	page = NULL;
+	while (1) {
+		kimage_entry_t *old;
+
+		/* Allocate a page, if we run out of memory give up */
+		page = mf_kexec_kimage_alloc_pages(gfp_mask, 0,
+							KEXEC_SOURCE_MEMORY_LIMIT);
+		if (!page)
+			return NULL;
+		/* If the page cannot be used file it away */
+		if (mf_kexec_page_to_pfn(page) >
+				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
+			list_add(&page->lru, &image->unuseable_pages);
+			continue;
+		}
+		addr = mf_kexec_page_to_pfn(page) << PAGE_SHIFT;
+
+		/* If it is the destination page we want, use it */
+		if (addr == destination)
+			break;
+
+		/* If the page is not a destination page use it */
+		if (!kimage_is_destination_range(image, addr,
+						  addr + PAGE_SIZE))
+			break;
+
+		/*
+		 * I know that the page is someone's destination page.
+		 * See if there is already a source page for this
+		 * destination page.  And if so swap the source pages.
+		 */
+		old = kimage_dst_used(image, addr);
+		if (old) {
+			/* If so move it */
+			unsigned long old_addr;
+			struct page *old_page;
+
+			old_addr = *old & PAGE_MASK;
+			old_page = mf_kexec_pfn_to_page(old_addr >> PAGE_SHIFT);
+			copy_highpage(page, old_page);
+			*old = addr | (*old & ~PAGE_MASK);
+
+			/* The old page I have found cannot be a
+			 * destination page, so return it if its
+			 * gfp_flags honor the ones passed in.
+			 */
+			if (!(gfp_mask & __GFP_HIGHMEM) &&
+			    PageHighMem(old_page)) {
+				mf_kexec_kimage_free_pages(old_page);
+				continue;
+			}
+			addr = old_addr;
+			page = old_page;
+			break;
+		}
+		else {
+			/* Place the page on the destination list; I
+			 * will use it later.
+			 */
+			list_add(&page->lru, &image->dest_pages);
+		}
+	}
+
+	return page;
+}
+
+static int kimage_load_normal_segment(struct kimage *image,
+					 struct kexec_segment *segment)
+{
+	unsigned long maddr;
+	unsigned long ubytes, mbytes;
+	int result;
+	unsigned char __user *buf;
+
+	result = 0;
+	buf = segment->buf;
+	ubytes = segment->bufsz;
+	mbytes = segment->memsz;
+	maddr = segment->mem;
+
+	result = kimage_set_destination(image, maddr);
+	if (result < 0)
+		goto out;
+
+	while (mbytes) {
+		struct page *page;
+		char *ptr;
+		size_t uchunk, mchunk;
+
+		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
+		if (!page) {
+			result  = -ENOMEM;
+			goto out;
+		}
+		result = kimage_add_page(image, mf_kexec_page_to_pfn(page)
+								<< PAGE_SHIFT);
+		if (result < 0)
+			goto out;
+
+		ptr = kmap(page);
+		/* Start with a clear page */
+		clear_page(ptr);
+		ptr += maddr & ~PAGE_MASK;
+		mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
+		if (mchunk > mbytes)
+			mchunk = mbytes;
+
+		uchunk = mchunk;
+		if (uchunk > ubytes)
+			uchunk = ubytes;
+
+		result = copy_from_user(ptr, buf, uchunk);
+		kunmap(page);
+		if (result) {
+			result = -EFAULT;
+			goto out;
+		}
+		ubytes -= uchunk;
+		maddr  += mchunk;
+		buf    += mchunk;
+		mbytes -= mchunk;
+	}
+out:
+	return result;
+}
+
+static int kimage_load_segment(struct kimage *image,
+				struct kexec_segment *segment)
+{
+	return kimage_load_normal_segment(image, segment);
+}
+
+long firmware_sys_kexec_load(unsigned long entry, unsigned long nr_segments,
+				struct kexec_segment __user *segments,
+				unsigned long flags)
+{
+	struct kimage **dest_image, *image = NULL;
+	int result = 0;
+
+	dest_image = &kexec_image;
+	if (flags & KEXEC_ON_CRASH)
+		dest_image = &kexec_crash_image;
+	if (nr_segments > 0) {
+		unsigned long i;
+
+		/* Loading another kernel to reboot into */
+		if ((flags & KEXEC_ON_CRASH) == 0)
+			result = kimage_normal_alloc(&image, entry,
+							nr_segments, segments);
+		/* Loading another kernel to switch to if this one crashes */
+		else if (flags & KEXEC_ON_CRASH) {
+			/* Free any current crash dump kernel before
+			 * we corrupt it.
+			 */
+			mf_kexec_unload(image);
+			kimage_free(xchg(&kexec_crash_image, NULL));
+			result = kimage_crash_alloc(&image, entry,
+						     nr_segments, segments);
+		}
+		if (result)
+			goto out;
+
+		if (flags & KEXEC_PRESERVE_CONTEXT)
+			image->preserve_context = 1;
+		result = mf_kexec_prepare(image);
+		if (result)
+			goto out;
+
+		for (i = 0; i < nr_segments; i++) {
+			result = kimage_load_segment(image, &image->segment[i]);
+			if (result)
+				goto out;
+		}
+		kimage_terminate(image);
+	}
+
+	result = mf_kexec_load(image);
+
+	if (result)
+		goto out;
+
+	/* Install the new kernel and uninstall the old one */
+	image = xchg(dest_image, image);
+
+out:
+	mf_kexec_unload(image);
+
+	kimage_free(image);
+
+	return result;
+}
+
+void firmware_crash_kexec(struct pt_regs *regs)
+{
+	struct pt_regs fixed_regs;
+
+	crash_setup_regs(&fixed_regs, regs);
+	crash_save_vmcoreinfo();
+	machine_crash_shutdown(&fixed_regs);
+	mf_kexec(kexec_crash_image);
+}
+
+int firmware_kernel_kexec(void)
+{
+	kernel_restart_prepare(NULL);
+	printk(KERN_EMERG "Starting new kernel\n");
+	mf_kexec_shutdown();
+	mf_kexec(kexec_image);
+
+	return 0;
+}
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 5e4bd78..9f3b6cb 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -38,6 +38,10 @@
 #include <asm/io.h>
 #include <asm/sections.h>
 
+#ifdef CONFIG_KEXEC_FIRMWARE
+bool kexec_use_firmware = false;
+#endif
+
 /* Per cpu memory for storing cpu states in case of system crash. */
 note_buf_t __percpu *crash_notes;
 
@@ -924,7 +928,7 @@ static int kimage_load_segment(struct kimage *image,
  *   the devices in a consistent state so a later kernel can
  *   reinitialize them.
  *
- * - A machine specific part that includes the syscall number
+ * - A machine/firmware specific part that includes the syscall number
 *   and then copies the image to its final destination.  And
  *   jumps into the image at entry.
  *
@@ -978,6 +982,17 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
 	if (!mutex_trylock(&kexec_mutex))
 		return -EBUSY;
 
+#ifdef CONFIG_KEXEC_FIRMWARE
+	if (kexec_use_firmware) {
+		result = firmware_sys_kexec_load(entry, nr_segments,
+							segments, flags);
+
+		mutex_unlock(&kexec_mutex);
+
+		return result;
+	}
+#endif
+
 	dest_image = &kexec_image;
 	if (flags & KEXEC_ON_CRASH)
 		dest_image = &kexec_crash_image;
@@ -1091,10 +1106,17 @@ void crash_kexec(struct pt_regs *regs)
 		if (kexec_crash_image) {
 			struct pt_regs fixed_regs;
 
-			crash_setup_regs(&fixed_regs, regs);
-			crash_save_vmcoreinfo();
-			machine_crash_shutdown(&fixed_regs);
-			machine_kexec(kexec_crash_image);
+#ifdef CONFIG_KEXEC_FIRMWARE
+			if (kexec_use_firmware)
+				firmware_crash_kexec(regs);
+			else
+#endif
+			{
+				crash_setup_regs(&fixed_regs, regs);
+				crash_save_vmcoreinfo();
+				machine_crash_shutdown(&fixed_regs);
+				machine_kexec(kexec_crash_image);
+			}
 		}
 		mutex_unlock(&kexec_mutex);
 	}
@@ -1132,6 +1154,13 @@ int crash_shrink_memory(unsigned long new_size)
 
 	mutex_lock(&kexec_mutex);
 
+#ifdef CONFIG_KEXEC_FIRMWARE
+	if (kexec_use_firmware) {
+		ret = -ENOSYS;
+		goto unlock;
+	}
+#endif
+
 	if (kexec_crash_image) {
 		ret = -ENOENT;
 		goto unlock;
@@ -1536,6 +1565,13 @@ int kernel_kexec(void)
 		goto Unlock;
 	}
 
+#ifdef CONFIG_KEXEC_FIRMWARE
+	if (kexec_use_firmware) {
+		error = firmware_kernel_kexec();
+		goto Unlock;
+	}
+#endif
+
 #ifdef CONFIG_KEXEC_JUMP
 	if (kexec_image->preserve_context) {
 		lock_system_sleep();
-- 
1.5.6.5


Thread overview: 66+ messages
2012-12-27  2:18 [PATCH v3 00/11] xen: Initial kexec/kdump implementation Daniel Kiper
     [not found] ` <1356574740-6806-1-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-27  2:18   ` Daniel Kiper [this message]
     [not found]     ` <1356574740-6806-2-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-27  2:18       ` [PATCH v3 02/11] x86/kexec: Add extra pointers to transition page table PGD, PUD, PMD and PTE Daniel Kiper
     [not found]         ` <1356574740-6806-3-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-27  2:18           ` [PATCH v3 03/11] xen: Introduce architecture independent data for kexec/kdump Daniel Kiper
     [not found]             ` <1356574740-6806-4-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-27  2:18               ` [PATCH v3 04/11] x86/xen: Introduce architecture dependent " Daniel Kiper
     [not found]                 ` <1356574740-6806-5-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-27  2:18                   ` [PATCH v3 05/11] x86/xen: Register resources required by kexec-tools Daniel Kiper
     [not found]                     ` <1356574740-6806-6-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-27  2:18                       ` [PATCH v3 06/11] x86/xen: Add i386 kexec/kdump implementation Daniel Kiper
     [not found]                         ` <1356574740-6806-7-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-27  2:18                           ` [PATCH v3 07/11] x86/xen: Add x86_64 " Daniel Kiper
     [not found]                             ` <1356574740-6806-8-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-27  2:18                               ` [PATCH v3 08/11] x86/xen: Add kexec/kdump Kconfig and makefile rules Daniel Kiper
     [not found]                                 ` <1356574740-6806-9-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-27  2:18                                   ` [PATCH v3 09/11] x86/xen/enlighten: Add init and crash kexec/kdump hooks Daniel Kiper
     [not found]                                     ` <1356574740-6806-10-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-27  2:18                                       ` [PATCH v3 10/11] drivers/xen: Export vmcoreinfo through sysfs Daniel Kiper
     [not found]                                         ` <1356574740-6806-11-git-send-email-daniel.kiper-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-12-27  2:19                                           ` [PATCH v3 11/11] x86: Add Xen kexec control code size check to linker script Daniel Kiper
2012-12-27 18:53                                     ` [PATCH v3 09/11] x86/xen/enlighten: Add init and crash kexec/kdump hooks H. Peter Anvin
2012-12-27  4:00                         ` [PATCH v3 06/11] x86/xen: Add i386 kexec/kdump implementation H. Peter Anvin
2012-12-27  3:33         ` [PATCH v3 02/11] x86/kexec: Add extra pointers to transition page table PGD, PUD, PMD and PTE H. Peter Anvin
2013-01-03  9:34         ` Jan Beulich
     [not found]         ` <50E55ECF02000078000B2A13@nat28.tlf.novell.com>
2013-01-04 15:15           ` Daniel Kiper
     [not found]           ` <20130104151542.GD3346@host-192-168-1-59.local.net-space.pl>
2013-01-04 16:12             ` Jan Beulich
     [not found]             ` <50E70D8002000078000B336D@nat28.tlf.novell.com>
2013-01-04 17:25               ` Daniel Kiper
     [not found]               ` <20130104172538.GC3472@host-192-168-1-59.local.net-space.pl>
2013-01-07  9:48                 ` Jan Beulich
     [not found]                 ` <50EAA7F402000078000B34C3@nat28.tlf.novell.com>
2013-01-07 12:52                   ` Daniel Kiper
     [not found]                   ` <20130107125211.GB2927@host-192-168-1-59.local.net-space.pl>
2013-01-07 13:05                     ` Jan Beulich
     [not found]                     ` <50EAD61602000078000B36A0@nat28.tlf.novell.com>
2013-01-09 18:42                       ` Daniel Kiper
2013-01-10 14:07             ` David Vrabel
     [not found]             ` <50EECB23.4090603@citrix.com>
2013-01-11 13:36               ` Daniel Kiper
2012-12-27  4:46     ` [PATCH v3 01/11] kexec: introduce kexec firmware support Eric W. Biederman
2012-12-27  4:02 ` [PATCH v3 00/11] xen: Initial kexec/kdump implementation H. Peter Anvin
2012-12-27  7:53   ` Eric W. Biederman
2012-12-27 14:18     ` Andrew Cooper
2012-12-27 18:02       ` Eric W. Biederman
2013-01-02 11:26         ` [Xen-devel] " Andrew Cooper
2013-01-02 11:47           ` Eric W. Biederman
2013-01-03  9:31           ` Jan Beulich
2013-01-04 14:22           ` Daniel Kiper
2013-01-04 14:34             ` Konrad Rzeszutek Wilk
2013-01-04 14:34             ` Ian Campbell
2013-01-04 14:38             ` David Vrabel
2013-01-04 17:01               ` Daniel Kiper
2013-01-10 14:19                 ` David Vrabel
2013-01-11 13:22                   ` Daniel Kiper
2013-01-11 15:22                     ` David Vrabel
2013-01-11 17:34                       ` Daniel Kiper
2013-01-11 20:05                       ` Eric W. Biederman
2013-01-04 14:41             ` Jan Beulich
     [not found]               ` <50E6F81D02000078000B3245-ce6RLXgGx+vWGUEhTRrCg1aTQe2KTcn/@public.gmane.org>
2013-01-04 17:07                 ` Daniel Kiper
2013-01-04 19:11                   ` Konrad Rzeszutek Wilk
     [not found]                     ` <20130104191146.GC6721-6K5HmflnPlqSPmnEAIUT9EEOCMrvLtNR@public.gmane.org>
2013-01-07 10:25                       ` Ian Campbell
2013-01-07 10:46                         ` Andrew Cooper
2013-01-07 10:54                           ` Ian Campbell
2013-01-07 12:34                     ` Daniel Kiper
     [not found]                       ` <20130107123404.GA2927-UojuW/CpjwpdUOLzJiIvSsFoITBeLw/klGfBN0aaEZ+lPcVs/6D9LQ@public.gmane.org>
2013-01-07 13:49                         ` Ian Campbell
2013-01-11 13:47                           ` Daniel Kiper
2013-01-07 16:20                       ` Konrad Rzeszutek Wilk
2013-01-11  4:16                         ` Eric W. Biederman
2013-01-11 16:55                           ` Konrad Rzeszutek Wilk
2013-01-11 20:26                             ` H. Peter Anvin
2013-01-11 20:43                               ` Vivek Goyal
2013-01-11 20:26                             ` Eric W. Biederman
2013-01-11 20:52                               ` Vivek Goyal
     [not found]                                 ` <20130111205232.GC17126-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2013-01-11 21:03                                   ` H. Peter Anvin
     [not found]                                     ` <50F07E2D.6010602-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org>
2013-01-11 21:08                                       ` Vivek Goyal
2013-01-11 21:14                                         ` H. Peter Anvin
     [not found]       ` <50DC58C4.3000307-Sxgqhf6Nn4DQT0dZR+AlfA@public.gmane.org>
2013-01-02 15:27         ` Ian Campbell
  -- strict thread matches above, loose matches on Subject: below --
2012-12-28  0:18 [PATCH v3 01/11] kexec: introduce kexec firmware support Daniel Kiper
2012-12-28  3:06 ` Eric W. Biederman
2013-01-04 14:04   ` Daniel Kiper
