All of lore.kernel.org
 help / color / mirror / Atom feed
From: Roberto Ricci <io@r-ricci.it>
To: Baoquan He <bhe@redhat.com>
Cc: Dave Young <dyoung@redhat.com>,
	ebiederm@xmission.com, rafael@kernel.org, pavel@ucw.cz,
	ytcoode@gmail.com, kexec@lists.infradead.org,
	linux-pm@vger.kernel.org, akpm@linux-foundation.org,
	regressions@lists.linux.dev, linux-kernel@vger.kernel.org,
	msizanoen@qtmlabs.xyz, rafael.j.wysocki@intel.com,
	yu.c.chen@intel.com
Subject: Re: [REGRESSION] Kernel booted via kexec fails to resume from hibernation
Date: Sat, 5 Apr 2025 01:31:23 +0200	[thread overview]
Message-ID: <Z_Bry0JcB1jqPztR@desktop0a> (raw)
In-Reply-To: <Z-hYWc9LtBU1Yhtg@desktop0a>

On 2025-03-29 21:30 +0100, Roberto Ricci wrote:
> On 2025-03-29 09:44 +0800, Baoquan He wrote:
> > [snip]
> > 3) If the answer to 1) and 2) is yes, does kexec_load work for you? Asking
> > this because kexec_load interface defaults to put kexec kernel on top of
> > system RAM which is equivalent to applying commit b3ba234171cd.
> 
> No, it doesn't. While hibernation alone works, kexec + hibernation
> results in the system just rebooting without resuming the hibernation
> image, but no crash or other weird behaviour occurs.
> Initially I decided to focus on kexec_file_load in order to narrow
> things down, but that was before noticing that the bug could manifest
> itself in different forms.
> It is possible, indeed, that both syscalls are affected by the same
> problem, which is not caused by commit b3ba234171cd.
> I tried to test kexec_load with some older kernels, but I got build
> errors, so I tested longterm releases where such errors have been fixed.
> With v4.9.337, kexec (via kexec_load) + hibernation works.
> With v5.4.291 it doesn't.
> I'm not sure how bisection could be done in this case.
> [snip]

I've bisected this other bug with kexec_load and found that it was introduced by commit
62a03defeabd ("PM / hibernate: Verify the consistent of e820 memory map by md5 digest").
Reverting it on v6.14 fixes kexec_load, but not kexec_file_load.
Additionally applying the patch suggested by msizanoen fixes kexec_file_load as well:
https://lore.kernel.org/all/Z_BDbwmFV6wxDPV1@desktop0a/

FYI, this is how I reverted that commit (I had to manually resolve
conflicts):

diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index 5b81d19cd114..f2021a515bad 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -40,20 +40,6 @@ unsigned long restore_cr3 __visible;
 unsigned long temp_pgt __visible;
 unsigned long relocated_restore_code __visible;
 
-/**
- *	pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-int pfn_is_nosave(unsigned long pfn)
-{
-	unsigned long nosave_begin_pfn;
-	unsigned long nosave_end_pfn;
-
-	nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
-	nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
-
-	return pfn >= nosave_begin_pfn && pfn < nosave_end_pfn;
-}
-
 struct restore_data_record {
 	unsigned long jump_address;
 	unsigned long jump_address_phys;
@@ -83,69 +69,6 @@ static inline u32 compute_e820_crc32(struct e820_table *table)
 #define RESTORE_MAGIC	0x12345679UL
 #endif
 
-/**
- *	arch_hibernation_header_save - populate the architecture specific part
- *		of a hibernation image header
- *	@addr: address to save the data at
- */
-int arch_hibernation_header_save(void *addr, unsigned int max_size)
-{
-	struct restore_data_record *rdr = addr;
-
-	if (max_size < sizeof(struct restore_data_record))
-		return -EOVERFLOW;
-	rdr->magic = RESTORE_MAGIC;
-	rdr->jump_address = (unsigned long)restore_registers;
-	rdr->jump_address_phys = __pa_symbol(restore_registers);
-
-	/*
-	 * The restore code fixes up CR3 and CR4 in the following sequence:
-	 *
-	 * [in hibernation asm]
-	 * 1. CR3 <= temporary page tables
-	 * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
-	 * 3. CR3 <= rdr->cr3
-	 * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
-	 * [in restore_processor_state()]
-	 * 5. CR4 <= saved CR4
-	 * 6. CR3 <= saved CR3
-	 *
-	 * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
-	 * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
-	 * rdr->cr3 needs to point to valid page tables but must not
-	 * have any of the PCID bits set.
-	 */
-	rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
-
-	rdr->e820_checksum = compute_e820_crc32(e820_table_firmware);
-	return 0;
-}
-
-/**
- *	arch_hibernation_header_restore - read the architecture specific data
- *		from the hibernation image header
- *	@addr: address to read the data from
- */
-int arch_hibernation_header_restore(void *addr)
-{
-	struct restore_data_record *rdr = addr;
-
-	if (rdr->magic != RESTORE_MAGIC) {
-		pr_crit("Unrecognized hibernate image header format!\n");
-		return -EINVAL;
-	}
-
-	restore_jump_address = rdr->jump_address;
-	jump_address_phys = rdr->jump_address_phys;
-	restore_cr3 = rdr->cr3;
-
-	if (rdr->e820_checksum != compute_e820_crc32(e820_table_firmware)) {
-		pr_crit("Hibernate inconsistent memory map detected!\n");
-		return -ENODEV;
-	}
-
-	return 0;
-}
 
 int relocate_restore_code(void)
 {
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index a595953f1d6d..924420fdaab4 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -140,3 +140,56 @@ asmlinkage int swsusp_arch_resume(void)
 	restore_image();
 	return 0;
 }
+
+/*
+ *	pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
+	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
+	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
+
+struct restore_data_record {
+	unsigned long jump_address;
+	unsigned long jump_address_phys;
+	unsigned long cr3;
+	unsigned long magic;
+};
+
+#define RESTORE_MAGIC	0x123456789ABCDEF0UL
+
+/**
+ *	arch_hibernation_header_save - populate the architecture specific part
+ *		of a hibernation image header
+ *	@addr: address to save the data at
+ */
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+	struct restore_data_record *rdr = addr;
+
+	if (max_size < sizeof(struct restore_data_record))
+		return -EOVERFLOW;
+	rdr->jump_address = (unsigned long)&restore_registers;
+	rdr->jump_address_phys = __pa_symbol(&restore_registers);
+	rdr->cr3 = restore_cr3;
+	rdr->magic = RESTORE_MAGIC;
+	return 0;
+}
+
+/**
+ *	arch_hibernation_header_restore - read the architecture specific data
+ *		from the hibernation image header
+ *	@addr: address to read the data from
+ */
+int arch_hibernation_header_restore(void *addr)
+{
+	struct restore_data_record *rdr = addr;
+
+	restore_jump_address = rdr->jump_address;
+	jump_address_phys = rdr->jump_address_phys;
+	restore_cr3 = rdr->cr3;
+	return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
+}


  parent reply	other threads:[~2025-04-04 23:31 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-13 21:28 [REGRESSION] Kernel booted via kexec fails to resume from hibernation Roberto Ricci
2025-01-13 21:31 ` Roberto Ricci
2025-01-14  3:42   ` Baoquan He
2025-04-01 12:59   ` msizanoen
2025-04-03 22:00     ` Roberto Ricci
2025-04-04  2:54       ` msizanoen
2025-04-04  4:56         ` msizanoen
2025-04-04  5:50           ` msizanoen
2025-04-04 20:39             ` Roberto Ricci
2025-04-05  5:15             ` msizanoen
2025-04-04 20:00         ` Roberto Ricci
2025-01-13 21:32 ` Roberto Ricci
2025-01-13 23:17 ` Andrew Morton
2025-01-14 13:19   ` Roberto Ricci
2025-01-14 13:16 ` Roberto Ricci
2025-01-15  4:04   ` Baoquan He
2025-01-15 12:00     ` Roberto Ricci
2025-01-16 11:52       ` Roberto Ricci
2025-01-17  1:55         ` Baoquan He
2025-01-17  3:41           ` Baoquan He
2025-01-17  7:52             ` Roberto Ricci
2025-01-16  9:54     ` Yuntao Wang
2025-01-22  9:45 ` RuiRui Yang
2025-01-22 13:01   ` Roberto Ricci
2025-01-27  2:39 ` Dave Young
2025-01-27  2:42   ` Dave Young
2025-03-09 17:09     ` Donald
2025-03-29  0:14     ` Roberto Ricci
2025-03-29  0:14       ` Roberto Ricci
2025-03-29  0:15       ` Roberto Ricci
2025-03-29  1:44       ` Baoquan He
2025-03-29 20:30         ` Roberto Ricci
2025-03-29 20:33           ` Roberto Ricci
2025-03-31  3:22           ` Dave Young
2025-04-03 21:59             ` Roberto Ricci
2025-04-04 23:31           ` Roberto Ricci [this message]
2025-04-04 23:37             ` Roberto Ricci

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z_Bry0JcB1jqPztR@desktop0a \
    --to=io@r-ricci.it \
    --cc=akpm@linux-foundation.org \
    --cc=bhe@redhat.com \
    --cc=dyoung@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=kexec@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=msizanoen@qtmlabs.xyz \
    --cc=pavel@ucw.cz \
    --cc=rafael.j.wysocki@intel.com \
    --cc=rafael@kernel.org \
    --cc=regressions@lists.linux.dev \
    --cc=ytcoode@gmail.com \
    --cc=yu.c.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.