linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Roberto Ricci <io@r-ricci.it>
To: Baoquan He <bhe@redhat.com>
Cc: Dave Young <dyoung@redhat.com>,
	ebiederm@xmission.com, rafael@kernel.org, pavel@ucw.cz,
	ytcoode@gmail.com, kexec@lists.infradead.org,
	linux-pm@vger.kernel.org, akpm@linux-foundation.org,
	regressions@lists.linux.dev, linux-kernel@vger.kernel.org,
	msizanoen@qtmlabs.xyz, rafael.j.wysocki@intel.com,
	yu.c.chen@intel.com
Subject: Re: [REGRESSION] Kernel booted via kexec fails to resume from hibernation
Date: Sat, 5 Apr 2025 01:31:23 +0200	[thread overview]
Message-ID: <Z_Bry0JcB1jqPztR@desktop0a> (raw)
In-Reply-To: <Z-hYWc9LtBU1Yhtg@desktop0a>

On 2025-03-29 21:30 +0100, Roberto Ricci wrote:
> On 2025-03-29 09:44 +0800, Baoquan He wrote:
> > [snip]
> > 3) If the answer to 1) and 2) is yes, does kexec_load work for you? Asking
> > this because the kexec_load interface defaults to putting the kexec kernel on
> > top of system RAM, which is equivalent to applying commit b3ba234171cd.
> 
> No, it doesn't. While hibernation alone works, kexec + hibernation
> results in the system just rebooting without resuming the hibernation
> image, but no crash or other weird behaviour occurs.
> Initially I decided to focus on kexec_file_load in order to narrow
> things down, but that was before noticing that the bug could manifest
> itself in different forms.
> It is possible, indeed, that both syscalls are affected by the same
> problem, which is not caused by commit b3ba234171cd.
> I tried to test kexec_load with some older kernels, but I got build
> errors, so I tested longterm releases where such errors have been fixed.
> With v4.9.337, kexec (via kexec_load) + hibernation works.
> With v5.4.291 it doesn't.
> I'm not sure how bisection could be done in this case.
> [snip]

I've bisected this other bug with kexec_load. The bisection pointed to commit
62a03defeabd ("PM / hibernate: Verify the consistent of e820 memory map by md5 digest").
Reverting it on v6.14 fixes kexec_load, but not kexec_file_load.
Additionally applying the patch suggested by msizanoen fixes kexec_file_load, too:
https://lore.kernel.org/all/Z_BDbwmFV6wxDPV1@desktop0a/

FYI, this is how I reverted that commit (I had to manually resolve
conflicts):

diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index 5b81d19cd114..f2021a515bad 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -40,20 +40,6 @@ unsigned long restore_cr3 __visible;
 unsigned long temp_pgt __visible;
 unsigned long relocated_restore_code __visible;
 
-/**
- *	pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-int pfn_is_nosave(unsigned long pfn)
-{
-	unsigned long nosave_begin_pfn;
-	unsigned long nosave_end_pfn;
-
-	nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
-	nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
-
-	return pfn >= nosave_begin_pfn && pfn < nosave_end_pfn;
-}
-
 struct restore_data_record {
 	unsigned long jump_address;
 	unsigned long jump_address_phys;
@@ -83,69 +69,6 @@ static inline u32 compute_e820_crc32(struct e820_table *table)
 #define RESTORE_MAGIC	0x12345679UL
 #endif
 
-/**
- *	arch_hibernation_header_save - populate the architecture specific part
- *		of a hibernation image header
- *	@addr: address to save the data at
- */
-int arch_hibernation_header_save(void *addr, unsigned int max_size)
-{
-	struct restore_data_record *rdr = addr;
-
-	if (max_size < sizeof(struct restore_data_record))
-		return -EOVERFLOW;
-	rdr->magic = RESTORE_MAGIC;
-	rdr->jump_address = (unsigned long)restore_registers;
-	rdr->jump_address_phys = __pa_symbol(restore_registers);
-
-	/*
-	 * The restore code fixes up CR3 and CR4 in the following sequence:
-	 *
-	 * [in hibernation asm]
-	 * 1. CR3 <= temporary page tables
-	 * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
-	 * 3. CR3 <= rdr->cr3
-	 * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
-	 * [in restore_processor_state()]
-	 * 5. CR4 <= saved CR4
-	 * 6. CR3 <= saved CR3
-	 *
-	 * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
-	 * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
-	 * rdr->cr3 needs to point to valid page tables but must not
-	 * have any of the PCID bits set.
-	 */
-	rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
-
-	rdr->e820_checksum = compute_e820_crc32(e820_table_firmware);
-	return 0;
-}
-
-/**
- *	arch_hibernation_header_restore - read the architecture specific data
- *		from the hibernation image header
- *	@addr: address to read the data from
- */
-int arch_hibernation_header_restore(void *addr)
-{
-	struct restore_data_record *rdr = addr;
-
-	if (rdr->magic != RESTORE_MAGIC) {
-		pr_crit("Unrecognized hibernate image header format!\n");
-		return -EINVAL;
-	}
-
-	restore_jump_address = rdr->jump_address;
-	jump_address_phys = rdr->jump_address_phys;
-	restore_cr3 = rdr->cr3;
-
-	if (rdr->e820_checksum != compute_e820_crc32(e820_table_firmware)) {
-		pr_crit("Hibernate inconsistent memory map detected!\n");
-		return -ENODEV;
-	}
-
-	return 0;
-}
 
 int relocate_restore_code(void)
 {
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index a595953f1d6d..924420fdaab4 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -140,3 +140,56 @@ asmlinkage int swsusp_arch_resume(void)
 	restore_image();
 	return 0;
 }
+
+/*
+ *	pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
+	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
+	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
+
+struct restore_data_record {
+	unsigned long jump_address;
+	unsigned long jump_address_phys;
+	unsigned long cr3;
+	unsigned long magic;
+};
+
+#define RESTORE_MAGIC	0x123456789ABCDEF0UL
+
+/**
+ *	arch_hibernation_header_save - populate the architecture specific part
+ *		of a hibernation image header
+ *	@addr: address to save the data at
+ */
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+	struct restore_data_record *rdr = addr;
+
+	if (max_size < sizeof(struct restore_data_record))
+		return -EOVERFLOW;
+	rdr->jump_address = (unsigned long)&restore_registers;
+	rdr->jump_address_phys = __pa_symbol(&restore_registers);
+	rdr->cr3 = restore_cr3;
+	rdr->magic = RESTORE_MAGIC;
+	return 0;
+}
+
+/**
+ *	arch_hibernation_header_restore - read the architecture specific data
+ *		from the hibernation image header
+ *	@addr: address to read the data from
+ */
+int arch_hibernation_header_restore(void *addr)
+{
+	struct restore_data_record *rdr = addr;
+
+	restore_jump_address = rdr->jump_address;
+	jump_address_phys = rdr->jump_address_phys;
+	restore_cr3 = rdr->cr3;
+	return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
+}

  parent reply	other threads:[~2025-04-04 23:31 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-13 21:28 [REGRESSION] Kernel booted via kexec fails to resume from hibernation Roberto Ricci
2025-01-13 21:31 ` Roberto Ricci
2025-01-14  3:42   ` Baoquan He
2025-04-01 12:59   ` msizanoen
2025-04-03 22:00     ` Roberto Ricci
2025-04-04  2:54       ` msizanoen
2025-04-04  4:56         ` msizanoen
2025-04-04  5:50           ` msizanoen
2025-04-04 20:39             ` Roberto Ricci
2025-04-05  5:15             ` msizanoen
2025-04-04 20:00         ` Roberto Ricci
2025-01-13 21:32 ` Roberto Ricci
2025-01-13 23:17 ` Andrew Morton
2025-01-14 13:19   ` Roberto Ricci
2025-01-14 13:16 ` Roberto Ricci
2025-01-15  4:04   ` Baoquan He
2025-01-15 12:00     ` Roberto Ricci
2025-01-16 11:52       ` Roberto Ricci
2025-01-17  1:55         ` Baoquan He
2025-01-17  3:41           ` Baoquan He
2025-01-17  7:52             ` Roberto Ricci
2025-01-16  9:54     ` Yuntao Wang
2025-01-22  9:45 ` RuiRui Yang
2025-01-22 13:01   ` Roberto Ricci
2025-01-27  2:39 ` Dave Young
2025-01-27  2:42   ` Dave Young
2025-03-09 17:09     ` Donald
2025-03-29  0:14     ` Roberto Ricci
2025-03-29  0:14       ` Roberto Ricci
2025-03-29  0:15       ` Roberto Ricci
2025-03-29  1:44       ` Baoquan He
2025-03-29 20:30         ` Roberto Ricci
2025-03-29 20:33           ` Roberto Ricci
2025-03-31  3:22           ` Dave Young
2025-04-03 21:59             ` Roberto Ricci
2025-04-04 23:31           ` Roberto Ricci [this message]
2025-04-04 23:37             ` Roberto Ricci

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z_Bry0JcB1jqPztR@desktop0a \
    --to=io@r-ricci.it \
    --cc=akpm@linux-foundation.org \
    --cc=bhe@redhat.com \
    --cc=dyoung@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=kexec@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=msizanoen@qtmlabs.xyz \
    --cc=pavel@ucw.cz \
    --cc=rafael.j.wysocki@intel.com \
    --cc=rafael@kernel.org \
    --cc=regressions@lists.linux.dev \
    --cc=ytcoode@gmail.com \
    --cc=yu.c.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).