[KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch

All of lore.kernel.org
 help / color / mirror / Atom feed

* [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch
@ 2003-02-13 10:40 Suparna Bhattacharya
  2003-02-13 15:09 ` Eric W. Biederman
  0 siblings, 1 reply; 10+ messages in thread
From: Suparna Bhattacharya @ 2003-02-13 10:40 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: fastboot, linux-kernel, mbligh

[-- Attachment #1: Type: text/plain, Size: 1025 bytes --]

Martin Bligh came up with a simple way to fix the kernel
to enable kexec boot from any CPU. 

Rather than picking up boot cpu information from the MP 
tables (which belong to the previous boot in the case of 
kexec), it just sets it to the cpu it's starting on.
(See the changes in arch/i386/kernel/smpboot.c)

This simplifies the kexec-hwfixes patch, since we
no longer need to move to the boot cpu before stopping
other processors. Which removes a lot of the unconditional
patching of reboot.c and makes it less invasive, thanks to 
Martin. Also, at panic time, cpu migration is something 
that is best avoided.

It would be good if someone could test this out (on SMP)
and confirm it works fine (I tried it on a 4way).

Eric, Do these changes look OK to you ? Did you have
something similar in mind, when you were talking about
enabling the kexec'd kernel to not care about which cpu
it was running on ?

Regards
Suparna


-- 
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Labs, India


[-- Attachment #2: kexec-hwfixes.patch --]
[-- Type: text/plain, Size: 6752 bytes --]

diff -ur -X ../../dontdiff linux-2.5.59-kexec/arch/i386/kernel/apic.c linux-2.5.59-kexecfixes/arch/i386/kernel/apic.c
--- linux-2.5.59-kexec/arch/i386/kernel/apic.c	Fri Jan 17 07:53:00 2003
+++ linux-2.5.59-kexecfixes/arch/i386/kernel/apic.c	Thu Feb 13 10:14:44 2003
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
+#include <linux/reboot.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -155,6 +156,36 @@
 		outb(0x70, 0x22);
 		outb(0x00, 0x23);
 	}
+	else {
+		/* Go back to Virtual Wire compatibility mode */
+		unsigned long value;
+
+		/* For the spurious interrupt use vector F, and enable it */
+		value = apic_read(APIC_SPIV);
+		value &= ~APIC_VECTOR_MASK; 
+		value |= APIC_SPIV_APIC_ENABLED;
+		value |= 0xf;
+		apic_write_around(APIC_SPIV, value);
+
+		/* For LVT0 make it edge triggered, active high, external and enabled */
+		value = apic_read(APIC_LVT0);
+		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | 
+			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 
+			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
+		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXINT);
+		apic_write_around(APIC_LVT0, value);
+		
+		/* For LVT1 make it edge triggered, active high, nmi and enabled */
+		value = apic_read(APIC_LVT1);
+		value &= ~(
+			APIC_MODE_MASK | APIC_SEND_PENDING | 
+			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 
+			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+		apic_write_around(APIC_LVT1, value);
+	}
 }
 
 void disable_local_APIC(void)
@@ -1116,6 +1147,26 @@
 	irq_exit();
 }
 
+void stop_apics(void)
+{
+	/* By resetting the APIC's we disable the nmi watchdog */
+#if CONFIG_SMP
+	/*
+	 * Stop all CPUs and turn off local APICs and the IO-APIC, so
+	 * other OSs see a clean IRQ state.
+	 */
+	smp_send_stop();
+#else
+	disable_local_APIC();
+#endif
+#if defined(CONFIG_X86_IO_APIC)
+	if (smp_found_config) {
+		disable_IO_APIC();
+	}
+#endif
+	disconnect_bsp_APIC();
+}
+
 /*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
diff -ur -X ../../dontdiff linux-2.5.59-kexec/arch/i386/kernel/i8259.c linux-2.5.59-kexecfixes/arch/i386/kernel/i8259.c
--- linux-2.5.59-kexec/arch/i386/kernel/i8259.c	Fri Jan 17 07:52:43 2003
+++ linux-2.5.59-kexecfixes/arch/i386/kernel/i8259.c	Thu Feb 13 10:14:44 2003
@@ -246,10 +246,21 @@
 	return 0;
 }
 
+static void i8259A_shutdown(struct device *dev)
+{   
+	/* Put the i8259A into a quiescent state that
+	 * the kernel initialization code can get it
+	 * out of.
+	 */
+	outb(0xff, 0x21);	/* mask all of 8259A-1 */
+	outb(0xff, 0xA1);	/* mask all of 8259A-2 */
+}
+
 static struct device_driver i8259A_driver = {
 	.name		= "pic",
 	.bus		= &system_bus_type,
 	.resume		= i8259A_resume,
+	.shutdown	= i8259A_shutdown,
 };
 
 static struct sys_device device_i8259A = {
diff -ur -X ../../dontdiff linux-2.5.59-kexec/arch/i386/kernel/io_apic.c linux-2.5.59-kexecfixes/arch/i386/kernel/io_apic.c
--- linux-2.5.59-kexec/arch/i386/kernel/io_apic.c	Fri Jan 17 07:52:00 2003
+++ linux-2.5.59-kexecfixes/arch/i386/kernel/io_apic.c	Thu Feb 13 10:14:44 2003
@@ -1121,8 +1121,6 @@
 	 * Clear the IO-APIC before rebooting:
 	 */
 	clear_IO_APIC();
-
-	disconnect_bsp_APIC();
 }
 
 /*
diff -ur -X ../../dontdiff linux-2.5.59-kexec/arch/i386/kernel/machine_kexec.c linux-2.5.59-kexecfixes/arch/i386/kernel/machine_kexec.c
--- linux-2.5.59-kexec/arch/i386/kernel/machine_kexec.c	Thu Feb 13 10:38:46 2003
+++ linux-2.5.59-kexecfixes/arch/i386/kernel/machine_kexec.c	Thu Feb 13 10:14:44 2003
@@ -82,6 +82,8 @@
 	/* switch to an mm where the reboot_code_buffer is identity mapped */
 	switch_mm(current->active_mm, &init_mm, current, smp_processor_id());
 
+	stop_apics();
+
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 	reboot_code_buffer = page_to_pfn(image->reboot_code_pages) << PAGE_SHIFT;
diff -ur -X ../../dontdiff linux-2.5.59-kexec/arch/i386/kernel/reboot.c linux-2.5.59-kexecfixes/arch/i386/kernel/reboot.c
--- linux-2.5.59-kexec/arch/i386/kernel/reboot.c	Fri Jan 17 07:51:49 2003
+++ linux-2.5.59-kexecfixes/arch/i386/kernel/reboot.c	Thu Feb 13 10:19:57 2003
@@ -8,6 +8,7 @@
 #include <linux/interrupt.h>
 #include <linux/mc146818rtc.h>
 #include <asm/uaccess.h>
+#include <asm/apic.h>
 
 /*
  * Power off function, if any
@@ -252,13 +253,12 @@
 		for (;;)
 		__asm__ __volatile__ ("hlt");
 	}
+#endif
 	/*
 	 * Stop all CPUs and turn off local APICs and the IO-APIC, so
 	 * other OSs see a clean IRQ state.
 	 */
-	smp_send_stop();
-	disable_IO_APIC();
-#endif
+	stop_apics();
 
 	if(!reboot_thru_bios) {
 		/* rebooting needs to touch the page at absolute addr 0 */
@@ -282,10 +282,12 @@
 
 void machine_halt(void)
 {
+	stop_apics();
 }
 
 void machine_power_off(void)
 {
+	stop_apics();
 	if (pm_power_off)
 		pm_power_off();
 }
diff -ur -X ../../dontdiff linux-2.5.59-kexec/arch/i386/kernel/smpboot.c linux-2.5.59-kexecfixes/arch/i386/kernel/smpboot.c
--- linux-2.5.59-kexec/arch/i386/kernel/smpboot.c	Fri Jan 17 07:52:09 2003
+++ linux-2.5.59-kexecfixes/arch/i386/kernel/smpboot.c	Thu Feb 13 10:14:44 2003
@@ -967,6 +967,7 @@
 	printk("CPU%d: ", 0);
 	print_cpu_info(&cpu_data[0]);
 
+	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
 	boot_cpu_logical_apicid = logical_smp_processor_id();
 
 	current_thread_info()->cpu = 0;
@@ -1026,8 +1027,6 @@
 	setup_local_APIC();
 	map_cpu_to_logical_apicid();
 
-	if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
-		BUG();
 
 	setup_portio_remap();
 
diff -ur -X ../../dontdiff linux-2.5.59-kexec/include/asm-i386/apic.h linux-2.5.59-kexecfixes/include/asm-i386/apic.h
--- linux-2.5.59-kexec/include/asm-i386/apic.h	Fri Jan 17 07:52:56 2003
+++ linux-2.5.59-kexecfixes/include/asm-i386/apic.h	Thu Feb 13 10:14:44 2003
@@ -96,6 +96,9 @@
 #define NMI_LOCAL_APIC	2
 #define NMI_INVALID	3
 
+extern void stop_apics(void);
+#else
+static inline void stop_apics(void) { }
 #endif /* CONFIG_X86_LOCAL_APIC */
 
 #endif /* __ASM_APIC_H */
diff -ur -X ../../dontdiff linux-2.5.59-kexec/include/asm-i386/apicdef.h linux-2.5.59-kexecfixes/include/asm-i386/apicdef.h
--- linux-2.5.59-kexec/include/asm-i386/apicdef.h	Fri Jan 17 07:52:15 2003
+++ linux-2.5.59-kexecfixes/include/asm-i386/apicdef.h	Thu Feb 13 10:14:44 2003
@@ -93,6 +93,7 @@
 #define			APIC_LVT_REMOTE_IRR		(1<<14)
 #define			APIC_INPUT_POLARITY		(1<<13)
 #define			APIC_SEND_PENDING		(1<<12)
+#define			APIC_MODE_MASK			0x700
 #define			GET_APIC_DELIVERY_MODE(x)	(((x)>>8)&0x7)
 #define			SET_APIC_DELIVERY_MODE(x,y)	(((x)&~0x700)|((y)<<8))
 #define				APIC_MODE_FIXED		0x0

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch
  2003-02-13 10:40 [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch Suparna Bhattacharya
@ 2003-02-13 15:09 ` Eric W. Biederman
  2003-02-14  3:29   ` Suparna Bhattacharya
  0 siblings, 1 reply; 10+ messages in thread
From: Eric W. Biederman @ 2003-02-13 15:09 UTC (permalink / raw)
  To: suparna; +Cc: fastboot, linux-kernel, mbligh

Suparna Bhattacharya <suparna@in.ibm.com> writes:

> Martin Bligh came up with a simple way to fix the kernel
> to enable kexec boot from any CPU. 
> 
> Rather than picking up boot cpu information from the MP 
> tables (which belong to the previous boot in the case of 
> kexec), it just sets it to the cpu its starting on.
> (See the changes in arch/i386/kernel/smpboot.c)
> 
> This simplifies the the kexec-hwfixes patch, since we
> no longer need to move to the boot cpu before stopping
> other processors. Which removes a lot of the unconditional
> patching of reboot.c and makes it less invasive, thanks to 
> Martin. Also, at panic time, cpu migration is something 
> that is best avoided.

I will agree with that, at least conditionally.  

I figure stop_apics can be removed from the panic path.

However stopping all of the cpus does seem to be something
that is needed on the panic path.  And if we stop cpus
what is wrong with cpu migration.  Or can we move the halt
of the cpus into the panic kernel?  That would be my real 
preference.

> It would be good if someone could test this out (on SMP)
> and confirm it works fine (I tried it on a 4way).
> 
> Eric, Do these changes look OK to you ? Did you have
> something similar in mind, when you were talking about
> enabling the kexec'd kernel to not care about which cpu
> it was running on ?

50%.  The normal case needs to shutdown the way it is currently doing.
So we need to audit the code a little more.

Basically the way I see it, in the normal case the kernel is responsible
for a clean shutdown of the kernel and all its devices.   No one else
knows better how to accomplish those tasks than the drivers running the kernel.

On the other hand during a panic the recovery kernel is responsible for
everything it possibly can handle.  Because we know something is broken
in the kernel calling kexec.

Eric

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch
  2003-02-13 15:09 ` Eric W. Biederman
@ 2003-02-14  3:29   ` Suparna Bhattacharya
  2003-02-14  4:08     ` Martin J. Bligh
                       ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Suparna Bhattacharya @ 2003-02-14  3:29 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: fastboot, linux-kernel, mbligh

On Thu, Feb 13, 2003 at 08:09:16AM -0700, Eric W. Biederman wrote:
> Suparna Bhattacharya <suparna@in.ibm.com> writes:
> 
> > This simplifies the the kexec-hwfixes patch, since we
> > no longer need to move to the boot cpu before stopping
> > other processors. Which removes a lot of the unconditional
> > patching of reboot.c and makes it less invasive, thanks to 
> > Martin. Also, at panic time, cpu migration is something 
> > that is best avoided.
> 
> I will agree with that, at least conditionally.  
> 
> I figure stop_apics can be removed from the panic path.
> 
> However stopping all of the cpus does seem to be something
> that is needed on the panic path.  And if we stop cpus
> what is wrong with cpu migration.  Or can we move the halt
> of the cpus into the panic kernel?  That would be my real 
> preference.

We still are stopping all cpus on a panic.
The difference is that we don't need to move to the boot cpu
and do this from there, since the new kernel can deal with
starting from any CPU.

The problem with cpu migration using set_cpus_allowed ?
My main concern is that it appears to wait for the migration 
thread to wakeup and do the needful .. goes through the scheduler 
etc.  

void set_cpus_allowed(task_t *p, unsigned long new_mask)
{
	..
	..
        wake_up_process(rq->migration_thread);

        wait_for_completion(&req.done);
}

We normally try to avoid this in the panic path -- we
don't know what state the system is in so try to depend on
as little functioning of the system as possible.

The approach that the current machine_restart uses for 
migration to the reboot cpu (i.e. issue an smp_call_function 
ipi, and let each cpu decide for itself what it needs to 
do) seems a little safer.

One of the things to try out in the crash dump
code is using an NMI to bring other cpus to a halt,
in case one or more cpus are stuck/deadlocked in a
tight loop in an interrupt handler (so that smp_call_function 
would wait forever), and also to make sure we stop activity in
the system as early as possible. (This of course is for 
archs that have NMI support). Have to make sure we can
still kexec successfully after that.

> 
> > It would be good if someone could test this out (on SMP)
> > and confirm it works fine (I tried it on a 4way).
> > 
> > Eric, Do these changes look OK to you ? Did you have
> > something similar in mind, when you were talking about
> > enabling the kexec'd kernel to not care about which cpu
> > it was running on ?
> 
> 50%.  The normal case needs to shutdown the way it is currently doing.
> So we need to audit the code a little more.

It still is doing exactly what the regular kernel was doing 
before. If you look closely at this patch, notice that it simply 
backs out most of the changes to reboot.c (so machine_restart
is very close to what it was before).

> 
> Basically the way I see it, in the normal case the kernel is responsible
> for a clean shutdown of the kernel and all it's devices.   No one else
> knows better how to accomplish those tasks then the drivers running the kernel.

Yes, and that's what is happening AFAICT. None of that
should have changed (at least I didn't intend it to change,
so let me know if I missed something).

> 
> On the other hand during a panic the recovery kernel is responsible for
> everything it possibly can handle.  Because we know something is broken
> in the kernel calling kexec.

As far as possible, yes.

Regards
Suparna

-- 
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Labs, India

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch
  2003-02-14  3:29   ` Suparna Bhattacharya
@ 2003-02-14  4:08     ` Martin J. Bligh
  2003-02-14  8:30       ` Eric W. Biederman
  2003-02-14  6:47     ` Martin J. Bligh
  2003-02-14  8:39     ` Eric W. Biederman
  2 siblings, 1 reply; 10+ messages in thread
From: Martin J. Bligh @ 2003-02-14  4:08 UTC (permalink / raw)
  To: suparna, Eric W. Biederman; +Cc: fastboot, linux-kernel

> We still are stopping all cpus on a panic.
> The difference is that we don't need to move to the boot cpu
> and do this from there, since the new kernel can deal with
> starting from any CPU.

The kernel always supported this - cpu IDs are dynamically assigned on
bootup ... and the boot cpu is always given number 0. There's nothing
magical about the boot CPU, it doesn't really matter which it is. The only
problem we had to fix last night was that the OS believes the BIOS mps
tables as to what the boot CPU is. It now just says ... "oh, I'm the boot
cpu  ... because I'm running this code".

This seems infinitely simpler and safer to me than trying to migrate
yourself around (potentially at panic time with a bad kernel). The only
thing that will be different is the *physical* apic id of the CPU, which
nothing uses after we boot anyway.

M.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch
  2003-02-14  3:29   ` Suparna Bhattacharya
  2003-02-14  4:08     ` Martin J. Bligh
@ 2003-02-14  6:47     ` Martin J. Bligh
  2003-02-14  8:32       ` Eric W. Biederman
  2003-02-14  8:39     ` Eric W. Biederman
  2 siblings, 1 reply; 10+ messages in thread
From: Martin J. Bligh @ 2003-02-14  6:47 UTC (permalink / raw)
  To: suparna, Eric W. Biederman; +Cc: fastboot, linux-kernel

Running on my 4-way P3 test box (just SMP, not NUMA) kexec_test
prints this:

Synchronizing SCSI caches: 
Shutting down devices
Starting new kernel
kexec_test 1.8 starting...
eax: 0E1FB007 ebx: 0000011C ecx: 00000000 edx: 00000000
esi: 00000000 edi: 00000000 esp: 00000000 ebp: 00000000
idt: 00000000 C0000000
gdt: 0000006F 000000A0
Switching descriptors.
Descriptors changed.
Legacy pic setup.
In real mode.

Without that I just get:

Synchronizing SCSI caches: 
Shutting down devices
Starting new kernel

Can someone interpret?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch
  2003-02-14  4:08     ` Martin J. Bligh
@ 2003-02-14  8:30       ` Eric W. Biederman
  0 siblings, 0 replies; 10+ messages in thread
From: Eric W. Biederman @ 2003-02-14  8:30 UTC (permalink / raw)
  To: Martin J. Bligh; +Cc: suparna, fastboot, linux-kernel

"Martin J. Bligh" <mbligh@aracnet.com> writes:

> > We still are stopping all cpus on a panic.
> > The difference is that we don't need to move to the boot cpu
> > and do this from there, since the new kernel can deal with
> > starting from any CPU.
> 
> The kernel always supported this - cpu IDs are dynamically assigned on
> bootup ... and the boot cpu is always given number 0. There's nothing
> magical about the boot CPU, it doesn't really matter which it is. The only
> problem we had to fix last night was that the OS believes the BIOS mps
> tables as to what the boot CPU is. It now just says ... "oh, I'm the boot
> cpu  ... because I'm running this code".
> 
> This seems infinitely simpler and safer to me than trying to migrate
> yourself around (potentially at panic time with a bad kernel). The only
> thing that will be different is the *physical* apic id of the CPU, which
> nothing uses after we boot anyway.

At panic time I agree that migration is a bad idea.  And the code looks
good for that case.  However for booting an arbitrary kernel that code needs
to be run on the original boot strap processor if at all possible.  As there
are kernels that cannot cope with booting up on the wrong cpu.

Eric


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch
  2003-02-14  6:47     ` Martin J. Bligh
@ 2003-02-14  8:32       ` Eric W. Biederman
  2003-02-14 15:32         ` Martin J. Bligh
  0 siblings, 1 reply; 10+ messages in thread
From: Eric W. Biederman @ 2003-02-14  8:32 UTC (permalink / raw)
  To: Martin J. Bligh; +Cc: suparna, fastboot, linux-kernel

"Martin J. Bligh" <mbligh@aracnet.com> writes:

> Running on my 4-way P3 test box (just SMP, not NUMA) kexec_test
> prints this:
> 
> Synchronizing SCSI caches: 
> Shutting down devices
> Starting new kernel
> kexec_test 1.8 starting...
> eax: 0E1FB007 ebx: 0000011C ecx: 00000000 edx: 00000000
> esi: 00000000 edi: 00000000 esp: 00000000 ebp: 00000000
> idt: 00000000 C0000000
> gdt: 0000006F 000000A0
> Switching descriptors.
> Descriptors changed.
> Legacy pic setup.
> In real mode.
> 
> Without that I just get:
> 
> Synchronizing SCSI caches: 
> Shutting down devices
> Starting new kernel
> 
> Can someone interpret?

Besides the fact that you cannot make BIOS calls, and kexec is working
there is not much to say.  You cannot kexec another kernel?

Eric


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch
  2003-02-14  3:29   ` Suparna Bhattacharya
  2003-02-14  4:08     ` Martin J. Bligh
  2003-02-14  6:47     ` Martin J. Bligh
@ 2003-02-14  8:39     ` Eric W. Biederman
  2 siblings, 0 replies; 10+ messages in thread
From: Eric W. Biederman @ 2003-02-14  8:39 UTC (permalink / raw)
  To: suparna; +Cc: fastboot, linux-kernel, mbligh

Suparna Bhattacharya <suparna@in.ibm.com> writes:

> On Thu, Feb 13, 2003 at 08:09:16AM -0700, Eric W. Biederman wrote:
> 
> We still are stopping all cpus on a panic.
> The difference is that we don't need to move to the boot cpu
> and do this from there, since the new kernel can deal with
> starting from any CPU.

I think I would like to maintain one patch that is for
the recovery kernel, and a second patch that is for the kernel
calling kexec.  So that we don't get things too confused.


> The problem with cpu migration using set_cpus_allowed ?
[snip]
> The approach that the current machine_restart uses for 
> migration to the reboot cpu (i.e. issue and smp_call_function 
> ipi, and let each cpu decided for itself what it needs to 
> do) seems a little safer.

I agree with this analysis for the panic path.

> One of the things to try out in the crash dump
> code is using an NMI to bring other cpus to a halt
> in case one or more cpus are stuck/deadlocked in 
> tight loop in an interrupt handler smp_call_function 
> waits forever, and also to make sure we stop activity in
> the system as early as possible. (This of course is for 
> archs that have NMI support). Have to make sure we can
> still kexec sucessfully after that.

Hmm.  As my memory serves you can send a startup ipi anytime you
want so it may be reasonable to just ignore the other cpus
and wait until we get to trusted recovery code to do something
about them.  Though that implies reserving some memory below 1MB
which I'm not terribly fond of. 

I guess the place to start is the current implementation of
smp_send_stop, and we can see how the code goes from there.

> > > It would be good if someone could test this out (on SMP)
> > > and confirm it works fine (I tried it on a 4way).
> > > 
> > > Eric, Do these changes look OK to you ? Did you have
> > > something similar in mind, when you were talking about
> > > enabling the kexec'd kernel to not care about which cpu
> > > it was running on ?
> > 
> > 50%.  The normal case needs to shutdown the way it is currently doing.
> > So we need to audit the code a little more.
> 
> It still is doing exactly what the regular kernel was doing 
> before. If you look closely at this patch, notice that it simply 
> backs out most of the changes to reboot.c (so machine_restart
> is very close to what it was before).

For kexec without panic it keeps us from running on the bootstrap
processor, unless I missed something.

> > Basically the way I see it, in the normal case the kernel is responsible
> > for a clean shutdown of the kernel and all it's devices.   No one else
> > knows better how to accomplish those tasks then the drivers running the
> kernel.
> 
> 
> Yes, and that's what is happening AFAICT. None of that
> should have changed (at least I didn't intend it to change,
> so let me know if I missed something).
> 
> > 
> > On the other hand during a panic the recovery kernel is responsible for
> > everything it possibly can handle.  Because we know something is broken
> > in the kernel calling kexec.
> 
> As far as possible, yes.

Good we agree on philosophy if not all of the little technical details.


Eric

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch
  2003-02-14  8:32       ` Eric W. Biederman
@ 2003-02-14 15:32         ` Martin J. Bligh
  2003-02-14 16:00           ` Eric W. Biederman
  0 siblings, 1 reply; 10+ messages in thread
From: Martin J. Bligh @ 2003-02-14 15:32 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: suparna, fastboot, linux-kernel

>> Running on my 4-way P3 test box (just SMP, not NUMA) kexec_test
>> prints this:
>> 
>> Synchronizing SCSI caches: 
>> Shutting down devices
>> Starting new kernel
>> kexec_test 1.8 starting...
>> eax: 0E1FB007 ebx: 0000011C ecx: 00000000 edx: 00000000
>> esi: 00000000 edi: 00000000 esp: 00000000 ebp: 00000000
>> idt: 00000000 C0000000
>> gdt: 0000006F 000000A0
>> Switching descriptors.
>> Descriptors changed.
>> Legacy pic setup.
>> In real mode.
>> 
>> Without that I just get:
>> 
>> Synchronizing SCSI caches: 
>> Shutting down devices
>> Starting new kernel
>> 
>> Can someone interpret?
> 
> Besides the fact that you cannot make BIOS calls, and kexec is working
> there is not much to say.  You cannot kexec another kernel?

Nope, if I just kexec the same 2.5.59 kernel+kexec patches that I'm booted
on it says: 

Synchronizing SCSI caches: 
Shutting down devices
Starting new kernel

Could you give me a high-level sketch of what you're doing? kexec -l loads
the new kernel, then what do you do? Drop back into real mode and jump to
the normal kernel entry point? Or decompress by hand, do some alternate
setup of the early page tables and try to jump in at the 32-bit entry point?

Is all I can assume from the above that I crash in the new kernel before
console_init()? Or should I expect something from the decompress code?

Thanks,

M.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch
  2003-02-14 15:32         ` Martin J. Bligh
@ 2003-02-14 16:00           ` Eric W. Biederman
  0 siblings, 0 replies; 10+ messages in thread
From: Eric W. Biederman @ 2003-02-14 16:00 UTC (permalink / raw)
  To: Martin J. Bligh; +Cc: suparna, fastboot, linux-kernel

"Martin J. Bligh" <mbligh@aracnet.com> writes:

> >> Running on my 4-way P3 test box (just SMP, not NUMA) kexec_test
> >> prints this:
> >> 
> >> Synchronizing SCSI caches: 
> >> Shutting down devices
> >> Starting new kernel
> >> kexec_test 1.8 starting...
> >> eax: 0E1FB007 ebx: 0000011C ecx: 00000000 edx: 00000000
> >> esi: 00000000 edi: 00000000 esp: 00000000 ebp: 00000000
> >> idt: 00000000 C0000000
> >> gdt: 0000006F 000000A0
> >> Switching descriptors.
> >> Descriptors changed.
> >> Legacy pic setup.
> >> In real mode.
> >> 
> >> Without that I just get:
> >> 
> >> Synchronizing SCSI caches: 
> >> Shutting down devices
> >> Starting new kernel
> >> 
> >> Can someone interpret?
> > 
> > Besides the fact that you cannot make BIOS calls, and kexec is working
> > there is not much to say.  You cannot kexec another kernel?
> 
> Nope, if I just kexec the same 2.5.59 kernel+kexec patches that I'm booted
> on it says: 
> 
> Synchronizing SCSI caches: 
> Shutting down devices
> Starting new kernel
> 
> Could you give me a high-level sketch of what you're doing? kexec -l loads
> the new kernel, then what do you do? Drop back into real mode and jump to
> the normal kernel entry point? Or decompress by hand, do some alternate
> setup of the early page tables and try to jump in at the 32-bit entry point?

With Interrupts disabled.
machine_kexec switches the cpu to physical address mode (it turns the mmu off).
Copies the kernel to where it needs to run.
Then it jumps to an entry point.

kexec has put in a stub piece of code, and pointed the entry point at that location.
The stub code sets up a gdt that the kernel can cope with.
The stub code sets up a parameter table just like the one the real mode code generates.
The stub code jumps in at the 32bit entry point.

[There is another stub that will attempt to start the kernel at the 16bit entry
 point but it is not used by default].

Interrupts are off the entire time.

> Is all I can assume from the above that I crash in the new kernel before
> console_init()?

Yes.

> Or should I expect something from the decompress code?

It is not hard to patch the decompression code to display a message, but
by default it does not output to serial...

You might want to run mkelfImage on a vmlinux so you can skip the decompression
step.  It adds a stub to the elf file that gets passed to kexec so that
you can skip the decompression.

ftp://ftp.lnxi.com/pub/src/mkelfImage/mkelfImage-2.x

Also I have some assembly language macros that display text out the serial port, if you want
to instrument up the kernel you are booting.

Eric

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2003-02-14 15:50 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-02-13 10:40 [KEXEC][PATCH] Modified (smaller) x86 kexec hwfixes patch Suparna Bhattacharya
2003-02-13 15:09 ` Eric W. Biederman
2003-02-14  3:29   ` Suparna Bhattacharya
2003-02-14  4:08     ` Martin J. Bligh
2003-02-14  8:30       ` Eric W. Biederman
2003-02-14  6:47     ` Martin J. Bligh
2003-02-14  8:32       ` Eric W. Biederman
2003-02-14 15:32         ` Martin J. Bligh
2003-02-14 16:00           ` Eric W. Biederman
2003-02-14  8:39     ` Eric W. Biederman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.