linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86: Voluntary leave_mm before entering ACPI C3
@ 2007-12-19 18:34 Venki Pallipadi
  2007-12-19 19:32 ` Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Venki Pallipadi @ 2007-12-19 18:34 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Len Brown; +Cc: linux-kernel


Avoid TLB flush IPIs during C3 states by voluntary leave_mm()
before entering C3.

The performance impact of TLB flush on C3 should not be significant with
respect to C3 wakeup latency. Also, CPUs tend to flush TLB in hardware while in
C3 anyways.

On an 8 logical CPU system, running make -j2, the number of tlbflush IPIs goes
down from 40 per second to ~ 0. Total number of interrupts during the run
of this workload was ~1200 per second, which makes it ~3% savings in wakeups.

There was no measurable performance or power impact however.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>

Index: linux-2.6.24-rc/arch/x86/kernel/smp_64.c
===================================================================
--- linux-2.6.24-rc.orig/arch/x86/kernel/smp_64.c
+++ linux-2.6.24-rc/arch/x86/kernel/smp_64.c
@@ -70,7 +70,7 @@ static DEFINE_PER_CPU(union smp_flush_st
  * We cannot call mmdrop() because we are in interrupt context, 
  * instead update mm->cpu_vm_mask.
  */
-static inline void leave_mm(int cpu)
+void leave_mm(int cpu)
 {
 	if (read_pda(mmu_state) == TLBSTATE_OK)
 		BUG();
Index: linux-2.6.24-rc/include/asm-x86/acpi_32.h
===================================================================
--- linux-2.6.24-rc.orig/include/asm-x86/acpi_32.h
+++ linux-2.6.24-rc/include/asm-x86/acpi_32.h
@@ -31,6 +31,7 @@
 #include <acpi/pdc_intel.h>
 
 #include <asm/system.h>		/* defines cmpxchg */
+#include <asm/mmu.h>
 
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
@@ -138,6 +139,8 @@ static inline void disable_acpi(void) { 
 
 #define ARCH_HAS_POWER_INIT	1
 
+#define acpi_unlazy_tlb(x)	leave_mm(x)
+
 #endif /*__KERNEL__*/
 
 #endif /*_ASM_ACPI_H*/
Index: linux-2.6.24-rc/include/asm-x86/acpi_64.h
===================================================================
--- linux-2.6.24-rc.orig/include/asm-x86/acpi_64.h
+++ linux-2.6.24-rc/include/asm-x86/acpi_64.h
@@ -30,6 +30,7 @@
 
 #include <acpi/pdc_intel.h>
 #include <asm/numa.h>
+#include <asm/mmu.h>
 
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
@@ -148,6 +149,8 @@ static inline void acpi_fake_nodes(const
 }
 #endif
 
+#define acpi_unlazy_tlb(x)	leave_mm(x)
+
 #endif /*__KERNEL__*/
 
 #endif /*_ASM_ACPI_H*/
Index: linux-2.6.24-rc/drivers/acpi/processor_idle.c
===================================================================
--- linux-2.6.24-rc.orig/drivers/acpi/processor_idle.c
+++ linux-2.6.24-rc/drivers/acpi/processor_idle.c
@@ -530,6 +530,7 @@ static void acpi_processor_idle(void)
 		break;
 
 	case ACPI_STATE_C3:
+		acpi_unlazy_tlb(smp_processor_id());
 		/*
 		 * disable bus master
 		 * bm_check implies we need ARB_DIS
@@ -1485,6 +1486,7 @@ static int acpi_idle_enter_bm(struct cpu
 		return 0;
 	}
 
+	acpi_unlazy_tlb(smp_processor_id());
 	/*
 	 * Must be done before busmaster disable as we might need to
 	 * access HPET !
Index: linux-2.6.24-rc/include/asm-ia64/acpi.h
===================================================================
--- linux-2.6.24-rc.orig/include/asm-ia64/acpi.h
+++ linux-2.6.24-rc/include/asm-ia64/acpi.h
@@ -126,6 +126,8 @@ extern int __devinitdata pxm_to_nid_map[
 extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
 #endif
 
+#define acpi_unlazy_tlb(x)
+
 #endif /*__KERNEL__*/
 
 #endif /*_ASM_ACPI_H*/
Index: linux-2.6.24-rc/include/asm-x86/mmu.h
===================================================================
--- linux-2.6.24-rc.orig/include/asm-x86/mmu.h
+++ linux-2.6.24-rc/include/asm-x86/mmu.h
@@ -20,4 +20,6 @@ typedef struct { 
 	void *vdso;
 } mm_context_t;
 
+void leave_mm(int cpu);
+
 #endif /* _ASM_X86_MMU_H */
Index: linux-2.6.24-rc/arch/x86/kernel/smp_32.c
===================================================================
--- linux-2.6.24-rc.orig/arch/x86/kernel/smp_32.c
+++ linux-2.6.24-rc/arch/x86/kernel/smp_32.c
@@ -256,7 +256,7 @@ static DEFINE_SPINLOCK(tlbstate_lock);
  * We need to reload %cr3 since the page tables may be going
  * away from under us..
  */
-void leave_mm(unsigned long cpu)
+void leave_mm(int cpu)
 {
 	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
 		BUG();
Index: linux-2.6.24-rc/include/asm-x86/mmu_context_32.h
===================================================================
--- linux-2.6.24-rc.orig/include/asm-x86/mmu_context_32.h
+++ linux-2.6.24-rc/include/asm-x86/mmu_context_32.h
@@ -32,8 +32,6 @@ static inline void enter_lazy_tlb(struct
 #endif
 }
 
-void leave_mm(unsigned long cpu);
-
 static inline void switch_mm(struct mm_struct *prev,
 			     struct mm_struct *next,
 			     struct task_struct *tsk)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-19 18:34 [PATCH] x86: Voluntary leave_mm before entering ACPI C3 Venki Pallipadi
@ 2007-12-19 19:32 ` Ingo Molnar
  2007-12-19 19:36   ` H. Peter Anvin
                     ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Ingo Molnar @ 2007-12-19 19:32 UTC (permalink / raw)
  To: Venki Pallipadi; +Cc: Thomas Gleixner, H. Peter Anvin, Len Brown, linux-kernel


* Venki Pallipadi <venkatesh.pallipadi@intel.com> wrote:

> Aviod TLB flush IPIs during C3 states by voluntary leave_mm() before 
> entering C3.
> 
> The performance impact of TLB flush on C3 should not be significant 
> with respect to C3 wakeup latency. Also, CPUs tend to flush TLB in 
> hardware while in C3 anyways.
> 
> On a 8 logical CPU system, running make -j2, the number of tlbflush 
> IPIs goes down from 40 per second to ~ 0. Total number of interrupts 
> during the run of this workload was ~1200 per second, which makes it 
> ~3% savings in wakeups.
> 
> There was no measurable performance or power impact however.

thanks, applied to x86.git. Nice and elegant patch!

Btw., since the TLB flush state machine is really subtle and fragile, 
could you try to run the following mmap stresstest i wrote some time 
ago:

   http://redhat.com/~mingo/threaded-mmap-stresstest/

for a couple of hours. It runs nr_cpus threads which then do a "random 
crazy mix" of mappings/unmappings/remappings of a 800 MB memory window. 
The more sockets/cores, the crazier the TLB races get ;-)

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-19 19:32 ` Ingo Molnar
@ 2007-12-19 19:36   ` H. Peter Anvin
  2007-12-19 19:40     ` Ingo Molnar
  2007-12-19 20:54   ` Ingo Molnar
  2007-12-20  2:08   ` Venki Pallipadi
  2 siblings, 1 reply; 15+ messages in thread
From: H. Peter Anvin @ 2007-12-19 19:36 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Venki Pallipadi, Thomas Gleixner, Len Brown, linux-kernel

Ingo Molnar wrote:
> * Venki Pallipadi <venkatesh.pallipadi@intel.com> wrote:
> 
>> Aviod TLB flush IPIs during C3 states by voluntary leave_mm() before 
>> entering C3.
>>
>> The performance impact of TLB flush on C3 should not be significant 
>> with respect to C3 wakeup latency. Also, CPUs tend to flush TLB in 
>> hardware while in C3 anyways.
>>

Are there any CPUs around which *don't* flush the TLB across C3?  (I 
guess it's not guaranteed by the spec, though, and as TLBs grow larger 
there might be incentive to keep them online.)

	-hpa

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-19 19:36   ` H. Peter Anvin
@ 2007-12-19 19:40     ` Ingo Molnar
  2007-12-19 19:48       ` H. Peter Anvin
  2007-12-19 19:56       ` Venki Pallipadi
  0 siblings, 2 replies; 15+ messages in thread
From: Ingo Molnar @ 2007-12-19 19:40 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: Venki Pallipadi, Thomas Gleixner, Len Brown, linux-kernel


* H. Peter Anvin <hpa@zytor.com> wrote:

> Ingo Molnar wrote:
>> * Venki Pallipadi <venkatesh.pallipadi@intel.com> wrote:
>>
>>> Aviod TLB flush IPIs during C3 states by voluntary leave_mm() before 
>>> entering C3.
>>>
>>> The performance impact of TLB flush on C3 should not be significant with 
>>> respect to C3 wakeup latency. Also, CPUs tend to flush TLB in hardware 
>>> while in C3 anyways.
>>>
>
> Are there any CPUs around which *don't* flush the TLB across C3?  (I 
> guess it's not guaranteed by the spec, though, and as TLBs grow larger 
> there might be incentive to keep them online.)

i dont think it's required for C3 to even turn off any portion of the 
CPU - if an interrupt arrives after the C3 sequence is initiated but 
just before dirty cachelines have been flushed then the CPU can just 
return without touching anything (such as the TLB) - right? So i dont 
think there's any implicit guarantee of TLB flushing (nor should there 
be), but in practice, a good C3 sequence would (statistically) turn off 
large portions of the CPU and hence the TLB as well.

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-19 19:40     ` Ingo Molnar
@ 2007-12-19 19:48       ` H. Peter Anvin
  2007-12-19 20:23         ` Venki Pallipadi
  2007-12-20  7:53         ` Arjan van de Ven
  2007-12-19 19:56       ` Venki Pallipadi
  1 sibling, 2 replies; 15+ messages in thread
From: H. Peter Anvin @ 2007-12-19 19:48 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Venki Pallipadi, Thomas Gleixner, Len Brown, linux-kernel

Ingo Molnar wrote:
> 
> i dont think it's required for C3 to even turn off any portion of the 
> CPU - if an interrupt arrives after the C3 sequence is initiated but 
> just before dirty cachelines have been flushed then the CPU can just 
> return without touching anything (such as the TLB) - right? So i dont 
> think there's any implicit guarantee of TLB flushing (nor should there 
> be), but in practice, a good C3 sequence would (statistically) turn off 
> large portions of the CPU and hence the TLB as well.
> 

I think C3 guarantees that the cache contents stay intact, and thus it 
might make sense in some technology to preserve the TLB as well (being a 
kind of cache.)

Otherwise, what you say here of course is absolutely correct.

	-hpa

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-19 19:40     ` Ingo Molnar
  2007-12-19 19:48       ` H. Peter Anvin
@ 2007-12-19 19:56       ` Venki Pallipadi
  1 sibling, 0 replies; 15+ messages in thread
From: Venki Pallipadi @ 2007-12-19 19:56 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: H. Peter Anvin, Venki Pallipadi, Thomas Gleixner, Len Brown,
	linux-kernel

On Wed, Dec 19, 2007 at 08:40:32PM +0100, Ingo Molnar wrote:
> 
> * H. Peter Anvin <hpa@zytor.com> wrote:
> 
> > Ingo Molnar wrote:
> >> * Venki Pallipadi <venkatesh.pallipadi@intel.com> wrote:
> >>
> >>> Aviod TLB flush IPIs during C3 states by voluntary leave_mm() before 
> >>> entering C3.
> >>>
> >>> The performance impact of TLB flush on C3 should not be significant with 
> >>> respect to C3 wakeup latency. Also, CPUs tend to flush TLB in hardware 
> >>> while in C3 anyways.
> >>>
> >
> > Are there any CPUs around which *don't* flush the TLB across C3?  (I 
> > guess it's not guaranteed by the spec, though, and as TLBs grow larger 
> > there might be incentive to keep them online.)
> 
> i dont think it's required for C3 to even turn off any portion of the 
> CPU - if an interrupt arrives after the C3 sequence is initiated but 
> just before dirty cachelines have been flushed then the CPU can just 
> return without touching anything (such as the TLB) - right? So i dont 
> think there's any implicit guarantee of TLB flushing (nor should there 
> be), but in practice, a good C3 sequence would (statistically) turn off 
> large portions of the CPU and hence the TLB as well.
> 

Yes. There are cases where hardware/BIOS can do C-state changes behind the OS's
back, e.g. staying in C1 for a while and then going to C2/C3 after a while. In
such cases, there will be times when TLBs are not really flushed in hardware.
But ideally, if C3 results in deep idle, the TLBs would be turned off. And in
cases where we wake up earlier than expected, the C-state policy should identify
that and choose a lower C-state next time around.

I also tried one variation of this, wherein I only do the flush if more than
one CPU is sharing the mm. But that did not help the test case I was using
(which is probably the worst case). What I would see is:
Process runs on CPU x and mm is not shared
Goes idle (C3) waiting on something
Wakes up on CPU y which will now start sharing mm
and would send flush IPI anyway

Thanks,
Venki


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-19 19:48       ` H. Peter Anvin
@ 2007-12-19 20:23         ` Venki Pallipadi
  2007-12-20  7:53         ` Arjan van de Ven
  1 sibling, 0 replies; 15+ messages in thread
From: Venki Pallipadi @ 2007-12-19 20:23 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, Venki Pallipadi, Thomas Gleixner, Len Brown,
	linux-kernel

On Wed, Dec 19, 2007 at 11:48:14AM -0800, H. Peter Anvin wrote:
> Ingo Molnar wrote:
> >
> >i dont think it's required for C3 to even turn off any portion of the 
> >CPU - if an interrupt arrives after the C3 sequence is initiated but 
> >just before dirty cachelines have been flushed then the CPU can just 
> >return without touching anything (such as the TLB) - right? So i dont 
> >think there's any implicit guarantee of TLB flushing (nor should there 
> >be), but in practice, a good C3 sequence would (statistically) turn off 
> >large portions of the CPU and hence the TLB as well.
> >
> 
> I think C3 guarantees that the cache contents stay intact, and thus it 
> might make sense in some technology to preserve the TLB as well (being a 
> kind of cache.)
> 
> Otherwise, what you say here of course is absolutely correct.
> 

C3 does not guarantee all cache contents. In fact, at least on Intel,
L1 will almost always be flushed. Newer, more power-efficient CPUs do dynamic
cache sizing [1].

C3 just guarantees that the caches are coherent. That is, if they are intact,
then DMA will keep cache consistent.

Thanks,
Venki

[1] - http://download.intel.com/products/processor/core2duo/mobile_prod_brief.pdf


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-19 19:32 ` Ingo Molnar
  2007-12-19 19:36   ` H. Peter Anvin
@ 2007-12-19 20:54   ` Ingo Molnar
  2007-12-20  2:08   ` Venki Pallipadi
  2 siblings, 0 replies; 15+ messages in thread
From: Ingo Molnar @ 2007-12-19 20:54 UTC (permalink / raw)
  To: Venki Pallipadi; +Cc: Thomas Gleixner, H. Peter Anvin, Len Brown, linux-kernel


FYI, your patch needed the fix below for !SMP. It works fine otherwise.

	Ingo

---
 include/asm-x86/mmu.h |    6 ++++++
 1 file changed, 6 insertions(+)

Index: linux/include/asm-x86/mmu.h
===================================================================
--- linux.orig/include/asm-x86/mmu.h
+++ linux/include/asm-x86/mmu.h
@@ -20,6 +20,12 @@ typedef struct { 
 	void *vdso;
 } mm_context_t;
 
+#ifdef CONFIG_SMP
 void leave_mm(int cpu);
+#else
+static inline void leave_mm(int cpu)
+{
+}
+#endif
 
 #endif /* _ASM_X86_MMU_H */

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-19 19:32 ` Ingo Molnar
  2007-12-19 19:36   ` H. Peter Anvin
  2007-12-19 20:54   ` Ingo Molnar
@ 2007-12-20  2:08   ` Venki Pallipadi
  2007-12-20  9:31     ` Ingo Molnar
  2 siblings, 1 reply; 15+ messages in thread
From: Venki Pallipadi @ 2007-12-20  2:08 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Venki Pallipadi, Thomas Gleixner, H. Peter Anvin, Len Brown,
	linux-kernel

On Wed, Dec 19, 2007 at 08:32:55PM +0100, Ingo Molnar wrote:
> 
> * Venki Pallipadi <venkatesh.pallipadi@intel.com> wrote:
> 
> > Aviod TLB flush IPIs during C3 states by voluntary leave_mm() before 
> > entering C3.
> > 
> > The performance impact of TLB flush on C3 should not be significant 
> > with respect to C3 wakeup latency. Also, CPUs tend to flush TLB in 
> > hardware while in C3 anyways.
> > 
> > On a 8 logical CPU system, running make -j2, the number of tlbflush 
> > IPIs goes down from 40 per second to ~ 0. Total number of interrupts 
> > during the run of this workload was ~1200 per second, which makes it 
> > ~3% savings in wakeups.
> > 
> > There was no measurable performance or power impact however.
> 
> thanks, applied to x86.git. Nice and elegant patch!
> 
> Btw., since the TLB flush state machine is really subtle and fragile, 
> could you try to run the following mmap stresstest i wrote some time 
> ago:
> 
>    http://redhat.com/~mingo/threaded-mmap-stresstest/
> 
> for a couple of hours. It runs nr_cpus threads which then do a "random 
> crazy mix" of mappings/unmappings/remappings of a 800 MB memory window. 
> The more sockets/cores, the crazier the TLB races get ;-)
> 

Ingo,

I ran this stress test on two systems (8 cores and 2 cores) for over
4 hours without any issues. There was more than 20% C3 time during the
run. So, this C3 tlbflush path must have been stressed well during the run.

And sorry about the patch not working on UP config. That was a silly oversight
on my part.

Thanks,
Venki

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-19 19:48       ` H. Peter Anvin
  2007-12-19 20:23         ` Venki Pallipadi
@ 2007-12-20  7:53         ` Arjan van de Ven
  2007-12-20 16:16           ` H. Peter Anvin
  1 sibling, 1 reply; 15+ messages in thread
From: Arjan van de Ven @ 2007-12-20  7:53 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, Venki Pallipadi, Thomas Gleixner, Len Brown,
	linux-kernel

On Wed, 19 Dec 2007 11:48:14 -0800
"H. Peter Anvin" <hpa@zytor.com> wrote:

> 
> I think C3 guarantees that the cache contents stay intact, and thus
> it might make sense in some technology to preserve the TLB as well
> (being a kind of cache.)

that sounds nice. It's fiction though ;-)

The thing to realize is that linux only sees "ACPI C3"; the BIOS maps that C3 to.. well any of the C states the processor in the system has. What you're saying is afaik correct for the *hardware* C3, not for the "C3" that Linux sees..


-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-20  2:08   ` Venki Pallipadi
@ 2007-12-20  9:31     ` Ingo Molnar
  0 siblings, 0 replies; 15+ messages in thread
From: Ingo Molnar @ 2007-12-20  9:31 UTC (permalink / raw)
  To: Venki Pallipadi; +Cc: Thomas Gleixner, H. Peter Anvin, Len Brown, linux-kernel


* Venki Pallipadi <venkatesh.pallipadi@intel.com> wrote:

> > Btw., since the TLB flush state machine is really subtle and 
> > fragile, could you try to run the following mmap stresstest i wrote 
> > some time ago:
> > 
> >    http://redhat.com/~mingo/threaded-mmap-stresstest/
> > 
> > for a couple of hours. It runs nr_cpus threads which then do a 
> > "random crazy mix" of mappings/unmappings/remappings of a 800 MB 
> > memory window. The more sockets/cores, the crazier the TLB races get 
> > ;-)
> > 
> 
> Ingo,
> 
> I ran this stress test on two systems (8 cores and 2 cores) for over 4 
> hours without any issues. There was more than 20% C3 time during the 
> run. So, this C3 tlbflush path must have been stressed well during the 
> run.

ok, great. Regarding power consumption: i suspect a real difference will 
only show up on multi-socket systems that can do deeper C modes, or on 
multicore systems that will benefit from longer idle time on another 
core. (i suspect most multicore CPUs today will only truly save 
significant amounts of power if all cores are idle.) In any case, not 
doing these extra IPIs is definitely a plus.

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-20  7:53         ` Arjan van de Ven
@ 2007-12-20 16:16           ` H. Peter Anvin
  2007-12-20 18:22             ` Arjan van de Ven
  2007-12-20 19:28             ` Len Brown
  0 siblings, 2 replies; 15+ messages in thread
From: H. Peter Anvin @ 2007-12-20 16:16 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Ingo Molnar, Venki Pallipadi, Thomas Gleixner, Len Brown,
	linux-kernel

Arjan van de Ven wrote:
> On Wed, 19 Dec 2007 11:48:14 -0800
> "H. Peter Anvin" <hpa@zytor.com> wrote:
> 
>> I think C3 guarantees that the cache contents stay intact, and thus
>> it might make sense in some technology to preserve the TLB as well
>> (being a kind of cache.)
> 
> that sounds nice. It's fiction though ;-)
> 
> The thing to realize is that linux only sees "ACPI C3"; the BIOS maps that C3 to.. well any of the C states the processor in the system has. What you're saying is afaik correct for the *hardware* C3, not for the "C3" that Linux sees..
> 

Well, it can only map ACPI C3 to a state which is no more "dead" than 
what would normally be permitted by C3.  IIRC, C3 is allowed to require 
that DMA be turned off (unlike C2), but is not allowed to lose the CPU 
state.

	-hpa

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-20 16:16           ` H. Peter Anvin
@ 2007-12-20 18:22             ` Arjan van de Ven
  2007-12-20 18:32               ` H. Peter Anvin
  2007-12-20 19:28             ` Len Brown
  1 sibling, 1 reply; 15+ messages in thread
From: Arjan van de Ven @ 2007-12-20 18:22 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, Venki Pallipadi, Thomas Gleixner, Len Brown,
	linux-kernel

On Thu, 20 Dec 2007 08:16:54 -0800
"H. Peter Anvin" <hpa@zytor.com> wrote:

> Arjan van de Ven wrote:
> > On Wed, 19 Dec 2007 11:48:14 -0800
> > "H. Peter Anvin" <hpa@zytor.com> wrote:
> > 
> >> I think C3 guarantees that the cache contents stay intact, and thus
> >> it might make sense in some technology to preserve the TLB as well
> >> (being a kind of cache.)
> > 
> > that sounds nice. It's fiction though ;-)
> > 
> > The thing to realize is that linux only sees "ACPI C3"; the BIOS
> > maps that C3 to.. well any of the C states the processor in the
> > system has. What you're saying is afaik correct for the *hardware*
> > C3, not for the "C3" that Linux sees..
> > 
> 
> Well, it can only map ACPI C3 to a state which is no more "dead" than 
> what would normally be permitted by C3.  IIRC, C3 is allowed to
> require that DMA be turned off (unlike C2), but is not allowed to
> lose the CPU state.

state isn't lost if the tlb or the caches are flushed... 
(properly, eg all pending writebacks are written back first etc)


-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-20 18:22             ` Arjan van de Ven
@ 2007-12-20 18:32               ` H. Peter Anvin
  0 siblings, 0 replies; 15+ messages in thread
From: H. Peter Anvin @ 2007-12-20 18:32 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Ingo Molnar, Venki Pallipadi, Thomas Gleixner, Len Brown,
	linux-kernel

Arjan van de Ven wrote:
> On Thu, 20 Dec 2007 08:16:54 -0800
> "H. Peter Anvin" <hpa@zytor.com> wrote:
> 
>> Arjan van de Ven wrote:
>>> On Wed, 19 Dec 2007 11:48:14 -0800
>>> "H. Peter Anvin" <hpa@zytor.com> wrote:
>>>
>>>> I think C3 guarantees that the cache contents stay intact, and thus
>>>> it might make sense in some technology to preserve the TLB as well
>>>> (being a kind of cache.)
>>> that sounds nice. It's fiction though ;-)
>>>
>>> The thing to realize is that linux only sees "ACPI C3"; the BIOS
>>> maps that C3 to.. well any of the C states the processor in the
>>> system has. What you're saying is afaik correct for the *hardware*
>>> C3, not for the "C3" that Linux sees..
>>>
>> Well, it can only map ACPI C3 to a state which is no more "dead" than 
>> what would normally be permitted by C3.  IIRC, C3 is allowed to
>> require that DMA be turned off (unlike C2), but is not allowed to
>> lose the CPU state.
> 
> state isn't lost if the tlb or the caches are flushed... 
> (properly, eg all pending writebacks are written back first etc)
> 

Oh, right.  My bad.

Of course C3 doesn't guarantee cache retention, only cache coherency.

	-hpa

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] x86: Voluntary leave_mm before entering ACPI C3
  2007-12-20 16:16           ` H. Peter Anvin
  2007-12-20 18:22             ` Arjan van de Ven
@ 2007-12-20 19:28             ` Len Brown
  1 sibling, 0 replies; 15+ messages in thread
From: Len Brown @ 2007-12-20 19:28 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Arjan van de Ven, Ingo Molnar, Venki Pallipadi, Thomas Gleixner,
	linux-kernel

On Thursday 20 December 2007 11:16, H. Peter Anvin wrote:
> Arjan van de Ven wrote:
> > On Wed, 19 Dec 2007 11:48:14 -0800
> > "H. Peter Anvin" <hpa@zytor.com> wrote:
> > 
> >> I think C3 guarantees that the cache contents stay intact, and thus
> >> it might make sense in some technology to preserve the TLB as well
> >> (being a kind of cache.)
> > 
> > that sounds nice. It's fiction though ;-)
> > 
> > The thing to realize is that linux only sees "ACPI C3";
> > the BIOS maps that C3 to..
> > well any of the C states the processor in the system has.
> > What you're saying is afaik correct for the *hardware* C3, not for the "C3" that Linux sees..   
> 
> Well, it can only map ACPI C3 to a state which is no more "dead" than 
> what would normally be permitted by C3.  IIRC, C3 is allowed to require 
> that DMA be turned off (unlike C2), but is not allowed to lose the CPU 
> state.


Re: mapping HW to ACPI C-states.

Right, it is fair game for the BIOS to map a "shallower" hardware C-state
to a "deeper" ACPI C-state.

Re: CPU state

All C-states preserve the CPU SW programming state.
(eg. while it may be saved and restored in HW,
 it appears to SW to be always intact).

Re: C3 guarantees that the cache contents stay intact

This is both true and false, depending on how you use the word "intact".

If "intact" == "stays valid in cache", then no, this is not guaranteed.
The HW reserves the right to flush some or all of the L1 and L2
caches whenever it wants to --
this includes both HW and ACPI C2 and C3 states.

If "intact" = "cache consistent", then yes, this guarantee is true.
The way the guarantee is implemented varies by generation.
Older systems would lock the bus in C3 to assure
the processor was woken up for DMA to snoop.
Newer hardware simply wakes the cache to snoop
without waking the cores, or if it flushes the caches
then it doesn't have to snoop at all -- which also
counts as "cache consistent":-)

cheers,
-Len


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2007-12-20 19:29 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-12-19 18:34 [PATCH] x86: Voluntary leave_mm before entering ACPI C3 Venki Pallipadi
2007-12-19 19:32 ` Ingo Molnar
2007-12-19 19:36   ` H. Peter Anvin
2007-12-19 19:40     ` Ingo Molnar
2007-12-19 19:48       ` H. Peter Anvin
2007-12-19 20:23         ` Venki Pallipadi
2007-12-20  7:53         ` Arjan van de Ven
2007-12-20 16:16           ` H. Peter Anvin
2007-12-20 18:22             ` Arjan van de Ven
2007-12-20 18:32               ` H. Peter Anvin
2007-12-20 19:28             ` Len Brown
2007-12-19 19:56       ` Venki Pallipadi
2007-12-19 20:54   ` Ingo Molnar
2007-12-20  2:08   ` Venki Pallipadi
2007-12-20  9:31     ` Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).