* [PATCH v2] x86: avoid flush IPI when possible
@ 2016-02-16 10:14 Jan Beulich
2016-02-17 14:48 ` Andrew Cooper
0 siblings, 1 reply; 3+ messages in thread
From: Jan Beulich @ 2016-02-16 10:14 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper, Keir Fraser
[-- Attachment #1: Type: text/plain, Size: 3708 bytes --]
Since CLFLUSH, other than WBINVD, is a cache coherency domain wide
flush, there's no need to IPI other CPUs if this is the only flushing
being requested. (As a secondary change, move a local variable into the
scope where it's actually needed.)
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Adjust the meaning of flush_area_local()'s return value and prefix
the function with a respective comment.
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -91,9 +91,13 @@ void write_cr3(unsigned long cr3)
local_irq_restore(flags);
}
-void flush_area_local(const void *va, unsigned int flags)
+/*
+ * The return value of this function is the passed in "flags" argument with
+ * bits cleared that have been fully (i.e. system-wide) taken care of, i.e.
+ * namely not requiring any further action on remote CPUs.
+ */
+unsigned int flush_area_local(const void *va, unsigned int flags)
{
- const struct cpuinfo_x86 *c = ¤t_cpu_data;
unsigned int order = (flags - 1) & FLUSH_ORDER_MASK;
unsigned long irqfl;
@@ -130,6 +134,7 @@ void flush_area_local(const void *va, un
if ( flags & FLUSH_CACHE )
{
+ const struct cpuinfo_x86 *c = ¤t_cpu_data;
unsigned long i, sz = 0;
if ( order < (BITS_PER_LONG - PAGE_SHIFT) )
@@ -146,6 +151,7 @@ void flush_area_local(const void *va, un
"data16 clflush %0", /* clflushopt */
X86_FEATURE_CLFLUSHOPT,
"m" (((const char *)va)[i]));
+ flags &= ~FLUSH_CACHE;
}
else
{
@@ -154,4 +160,6 @@ void flush_area_local(const void *va, un
}
local_irq_restore(irqfl);
+
+ return flags;
}
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -205,26 +205,30 @@ static unsigned int flush_flags;
void invalidate_interrupt(struct cpu_user_regs *regs)
{
+ unsigned int flags = flush_flags;
ack_APIC_irq();
perfc_incr(ipis);
- if ( !__sync_local_execstate() ||
- (flush_flags & (FLUSH_TLB_GLOBAL | FLUSH_CACHE)) )
- flush_area_local(flush_va, flush_flags);
+ if ( __sync_local_execstate() )
+ flags &= ~FLUSH_TLB;
+ flush_area_local(flush_va, flags);
cpumask_clear_cpu(smp_processor_id(), &flush_cpumask);
}
void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags)
{
+ unsigned int cpu = smp_processor_id();
+
ASSERT(local_irq_is_enabled());
- if ( cpumask_test_cpu(smp_processor_id(), mask) )
- flush_area_local(va, flags);
+ if ( cpumask_test_cpu(cpu, mask) )
+ flags = flush_area_local(va, flags);
- if ( !cpumask_subset(mask, cpumask_of(smp_processor_id())) )
+ if ( (flags & ~FLUSH_ORDER_MASK) &&
+ !cpumask_subset(mask, cpumask_of(cpu)) )
{
spin_lock(&flush_lock);
cpumask_and(&flush_cpumask, mask, &cpu_online_map);
- cpumask_clear_cpu(smp_processor_id(), &flush_cpumask);
+ cpumask_clear_cpu(cpu, &flush_cpumask);
flush_va = va;
flush_flags = flags;
send_IPI_mask(&flush_cpumask, INVALIDATE_TLB_VECTOR);
--- a/xen/include/asm-x86/flushtlb.h
+++ b/xen/include/asm-x86/flushtlb.h
@@ -87,7 +87,7 @@ void write_cr3(unsigned long cr3);
#define FLUSH_CACHE 0x400
/* Flush local TLBs/caches. */
-void flush_area_local(const void *va, unsigned int flags);
+unsigned int flush_area_local(const void *va, unsigned int flags);
#define flush_local(flags) flush_area_local(NULL, flags)
/* Flush specified CPUs' TLBs/caches */
[-- Attachment #2: x86-flush-overhead.patch --]
[-- Type: text/plain, Size: 3740 bytes --]
x86: avoid flush IPI when possible
Since CLFLUSH, other than WBINVD, is a cache coherency domain wide
flush, there's no need to IPI other CPUs if this is the only flushing
being requested. (As a secondary change, move a local variable into the
scope where it's actually needed.)
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Adjust the meaning of flush_area_local()'s return value and prefix
the function with a respective comment.
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -91,9 +91,13 @@ void write_cr3(unsigned long cr3)
local_irq_restore(flags);
}
-void flush_area_local(const void *va, unsigned int flags)
+/*
+ * The return value of this function is the passed in "flags" argument with
+ * bits cleared that have been fully (i.e. system-wide) taken care of, i.e.
+ * namely not requiring any further action on remote CPUs.
+ */
+unsigned int flush_area_local(const void *va, unsigned int flags)
{
- const struct cpuinfo_x86 *c = ¤t_cpu_data;
unsigned int order = (flags - 1) & FLUSH_ORDER_MASK;
unsigned long irqfl;
@@ -130,6 +134,7 @@ void flush_area_local(const void *va, un
if ( flags & FLUSH_CACHE )
{
+ const struct cpuinfo_x86 *c = ¤t_cpu_data;
unsigned long i, sz = 0;
if ( order < (BITS_PER_LONG - PAGE_SHIFT) )
@@ -146,6 +151,7 @@ void flush_area_local(const void *va, un
"data16 clflush %0", /* clflushopt */
X86_FEATURE_CLFLUSHOPT,
"m" (((const char *)va)[i]));
+ flags &= ~FLUSH_CACHE;
}
else
{
@@ -154,4 +160,6 @@ void flush_area_local(const void *va, un
}
local_irq_restore(irqfl);
+
+ return flags;
}
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -205,26 +205,30 @@ static unsigned int flush_flags;
void invalidate_interrupt(struct cpu_user_regs *regs)
{
+ unsigned int flags = flush_flags;
ack_APIC_irq();
perfc_incr(ipis);
- if ( !__sync_local_execstate() ||
- (flush_flags & (FLUSH_TLB_GLOBAL | FLUSH_CACHE)) )
- flush_area_local(flush_va, flush_flags);
+ if ( __sync_local_execstate() )
+ flags &= ~FLUSH_TLB;
+ flush_area_local(flush_va, flags);
cpumask_clear_cpu(smp_processor_id(), &flush_cpumask);
}
void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags)
{
+ unsigned int cpu = smp_processor_id();
+
ASSERT(local_irq_is_enabled());
- if ( cpumask_test_cpu(smp_processor_id(), mask) )
- flush_area_local(va, flags);
+ if ( cpumask_test_cpu(cpu, mask) )
+ flags = flush_area_local(va, flags);
- if ( !cpumask_subset(mask, cpumask_of(smp_processor_id())) )
+ if ( (flags & ~FLUSH_ORDER_MASK) &&
+ !cpumask_subset(mask, cpumask_of(cpu)) )
{
spin_lock(&flush_lock);
cpumask_and(&flush_cpumask, mask, &cpu_online_map);
- cpumask_clear_cpu(smp_processor_id(), &flush_cpumask);
+ cpumask_clear_cpu(cpu, &flush_cpumask);
flush_va = va;
flush_flags = flags;
send_IPI_mask(&flush_cpumask, INVALIDATE_TLB_VECTOR);
--- a/xen/include/asm-x86/flushtlb.h
+++ b/xen/include/asm-x86/flushtlb.h
@@ -87,7 +87,7 @@ void write_cr3(unsigned long cr3);
#define FLUSH_CACHE 0x400
/* Flush local TLBs/caches. */
-void flush_area_local(const void *va, unsigned int flags);
+unsigned int flush_area_local(const void *va, unsigned int flags);
#define flush_local(flags) flush_area_local(NULL, flags)
/* Flush specified CPUs' TLBs/caches */
[-- Attachment #3: Type: text/plain, Size: 126 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v2] x86: avoid flush IPI when possible
2016-02-16 10:14 [PATCH v2] x86: avoid flush IPI when possible Jan Beulich
@ 2016-02-17 14:48 ` Andrew Cooper
2016-02-18 8:44 ` Jan Beulich
0 siblings, 1 reply; 3+ messages in thread
From: Andrew Cooper @ 2016-02-17 14:48 UTC (permalink / raw)
To: Jan Beulich, xen-devel; +Cc: Keir Fraser
On 16/02/16 10:14, Jan Beulich wrote:
> --- a/xen/arch/x86/smp.c
> +++ b/xen/arch/x86/smp.c
> @@ -205,26 +205,30 @@ static unsigned int flush_flags;
>
> void invalidate_interrupt(struct cpu_user_regs *regs)
> {
> + unsigned int flags = flush_flags;
> ack_APIC_irq();
> perfc_incr(ipis);
> - if ( !__sync_local_execstate() ||
> - (flush_flags & (FLUSH_TLB_GLOBAL | FLUSH_CACHE)) )
> - flush_area_local(flush_va, flush_flags);
> + if ( __sync_local_execstate() )
> + flags &= ~FLUSH_TLB;
If a switch happened, write_ptbase() also flushed global mappings. I
believe you can also mask out FLUSH_TLB_GLOBAL here.
Otherwise, the rest looks ok. Reviewed-by: Andrew Cooper
<andrew.cooper3@citrix.com>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v2] x86: avoid flush IPI when possible
2016-02-17 14:48 ` Andrew Cooper
@ 2016-02-18 8:44 ` Jan Beulich
0 siblings, 0 replies; 3+ messages in thread
From: Jan Beulich @ 2016-02-18 8:44 UTC (permalink / raw)
To: Andrew Cooper; +Cc: xen-devel, Keir Fraser
>>> On 17.02.16 at 15:48, <andrew.cooper3@citrix.com> wrote:
> On 16/02/16 10:14, Jan Beulich wrote:
>> --- a/xen/arch/x86/smp.c
>> +++ b/xen/arch/x86/smp.c
>> @@ -205,26 +205,30 @@ static unsigned int flush_flags;
>>
>> void invalidate_interrupt(struct cpu_user_regs *regs)
>> {
>> + unsigned int flags = flush_flags;
>> ack_APIC_irq();
>> perfc_incr(ipis);
>> - if ( !__sync_local_execstate() ||
>> - (flush_flags & (FLUSH_TLB_GLOBAL | FLUSH_CACHE)) )
>> - flush_area_local(flush_va, flush_flags);
>> + if ( __sync_local_execstate() )
>> + flags &= ~FLUSH_TLB;
>
> If a switch happened, write_ptbase() also flushed global mappings. I
> believe you can also mask out FLUSH_TLB_GLOBAL here.
Indeed - not doing so appears to be another leftover of 32-bit
days, where write_cr3() did not fiddle with CR4 when
!USER_MAPPINGS_ARE_GLOBAL (i.e. namely the 32-bit case).
> Otherwise, the rest looks ok. Reviewed-by: Andrew Cooper
> <andrew.cooper3@citrix.com>
Thanks, Jan
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2016-02-18 8:44 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-02-16 10:14 [PATCH v2] x86: avoid flush IPI when possible Jan Beulich
2016-02-17 14:48 ` Andrew Cooper
2016-02-18 8:44 ` Jan Beulich
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).