From: Juergen Gross <jgross@suse.com>
To: xen-devel@lists.xenproject.org
Cc: Juergen Gross <jgross@suse.com>,
	andrew.cooper3@citrix.com, wei.liu2@citrix.com,
	jbeulich@suse.com, dfaggioli@suse.com
Subject: [PATCH v2 4/6] xen/x86: disable global pages for domains with XPTI active
Date: Fri,  2 Mar 2018 09:14:01 +0100	[thread overview]
Message-ID: <20180302081403.16953-5-jgross@suse.com> (raw)
In-Reply-To: <20180302081403.16953-1-jgross@suse.com>

Instead of flushing global pages from the TLB when switching address
spaces with XPTI active, just disable global pages completely via %cr4
while a domain subject to XPTI is running. This avoids the need for
extra TLB flushes, as loading %cr3 will then remove all TLB entries.
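
For reference, a minimal sketch of the resulting flush logic (this
mirrors the new do_flush_tlb() introduced below; the standalone form
and the name sketch_flush_tlb are illustrative only, and it assumes
Xen's existing read_cr4()/write_cr4()/read_cr3() helpers):

static void sketch_flush_tlb(unsigned long cr3)
{
    unsigned long cr4 = read_cr4();

    if ( cr4 & X86_CR4_PGE )
    {
        /* Global pages in use: toggle CR4.PGE so they get flushed too. */
        write_cr4(cr4 & ~X86_CR4_PGE);
        if ( cr3 )
            asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
        else
            barrier();
        write_cr4(cr4);
    }
    else
    {
        /* CR4.PGE clear (XPTI domain active): a plain CR3 load flushes all. */
        if ( !cr3 )
            cr3 = read_cr3();
        asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
    }
}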

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/arch/x86/cpu/mtrr/generic.c | 32 +++++++++++++++++++++-----------
 xen/arch/x86/flushtlb.c         | 39 +++++++++++++++++++++++++--------------
 xen/arch/x86/x86_64/entry.S     | 10 ----------
 xen/include/asm-x86/domain.h    |  3 ++-
 4 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/xen/arch/x86/cpu/mtrr/generic.c b/xen/arch/x86/cpu/mtrr/generic.c
index e9c0e5e059..d705138100 100644
--- a/xen/arch/x86/cpu/mtrr/generic.c
+++ b/xen/arch/x86/cpu/mtrr/generic.c
@@ -400,8 +400,10 @@ static DEFINE_SPINLOCK(set_atomicity_lock);
  * has been called.
  */
 
-static void prepare_set(void)
+static bool prepare_set(void)
 {
+	unsigned long cr4;
+
 	/*  Note that this is not ideal, since the cache is only flushed/disabled
 	   for this CPU while the MTRRs are changed, but changing this requires
 	   more invasive changes to the way the kernel boots  */
@@ -412,18 +414,22 @@ static void prepare_set(void)
 	write_cr0(read_cr0() | X86_CR0_CD);
 	wbinvd();
 
-	/*  TLB flushing here relies on Xen always using CR4.PGE. */
-	BUILD_BUG_ON(!(XEN_MINIMAL_CR4 & X86_CR4_PGE));
-	write_cr4(read_cr4() & ~X86_CR4_PGE);
+	cr4 = read_cr4();
+	if (cr4 & X86_CR4_PGE)
+		write_cr4(cr4 & ~X86_CR4_PGE);
+	else
+		asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
 
 	/*  Save MTRR state */
 	rdmsrl(MSR_MTRRdefType, deftype);
 
 	/*  Disable MTRRs, and set the default type to uncached  */
 	mtrr_wrmsr(MSR_MTRRdefType, deftype & ~0xcff);
+
+	return !!(cr4 & X86_CR4_PGE);
 }
 
-static void post_set(void)
+static void post_set(bool pge)
 {
 	/* Intel (P6) standard MTRRs */
 	mtrr_wrmsr(MSR_MTRRdefType, deftype);
@@ -432,7 +438,10 @@ static void post_set(void)
 	write_cr0(read_cr0() & ~X86_CR0_CD);
 
 	/*  Reenable CR4.PGE (also flushes the TLB) */
-	write_cr4(read_cr4() | X86_CR4_PGE);
+	if (pge)
+		write_cr4(read_cr4() | X86_CR4_PGE);
+	else
+		asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
 
 	spin_unlock(&set_atomicity_lock);
 }
@@ -441,14 +450,15 @@ static void generic_set_all(void)
 {
 	unsigned long mask, count;
 	unsigned long flags;
+	bool pge;
 
 	local_irq_save(flags);
-	prepare_set();
+	pge = prepare_set();
 
 	/* Actually set the state */
 	mask = set_mtrr_state();
 
-	post_set();
+	post_set(pge);
 	local_irq_restore(flags);
 
 	/*  Use the atomic bitops to update the global mask  */
@@ -457,7 +467,6 @@ static void generic_set_all(void)
 			set_bit(count, &smp_changes_mask);
 		mask >>= 1;
 	}
-	
 }
 
 static void generic_set_mtrr(unsigned int reg, unsigned long base,
@@ -474,11 +483,12 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
 {
 	unsigned long flags;
 	struct mtrr_var_range *vr;
+	bool pge;
 
 	vr = &mtrr_state.var_ranges[reg];
 
 	local_irq_save(flags);
-	prepare_set();
+	pge = prepare_set();
 
 	if (size == 0) {
 		/* The invalid bit is kept in the mask, so we simply clear the
@@ -499,7 +509,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
 		mtrr_wrmsr(MSR_IA32_MTRR_PHYSMASK(reg), vr->mask);
 	}
 
-	post_set();
+	post_set(pge);
 	local_irq_restore(flags);
 }
 
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index e4ea4f3297..186d9099f6 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -72,20 +72,39 @@ static void post_flush(u32 t)
     this_cpu(tlbflush_time) = t;
 }
 
+static void do_flush_tlb(unsigned long cr3)
+{
+    unsigned long cr4;
+
+    cr4 = read_cr4();
+    if ( cr4 & X86_CR4_PGE )
+    {
+        write_cr4(cr4 & ~X86_CR4_PGE);
+        if ( cr3 )
+            asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+        else
+            barrier();
+        write_cr4(cr4);
+    }
+    else
+    {
+        if ( !cr3 )
+            cr3 = read_cr3();
+        asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+    }
+}
+
 void write_cr3(unsigned long cr3)
 {
-    unsigned long flags, cr4;
+    unsigned long flags;
     u32 t;
 
     /* This non-reentrant function is sometimes called in interrupt context. */
     local_irq_save(flags);
 
     t = pre_flush();
-    cr4 = read_cr4();
 
-    write_cr4(cr4 & ~X86_CR4_PGE);
-    asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
-    write_cr4(cr4);
+    do_flush_tlb(cr3);
 
     post_flush(t);
 
@@ -123,22 +142,14 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
             u32 t = pre_flush();
 
             if ( !cpu_has_invpcid )
-            {
-                unsigned long cr4 = read_cr4();
-
-                write_cr4(cr4 & ~X86_CR4_PGE);
-                barrier();
-                write_cr4(cr4);
-            }
+                do_flush_tlb(0);
             else
-            {
                 /*
                  * Using invpcid to flush all mappings works
                  * regardless of whether PCID is enabled or not.
                  * It is faster than read-modify-write CR4.
                  */
                 invpcid_flush_all();
-            }
 
             post_flush(t);
         }
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index cdcdc2c40a..a8d38e7eb2 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -73,13 +73,8 @@ restore_all_guest:
                 ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
         rep movsq
 .Lrag_copy_done:
-        mov   STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
-        mov   %rdi, %rsi
-        and   $~X86_CR4_PGE, %rdi
-        mov   %rdi, %cr4
         mov   %rax, %cr3
-        mov   %rsi, %cr4
 .Lrag_cr3_end:
         ALTERNATIVE_NOP .Lrag_cr3_start, .Lrag_cr3_end, X86_FEATURE_NO_XPTI
 
@@ -136,12 +131,7 @@ restore_all_xen:
          * so "g" will have to do.
          */
 UNLIKELY_START(g, exit_cr3)
-        mov   %cr4, %rdi
-        mov   %rdi, %rsi
-        and   $~X86_CR4_PGE, %rdi
-        mov   %rdi, %cr4
         mov   %rax, %cr3
-        mov   %rsi, %cr4
 UNLIKELY_END(exit_cr3)
 
         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 0cc37dea05..316418a6fe 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -622,7 +622,8 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
             X86_CR4_SMAP | X86_CR4_OSXSAVE |                \
             X86_CR4_FSGSBASE))                              \
       | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
-     & ~X86_CR4_DE)
+     & ~(X86_CR4_DE |                                       \
+         ((v)->domain->arch.pv_domain.xpti ? X86_CR4_PGE : 0)))
 #define real_cr4_to_pv_guest_cr4(c)                         \
     ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |      \
              X86_CR4_OSXSAVE | X86_CR4_SMEP |               \
-- 
2.13.6

