All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Yang, Xiaowei" <xiaowei.yang@intel.com>
To: "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>
Subject: [PATCH] Fix performance issue brought by TSC-sync logic
Date: Mon, 23 Feb 2009 16:21:07 +0800	[thread overview]
Message-ID: <49A25C73.6090702@intel.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 975 bytes --]

Recently we found a performance bug when doing network tests with VT-d
assigned devices - in some extreme cases, the network performance in an
HVM guest running a recent Linux kernel could be 1/20 of native. The root
cause is that one of our sync-tsc-under-deep-C-state patches introduces
an extra kilo-TSC drift between pCPUs and makes the check-tsc-sync logic
in the HVM guest fail. As a result, the kernel falls back to using a
platform timer (HPET, PMtimer) for gettimeofday instead of the TSC, which
causes very frequent, costly IO port access VMExits - three per call.

We provide the 2 patches below to address the issue:

tsc1.patch: Minimize the TSC drift between pCPUs by letting the BSP and
the APs set the TSC at the same time in time_calibration_rendezvous().
Looping a few times before writing the TSC sounds better, but it may be
too costly.
Signed-off-by: Xiaowei Yang <xiaowei.yang@intel.com>

tsc2.patch: Only do TSC-sync when really necessary, which greatly
narrows its impact.
Signed-off-by: Wei Gang <wei.gang@intel.com>


Thanks,
Xiaowei


[-- Attachment #2: tsc1.patch --]
[-- Type: text/x-patch, Size: 1295 bytes --]

diff -r 0b0e7c2b4eef xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Tue Jan 20 21:21:16 2009 +0800
+++ b/xen/arch/x86/time.c	Mon Feb 09 02:21:50 2009 +0800
@@ -1095,22 +1095,21 @@ static void time_calibration_rendezvous(
         while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
-        rdtscll(r->master_tsc_stamp);
+        if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+            rdtscll(r->master_tsc_stamp);
         mb(); /* write r->master_* /then/ signal */
         atomic_inc(&r->nr_cpus);
-        c->local_tsc_stamp = r->master_tsc_stamp;
     }
     else
     {
         atomic_inc(&r->nr_cpus);
         while ( atomic_read(&r->nr_cpus) != total_cpus )
-            cpu_relax();
-        mb(); /* receive signal /then/ read r->master_* */
-        if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
-            wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
-        rdtscll(c->local_tsc_stamp);
-    }
-
+            mb(); /* receive signal /then/ read r->master_* */
+    }
+
+    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+        wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
+    rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
 

[-- Attachment #3: tsc2.patch --]
[-- Type: text/x-patch, Size: 2734 bytes --]

diff -r 246ecf354c85 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c	Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/arch/x86/acpi/cpu_idle.c	Mon Feb 16 12:57:08 2009 +0800
@@ -737,6 +737,15 @@ long set_cx_pminfo(uint32_t cpu, struct 
 
     if ( cpu_id == 0 && pm_idle_save == NULL )
     {
+        int deepest_cx = acpi_power->states[acpi_power->count - 1].type;
+        if ( max_cstate >= 3 && deepest_cx >= ACPI_STATE_C3 )
+            tsc_may_stop = 1;
+        else if ( max_cstate >= 2 && deepest_cx >= ACPI_STATE_C2
+                  && !local_apic_timer_c2_ok )
+            tsc_may_stop = 1;
+        else
+            tsc_may_stop = 0;
+
         pm_idle_save = pm_idle;
         pm_idle = acpi_processor_idle;
     }
diff -r 246ecf354c85 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c	Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/arch/x86/time.c	Mon Feb 16 13:10:24 2009 +0800
@@ -1091,6 +1091,8 @@ struct calibration_rendezvous {
     u64 master_tsc_stamp;
 };
 
+int tsc_may_stop __read_mostly = 0;
+
 static void time_calibration_rendezvous(void *_r)
 {
     struct cpu_calibration *c = &this_cpu(cpu_calibration);
@@ -1102,7 +1104,9 @@ static void time_calibration_rendezvous(
         while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
-        if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+        if ( !boot_cpu_has(X86_FEATURE_NOSTOP_TSC)
+             && boot_cpu_has(X86_FEATURE_CONSTANT_TSC)
+             && tsc_may_stop )
             rdtscll(r->master_tsc_stamp);
         mb(); /* write r->master_* /then/ signal */
         atomic_inc(&r->nr_cpus);
@@ -1114,7 +1118,7 @@ static void time_calibration_rendezvous(
             mb(); /* receive signal /then/ read r->master_* */
     }
 
-    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+    if ( r->master_tsc_stamp )
         wrmsrl(MSR_IA32_TSC, r->master_tsc_stamp);
     rdtscll(c->local_tsc_stamp);
     c->stime_local_stamp = get_s_time();
@@ -1127,7 +1131,8 @@ static void time_calibration(void *unuse
 {
     struct calibration_rendezvous r = {
         .cpu_calibration_map = cpu_online_map,
-        .nr_cpus = ATOMIC_INIT(0)
+        .nr_cpus = ATOMIC_INIT(0),
+        .master_tsc_stamp = 0
     };
 
     /* @wait=1 because we must wait for all cpus before freeing @r. */
diff -r 246ecf354c85 xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h	Mon Feb 16 12:21:52 2009 +0800
+++ b/xen/include/asm-x86/time.h	Mon Feb 16 12:57:08 2009 +0800
@@ -41,4 +41,6 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic
 uint64_t acpi_pm_tick_to_ns(uint64_t ticks);
 uint64_t ns_to_acpi_pm_tick(uint64_t ns);
 
+extern int tsc_may_stop;
+
 #endif /* __X86_TIME_H__ */

[-- Attachment #4: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

             reply	other threads:[~2009-02-23  8:21 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-02-23  8:21 Yang, Xiaowei [this message]
2009-02-23 12:51 ` [PATCH] Fix performance issue brought by TSC-sync logic Keir Fraser
2009-02-23 12:55   ` Tian, Kevin
2009-02-24  6:33   ` Yang, Xiaowei
2009-02-24 12:10     ` Keir Fraser
2009-02-25 10:26       ` Yang, Xiaowei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=49A25C73.6090702@intel.com \
    --to=xiaowei.yang@intel.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.