From: Chao Gao <chao.gao@intel.com>
To: xen-devel@lists.xen.org
Cc: Kevin Tian <kevin.tian@intel.com>,
	Stefano Stabellini <sstabellini@kernel.org>,
	Wei Liu <wei.liu2@citrix.com>,
	Jun Nakajima <jun.nakajima@intel.com>,
	George Dunlap <george.dunlap@eu.citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Tim Deegan <tim@xen.org>, Jan Beulich <jbeulich@suse.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	Chao Gao <chao.gao@intel.com>
Subject: [PATCH v5 4/4] xentrace: add support for HVM's PI blocking list operation
Date: Wed, 16 Aug 2017 13:14:38 +0800	[thread overview]
Message-ID: <1502860478-84512-5-git-send-email-chao.gao@intel.com> (raw)
In-Reply-To: <1502860478-84512-1-git-send-email-chao.gao@intel.com>

To make it possible to analyze how frequently the PI blocking list is
operated on and how long the list gets, add the relevant trace events
to xentrace and the corresponding handling code to xenalyze.

Signed-off-by: Chao Gao <chao.gao@intel.com>
---
v5:
 - Put the PI list operations under the HW trace class and drop the
   ASYNC handling
 - Generate a scatterplot of the PI blocking list length on each pcpu,
   so an analyst can visualize the data more easily (output format
   described in the note below)
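
   Note (not part of the patch): with --scatterplot-pi-list, every
   pi_list_add/pi_list_del event is emitted as one whitespace-separated
   sample of the form

       <pcpu> <seconds>.<nanoseconds> <current list length>

   matching the printf()s in process_pi_list_add()/process_pi_list_del(),
   so the output can be fed directly into a plotting tool.
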
v4:
 - Split out the tracing part of patch 1 in v3
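
Note for reviewers (illustrative, not part of the patch): the new event
numbers follow the usual trace-id layout, i.e. TRC_VTD_PI_LIST_ADD
(0x00804001) carries the TRC_HW main class in bits 16-27, subclass 4
(TRC_HW_SUB_VTD) in bits 12-15 and the minor number in bits 0-11, which
is what hw_process()/vtd_process() dispatch on.  A minimal sketch of the
decomposition (standalone, names made up for the example):

    /* Illustrative only: split a trace event id into main class,
     * subclass and minor number, following the bit layout implied by
     * the TRC_HW_* constants in xen/include/public/trace.h. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int event = 0x00804001;               /* TRC_VTD_PI_LIST_ADD  */
        unsigned int main_cls = (event >> 16) & 0xfff; /* 0x080 -> TRC_HW      */
        unsigned int subclass = (event >> 12) & 0xf;   /* 4 -> TRC_HW_SUB_VTD  */
        unsigned int minor    = event & 0xfff;         /* 1 -> ..._PI_LIST_ADD */

        printf("main 0x%03x, sub %u, minor %u\n", main_cls, subclass, minor);
        return 0;
    }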

---
 tools/xentrace/formats     |   2 +
 tools/xentrace/xenalyze.c  | 116 +++++++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/vmx/vmx.c |  17 ++++++-
 xen/include/public/trace.h |   5 ++
 4 files changed, 138 insertions(+), 2 deletions(-)

diff --git a/tools/xentrace/formats b/tools/xentrace/formats
index c1f584f..e926a18 100644
--- a/tools/xentrace/formats
+++ b/tools/xentrace/formats
@@ -205,6 +205,8 @@
 0x00802006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  assign_vector [ irq = %(1)d = vector 0x%(2)x, CPU mask: 0x%(3)08x ]
 0x00802007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  bogus_vector [ 0x%(1)x ]
 0x00802008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_irq [ irq = %(1)d, began = %(2)dus, ended = %(3)dus ]
+0x00804001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  pi_list_add [ domid = 0x%(1)04x, vcpu = 0x%(2)04x, pcpu = 0x%(3)04x, #entry = 0x%(4)04x ]
+0x00804002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  pi_list_del [ domid = 0x%(1)04x, vcpu = 0x%(2)04x ]
 
 0x00084001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  hpet create [ tn = %(1)d, irq = %(2)d, delta = 0x%(4)08x%(3)08x, period = 0x%(6)08x%(5)08x ]
 0x00084002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  pit create [ delta = 0x%(1)016x, period = 0x%(2)016x ]
diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
index 24cce2a..2276a23 100644
--- a/tools/xentrace/xenalyze.c
+++ b/tools/xentrace/xenalyze.c
@@ -159,6 +159,7 @@ struct {
         scatterplot_extint_cycles:1,
         scatterplot_rdtsc:1,
         scatterplot_irq:1,
+        scatterplot_pi_list:1,
         histogram_interrupt_eip:1,
         interval_mode:1,
         dump_all:1,
@@ -233,6 +234,7 @@ struct {
     .scatterplot_extint_cycles=0,
     .scatterplot_rdtsc=0,
     .scatterplot_irq=0,
+    .scatterplot_pi_list=0,
     .histogram_interrupt_eip=0,
     .dump_all = 0,
     .dump_raw_process = 0,
@@ -1391,6 +1393,9 @@ struct hvm_data {
 
     /* Historical info */
     tsc_t last_rdtsc;
+
+    /* Destination pcpu of posted interrupt's wakeup interrupt */
+    int pi_cpu;
 };
 
 enum {
@@ -1457,6 +1462,8 @@ void init_hvm_data(struct hvm_data *h, struct vcpu_data *v) {
     }
     for(i=0; i<GUEST_INTERRUPT_MAX+1; i++)
         h->summary.guest_interrupt[i].count=0;
+
+    h->pi_cpu = -1;
 }
 
 /* PV data */
@@ -1852,6 +1859,9 @@ struct pcpu_info {
         tsc_t tsc;
         struct cycle_summary idle, running, lost;
     } time;
+
+    /* Posted Interrupt List Length */
+    int pi_list_length;
 };
 
 void __fill_in_record_info(struct pcpu_info *p);
@@ -8581,8 +8591,97 @@ void irq_process(struct pcpu_info *p) {
     }
 }
 
+static void process_pi_list_add(struct record_info *ri)
+{
+    struct {
+        int did;
+        int vid;
+        int pcpu;
+        int len;
+    } *data = (typeof(data))ri->d;
+    struct vcpu_data *v;
+
+    v = vcpu_find(data->did, data->vid);
+    if ( !v->hvm.init )
+        init_hvm_data(&v->hvm, v);
+
+    if ( opt.dump_all )
+        printf("d%uv%u is added to pi blocking list of pcpu%u. "
+               "The list length is now %d\n",
+               data->did, data->vid, data->pcpu, data->len);
+
+    v->hvm.pi_cpu = data->pcpu;
+    /* Calibrate pi list length */
+    P.pcpu[data->pcpu].pi_list_length = data->len;
+
+    if ( opt.scatterplot_pi_list )
+    {
+        struct time_struct t;
+
+        abs_cycles_to_time(ri->tsc, &t);
+        printf("%d %u.%09u %d\n", data->pcpu, t.s, t.ns,
+               P.pcpu[data->pcpu].pi_list_length);
+    }
+}
+
+static void process_pi_list_del(struct record_info *ri)
+{
+    struct {
+        int did;
+        int vid;
+    } *data = (typeof(data))ri->d;
+    struct vcpu_data *v;
+
+    v = vcpu_find(data->did, data->vid);
+    if ( !v->hvm.init )
+        init_hvm_data(&v->hvm, v);
+
+    if ( opt.dump_all )
+    {
+        if ( v->hvm.pi_cpu != -1 )
+            printf("d%uv%u is removed from pi blocking list of pcpu%u\n",
+                   data->did, data->vid, v->hvm.pi_cpu);
+        else
+            printf("d%uv%u is removed from pi blocking list\n",
+                   data->did, data->vid);
+    }
+
+    if ( (v->hvm.pi_cpu != -1) && (P.pcpu[v->hvm.pi_cpu].pi_list_length != -1) )
+    {
+        P.pcpu[v->hvm.pi_cpu].pi_list_length--;
+
+        if ( opt.scatterplot_pi_list )
+        {
+            struct time_struct t;
+
+            abs_cycles_to_time(ri->tsc, &t);
+            printf("%d %u.%09u %d\n", v->hvm.pi_cpu, t.s, t.ns,
+                   P.pcpu[v->hvm.pi_cpu].pi_list_length);
+        }
+    }
+    v->hvm.pi_cpu = -1;
+}
+
+
+static void vtd_process(struct pcpu_info *p) {
+    struct record_info *ri = &p->ri;
+
+    switch (ri->event)
+    {
+    case TRC_VTD_PI_LIST_ADD:
+        process_pi_list_add(ri);
+        break;
+    case TRC_VTD_PI_LIST_DEL:
+        process_pi_list_del(ri);
+        break;
+    default:
+        process_generic(&p->ri);
+    }
+}
+
 #define TRC_HW_SUB_PM 1
 #define TRC_HW_SUB_IRQ 2
+#define TRC_HW_SUB_VTD 4
 void hw_process(struct pcpu_info *p)
 {
     struct record_info *ri = &p->ri;
@@ -8595,6 +8694,11 @@ void hw_process(struct pcpu_info *p)
     case TRC_HW_SUB_IRQ:
         irq_process(p);
         break;
+    case TRC_HW_SUB_VTD:
+        vtd_process(p);
+        break;
+    default:
+        process_generic(&p->ri);
     }
 
 }
@@ -9027,6 +9131,7 @@ off_t scan_for_new_pcpu(off_t offset) {
 
         p->file_offset = offset;
         p->next_cpu_change_offset = offset;
+        p->pi_list_length = -1;
 
         record_order_insert(p);
 
@@ -9255,6 +9360,7 @@ void process_cpu_change(struct pcpu_info *p) {
         p2->ri.d = p2->ri.rec.u.notsc.data;
         p2->file_offset = p->file_offset;
         p2->next_cpu_change_offset = p->file_offset;
+        p2->pi_list_length = -1;
 
         fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
                 __func__, r->cpu, (unsigned long long)p->file_offset);
@@ -10015,6 +10121,7 @@ enum {
     OPT_SCATTERPLOT_EXTINT_CYCLES,
     OPT_SCATTERPLOT_RDTSC,
     OPT_SCATTERPLOT_IRQ,
+    OPT_SCATTERPLOT_PI_LIST,
     OPT_HISTOGRAM_INTERRUPT_EIP,
     /* Interval options */
     OPT_INTERVAL_CR3_SCHEDULE_TIME,
@@ -10304,6 +10411,10 @@ error_t cmd_parser(int key, char *arg, struct argp_state *state)
         G.output_defined = 1;
         opt.scatterplot_pcpu=1;
         break;
+    case OPT_SCATTERPLOT_PI_LIST:
+        G.output_defined = 1;
+        opt.scatterplot_pi_list=1;
+        break;
     case OPT_HISTOGRAM_INTERRUPT_EIP:
     {
         char * inval, *p;
@@ -10679,6 +10790,11 @@ const struct argp_option cmd_opts[] =  {
       .group = OPT_GROUP_EXTRA,
       .doc = "Output scatterplot of irqs on pcpus.", },
 
+    { .name = "scatterplot-pi-list",
+      .key = OPT_SCATTERPLOT_PI_LIST,
+      .group = OPT_GROUP_EXTRA,
+      .doc = "Output scatterplot of pi blocking list length on pcpus.", },
+
     { .name = "histogram-interrupt-eip",
       .key = OPT_HISTOGRAM_INTERRUPT_EIP,
       .arg = "vector[,increment]",
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 646f409..3f71681 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -210,6 +210,8 @@ static void vmx_vcpu_block(struct vcpu *v)
 
     vmx_pi_add_vcpu(&v->arch.hvm_vmx.pi_blocking,
                     &per_cpu(vmx_pi_blocking, pi_cpu));
+    TRACE_4D(TRC_VTD_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id, pi_cpu,
+             read_atomic(&per_cpu(vmx_pi_blocking, pi_cpu).counter));
     spin_unlock_irqrestore(pi_blocking_list_lock, flags);
 
     ASSERT(!pi_test_sn(pi_desc));
@@ -291,6 +293,7 @@ static void vmx_pi_unblock_vcpu(struct vcpu *v)
     if ( v->arch.hvm_vmx.pi_blocking.lock != NULL )
     {
         ASSERT(v->arch.hvm_vmx.pi_blocking.lock == pi_blocking_list_lock);
+        TRACE_2D(TRC_VTD_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id);
         vmx_pi_del_vcpu(&v->arch.hvm_vmx.pi_blocking,
                         container_of(pi_blocking_list_lock,
                                      struct vmx_pi_blocking_vcpu, lock));
@@ -328,6 +331,8 @@ void vmx_pi_desc_fixup(unsigned int cpu)
 
     list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list)
     {
+        struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx);
+
         /*
          * Suppress notification or we may miss an interrupt when the
          * target cpu is dying.
@@ -341,9 +346,10 @@ void vmx_pi_desc_fixup(unsigned int cpu)
          */
         if ( pi_test_on(&vmx->pi_desc) )
         {
+            TRACE_2D(TRC_VTD_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id);
             vmx_pi_del_vcpu(&vmx->pi_blocking, &per_cpu(vmx_pi_blocking, cpu));
             vmx->pi_blocking.lock = NULL;
-            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
+            vcpu_unblock(v);
         }
         else
         {
@@ -363,9 +369,13 @@ void vmx_pi_desc_fixup(unsigned int cpu)
             write_atomic(&vmx->pi_desc.ndst,
                          x2apic_enabled ? dest : MASK_INSR(dest, PI_xAPIC_NDST_MASK));
 
+            TRACE_2D(TRC_VTD_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id);
             vmx_pi_del_vcpu(&vmx->pi_blocking, &per_cpu(vmx_pi_blocking, cpu));
             vmx_pi_add_vcpu(&vmx->pi_blocking, &per_cpu(vmx_pi_blocking,
                                                         new_cpu));
+            TRACE_4D(TRC_VTD_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id,
+                     new_cpu,
+                     read_atomic(&per_cpu(vmx_pi_blocking, new_cpu).counter));
             vmx->pi_blocking.lock = new_lock;
 
             spin_unlock(new_lock);
@@ -2476,10 +2486,13 @@ static void pi_wakeup_interrupt(struct cpu_user_regs *regs)
     {
         if ( pi_test_on(&vmx->pi_desc) )
         {
+            struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx);
+
+            TRACE_2D(TRC_VTD_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id);
             vmx_pi_del_vcpu(&vmx->pi_blocking, &per_cpu(vmx_pi_blocking, cpu));
             ASSERT(vmx->pi_blocking.lock == lock);
             vmx->pi_blocking.lock = NULL;
-            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
+            vcpu_unblock(v);
         }
     }
 
diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
index 3746bff..5eeb8ee 100644
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -92,6 +92,7 @@
 /* Trace classes for Hardware */
 #define TRC_HW_PM           0x00801000   /* Power management traces */
 #define TRC_HW_IRQ          0x00802000   /* Traces relating to the handling of IRQs */
+#define TRC_HW_VTD          0x00804000   /* Traces relating to VTD */
 
 /* Trace events per class */
 #define TRC_LOST_RECORDS        (TRC_GEN + 1)
@@ -273,6 +274,10 @@
 #define TRC_HW_IRQ_UNMAPPED_VECTOR    (TRC_HW_IRQ + 0x7)
 #define TRC_HW_IRQ_HANDLED            (TRC_HW_IRQ + 0x8)
 
+/* Trace events relating to VT-d */
+#define TRC_VTD_PI_LIST_ADD     (TRC_HW_VTD + 0x1)
+#define TRC_VTD_PI_LIST_DEL     (TRC_HW_VTD + 0x2)
+
 /*
  * Event Flags
  *
-- 
1.8.3.1


Thread overview: 19+ messages
2017-08-16  5:14 [PATCH v5 0/4] mitigate the per-pCPU blocking list may be too long Chao Gao
2017-08-16  5:14 ` [PATCH v5 1/4] VT-d PI: track the number of vcpus on pi blocking list Chao Gao
2017-08-30 16:00   ` Jan Beulich
2017-08-30 22:57     ` Chao Gao
2017-08-31  7:42       ` Jan Beulich
2017-08-31  7:15         ` Chao Gao
2017-08-31  8:33           ` Jan Beulich
2017-08-31  7:53             ` Chao Gao
2017-09-01  1:39             ` Chao Gao
2017-09-01  8:24               ` Jan Beulich
2017-09-01  7:55                 ` Chao Gao
2017-09-01  9:13                   ` Jan Beulich
2017-09-01  8:37                     ` Chao Gao
2017-09-01  9:55                       ` Jan Beulich
2017-09-01 10:04                         ` Jan Beulich
2017-08-16  5:14 ` [PATCH v5 2/4] x86/vcpu: track hvm vcpu number on the system Chao Gao
2017-08-16  5:14 ` [PATCH v5 3/4] VT-d PI: restrict the number of vcpus in a given pcpu's PI blocking list Chao Gao
2017-08-31 16:01   ` Jan Beulich
2017-08-16  5:14 ` Chao Gao [this message]
