* [PATCH 2/4] tools/xenalyze: Remove weighted cpi summaries
2016-08-08 9:54 [PATCH 1/4] tools/xenalyze: Remove bogus library dependencies George Dunlap
@ 2016-08-08 9:54 ` George Dunlap
2016-08-08 11:06 ` Wei Liu
2016-08-08 9:54 ` [PATCH 3/4] tools/xenalyze: Get rid of extraneous data structure George Dunlap
` (3 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: George Dunlap @ 2016-08-08 9:54 UTC (permalink / raw)
To: xen-devel
Cc: Ian Jackson, Dario Faggioli, Wei Liu, George Dunlap,
Anshul Makkar
At the moment these structures are not used, and half of the code for
collecting the data is commented out. Using them would require
further support for collecting hardware instruction-counter data
inside Xen.
Remove the code entirely; when they're wanted again they will be here
in the git log.
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
CC: Ian Jackson <ian.jackson@citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Dario Faggioli <dario.faggioli@citrix.com>
CC: Anshul Makkar <anshul.makkar@citrix.com>
---
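For context, the CPI bookkeeping being removed boiled down to
accumulating per-vcpu cycle and instruction counts and reporting their
ratio. A minimal standalone sketch of that calculation (illustrative
only; the names here are hypothetical and not part of the patch):

    #include <stdio.h>

    /* Hypothetical sketch of the removed bookkeeping: accumulate
     * cycles and instructions, then report cycles-per-instruction. */
    struct cpi_acc {
        unsigned long long instructions;
        unsigned long long cycles;
    };

    static void cpi_update(struct cpi_acc *a,
                           unsigned long long instr,
                           unsigned long long cycles)
    {
        a->instructions += instr;
        a->cycles += cycles;
    }

    int main(void)
    {
        struct cpi_acc a = { 0, 0 };

        cpi_update(&a, 1000, 2500);  /* 2500 cycles over 1000 instructions */
        if (a.instructions)
            printf("CPI summary: %2.2f\n",      /* prints 2.50 */
                   (float)a.cycles / a.instructions);
        return 0;
    }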
tools/xentrace/xenalyze.c | 121 ----------------------------------------------
1 file changed, 121 deletions(-)
diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
index d223de6..d455a52 100644
--- a/tools/xentrace/xenalyze.c
+++ b/tools/xentrace/xenalyze.c
@@ -147,7 +147,6 @@ int verbosity = 5;
struct {
unsigned
scatterplot_interrupt_eip:1,
- scatterplot_cpi:1,
scatterplot_unpin_promote:1,
scatterplot_cr3_switch:1,
scatterplot_wake_to_halt:1,
@@ -223,7 +222,6 @@ struct {
} interval;
} opt = {
.scatterplot_interrupt_eip=0,
- .scatterplot_cpi=0,
.scatterplot_unpin_promote=0,
.scatterplot_cr3_switch=0,
.scatterplot_wake_to_halt=0,
@@ -290,15 +288,6 @@ struct cycle_summary {
struct interval_element interval;
};
-struct weighted_cpi_summary {
- int count;
- unsigned long long instructions;
- unsigned long long cycles;
- float *cpi;
- unsigned long long *cpi_weight;
- struct interval_element interval;
-};
-
/* -- Symbol list information -- */
#define SYMBOL_ENTRIES_PER_STRUCT 1023
#define SYMBOL_NAME_SIZE 124
@@ -1672,7 +1661,6 @@ struct vcpu_data {
struct cycle_framework f;
struct cycle_summary runstates[RUNSTATE_MAX];
struct cycle_summary runnable_states[RUNNABLE_STATE_MAX];
- struct weighted_cpi_summary cpi;
struct cycle_summary cpu_affinity_all,
cpu_affinity_pcpu[MAX_CPUS];
enum {
@@ -2297,54 +2285,6 @@ static inline void clear_interval_cycles(struct interval_element *e) {
e->instructions = 0;
}
-#if 0
-static inline void update_cpi(struct weighted_cpi_summary *s,
- unsigned long long i,
- unsigned long long c) {
-/* We don't know ahead of time how many samples there are, and working
- * with dynamic stuff is a pain, and unnecessary. This algorithm will
- * generate a sample set that approximates an even sample. We can
- * then take the percentiles on this, and get an approximate value. */
- int lap, index;
-
- if ( opt.sample_size ) {
- lap = (s->count/opt.sample_size)+1;
- index =s->count % opt.sample_size;
-
- if((index - (lap/3))%lap == 0) {
- if(!s->cpi) {
- assert(!s->cpi_weight);
-
- s->cpi = malloc(sizeof(*s->cpi) * opt.sample_size);
- s->cpi_weight = malloc(sizeof(*s->cpi_weight) * opt.sample_size);
- if(!s->cpi || !s->cpi_weight) {
- fprintf(stderr, "%s: malloc failed!\n", __func__);
- error(ERR_SYSTEM, NULL);
- }
- }
- assert(s->cpi_weight);
-
- s->cpi[index] = (float) c / i;
- s->cpi_weight[index]=c;
- }
- }
-
- s->instructions += i;
- s->cycles += c;
- s->count++;
-
- s->interval.instructions += i;
- s->interval.cycles += c;
- s->interval.count++;
-}
-
-static inline void clear_interval_cpi(struct weighted_cpi_summary *s) {
- s->interval.cycles = 0;
- s->interval.count = 0;
- s->interval.instructions = 0;
-}
-#endif
-
static inline void print_cpu_affinity(struct cycle_summary *s, char *p) {
if(s->count) {
long long avg;
@@ -2370,31 +2310,6 @@ static inline void print_cpu_affinity(struct cycle_summary *s, char *p) {
}
}
-static inline void print_cpi_summary(struct weighted_cpi_summary *s) {
- if(s->count) {
- float avg;
-
- avg = (float)s->cycles / s->instructions;
-
- if ( opt.sample_size ) {
- float p5, p50, p95;
- int data_size = s->count;
-
- if(data_size > opt.sample_size)
- data_size = opt.sample_size;
-
- p50 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 50);
- p5 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 5);
- p95 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 95);
-
- printf(" CPI summary: %2.2f {%2.2f|%2.2f|%2.2f}\n",
- avg, p5, p50, p95);
- } else {
- printf(" CPI summary: %2.2f\n", avg);
- }
- }
-}
-
static inline void print_cycle_percent_summary(struct cycle_summary *s,
tsc_t total, char *p) {
if(s->count) {
@@ -7324,31 +7239,6 @@ update:
else if ( sevt.old_runstate == RUNSTATE_RUNNING
|| v->runstate.state == RUNSTATE_RUNNING )
{
-#if 0
- /* A lot of traces include cpi that shouldn't... */
- if(perfctrs && v->runstate.tsc) {
- unsigned long long run_cycles, run_instr;
- double cpi;
-
- //run_cycles = r->p1 - v->runstate_p1_start;
- run_cycles = ri->tsc - v->runstate.tsc;
- run_instr = r->p2 - v->runstate.p2_start;
-
- cpi = ((double)run_cycles) / run_instr;
-
- if(opt.dump_all) {
- printf(" cpi: %2.2lf ( %lld / %lld )\n",
- cpi, run_cycles, run_instr);
- }
-
- if(opt.scatterplot_cpi && v->d->did == 1)
- printf("%lld,%2.2lf\n",
- ri->tsc, cpi);
-
- if(opt.summary_info)
- update_cpi(&v->cpi, run_instr, run_cycles);
- }
-#endif
/*
* Cases:
* old running, v running:
@@ -7520,7 +7410,6 @@ void sched_summary_vcpu(struct vcpu_data *v)
}
}
}
- print_cpi_summary(&v->cpi);
print_cpu_affinity(&v->cpu_affinity_all, " cpu affinity");
for ( i = 0; i < MAX_CPUS ; i++)
{
@@ -9931,7 +9820,6 @@ enum {
OPT_WITH_MMIO_ENUMERATION,
OPT_WITH_INTERRUPT_EIP_ENUMERATION,
OPT_SCATTERPLOT_INTERRUPT_EIP,
- OPT_SCATTERPLOT_CPI,
OPT_SCATTERPLOT_UNPIN_PROMOTE,
OPT_SCATTERPLOT_CR3_SWITCH,
OPT_SCATTERPLOT_WAKE_TO_HALT,
@@ -10167,10 +10055,6 @@ error_t cmd_parser(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
- case OPT_SCATTERPLOT_CPI:
- G.output_defined = 1;
- opt.scatterplot_cpi=1;
- break;
case OPT_SCATTERPLOT_UNPIN_PROMOTE:
G.output_defined = 1;
opt.scatterplot_unpin_promote=1;
@@ -10550,11 +10434,6 @@ const struct argp_option cmd_opts[] = {
.group = OPT_GROUP_EXTRA,
.doc = "Output a scatterplot of vmexit cycles for external interrupts of the given vector as a funciton of time.", },
- { .name = "scatterplot-cpi",
- .key = OPT_SCATTERPLOT_CPI,
- .group = OPT_GROUP_EXTRA,
- .doc = "Output scatterplot of cpi.", },
-
{ .name = "scatterplot-unpin-promote",
.key = OPT_SCATTERPLOT_UNPIN_PROMOTE,
.group = OPT_GROUP_EXTRA,
--
2.1.4
* [PATCH 3/4] tools/xenalyze: Get rid of extraneous data structure
2016-08-08 9:54 [PATCH 1/4] tools/xenalyze: Remove bogus library dependencies George Dunlap
2016-08-08 9:54 ` [PATCH 2/4] tools/xenalyze: Remove weighted cpi summaries George Dunlap
@ 2016-08-08 9:54 ` George Dunlap
2016-08-08 11:06 ` Wei Liu
2016-08-08 9:54 ` [PATCH 4/4] tools/xenalyze: Allow automatic resizing of sample buffers George Dunlap
` (2 subsequent siblings)
4 siblings, 1 reply; 13+ messages in thread
From: George Dunlap @ 2016-08-08 9:54 UTC (permalink / raw)
To: xen-devel
Cc: Ian Jackson, Dario Faggioli, Wei Liu, George Dunlap,
Anshul Makkar
The only difference between event_cycle_summary and cycle_summary was
that the former had a separate counter for "events", which included
zero-cycle events. But a lot of the code dealing with the two had to
be duplicated with slightly different field names.
Remove event_cycle_summary, add an "event_count" field to
cycle_summary, and use cycle_summary for everything.
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
CC: Ian Jackson <ian.jackson@citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Dario Faggioli <dario.faggioli@citrix.com>
CC: Anshul Makkar <anshul.makkar@citrix.com>
---
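As a reference for reviewers, a minimal sketch of the merged counting
scheme this patch adopts (illustrative only, assuming the post-patch
field layout; sample and interval bookkeeping omitted):

    /* event_count counts every event; count and cycles only cover
     * events that actually carried a nonzero cycle value. */
    struct summary_sketch {
        int event_count, count;
        unsigned long long cycles;
    };

    static void sketch_update(struct summary_sketch *s, long long c)
    {
        s->event_count++;   /* every call is an event... */
        if (!c)
            return;         /* ...but zero-cycle events add no sample */
        s->count++;
        s->cycles += c;
    }

    /* Average cycles per cycle-carrying event, as PRINT_SUMMARY reports. */
    static long long sketch_avg(const struct summary_sketch *s)
    {
        return s->count ? (long long)(s->cycles / s->count) : 0;
    }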
tools/xentrace/xenalyze.c | 208 ++++++++++++++++++----------------------------
1 file changed, 81 insertions(+), 127 deletions(-)
diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
index d455a52..455cbdf 100644
--- a/tools/xentrace/xenalyze.c
+++ b/tools/xentrace/xenalyze.c
@@ -274,15 +274,8 @@ struct interval_element {
long long instructions;
};
-struct event_cycle_summary {
- int count, cycles_count;
- long long cycles;
- long long *cycles_sample;
- struct interval_element interval;
-};
-
struct cycle_summary {
- int count;
+ int event_count, count;
unsigned long long cycles;
long long *sample;
struct interval_element interval;
@@ -397,7 +390,7 @@ enum {
struct eip_list_struct {
struct eip_list_struct *next;
unsigned long long eip;
- struct event_cycle_summary summary;
+ struct cycle_summary summary;
int type;
void * extra;
};
@@ -1314,21 +1307,21 @@ struct hvm_data {
/* Information about particular exit reasons */
struct {
- struct event_cycle_summary exit_reason[HVM_EXIT_REASON_MAX];
+ struct cycle_summary exit_reason[HVM_EXIT_REASON_MAX];
int extint[EXTERNAL_INTERRUPT_MAX+1];
int *extint_histogram;
- struct event_cycle_summary trap[HVM_TRAP_MAX];
- struct event_cycle_summary pf_xen[PF_XEN_MAX];
- struct event_cycle_summary pf_xen_emul[PF_XEN_EMUL_MAX];
- struct event_cycle_summary pf_xen_emul_early_unshadow[5];
- struct event_cycle_summary pf_xen_non_emul[PF_XEN_NON_EMUL_MAX];
- struct event_cycle_summary pf_xen_fixup[PF_XEN_FIXUP_MAX];
- struct event_cycle_summary pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX+1];
- struct event_cycle_summary cr_write[CR_MAX];
- struct event_cycle_summary cr3_write_resyncs[RESYNCS_MAX+1];
- struct event_cycle_summary vmcall[HYPERCALL_MAX+1];
- struct event_cycle_summary generic[HVM_EVENT_HANDLER_MAX];
- struct event_cycle_summary mmio[NONPF_MMIO_MAX];
+ struct cycle_summary trap[HVM_TRAP_MAX];
+ struct cycle_summary pf_xen[PF_XEN_MAX];
+ struct cycle_summary pf_xen_emul[PF_XEN_EMUL_MAX];
+ struct cycle_summary pf_xen_emul_early_unshadow[5];
+ struct cycle_summary pf_xen_non_emul[PF_XEN_NON_EMUL_MAX];
+ struct cycle_summary pf_xen_fixup[PF_XEN_FIXUP_MAX];
+ struct cycle_summary pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX+1];
+ struct cycle_summary cr_write[CR_MAX];
+ struct cycle_summary cr3_write_resyncs[RESYNCS_MAX+1];
+ struct cycle_summary vmcall[HYPERCALL_MAX+1];
+ struct cycle_summary generic[HVM_EVENT_HANDLER_MAX];
+ struct cycle_summary mmio[NONPF_MMIO_MAX];
struct hvm_gi_struct {
int count;
struct cycle_summary runtime[GUEST_INTERRUPT_CASE_MAX];
@@ -1337,7 +1330,7 @@ struct hvm_data {
tsc_t start_tsc;
} guest_interrupt[GUEST_INTERRUPT_MAX + 1];
/* IPI Latency */
- struct event_cycle_summary ipi_latency;
+ struct cycle_summary ipi_latency;
int ipi_count[256];
struct {
struct io_address *mmio, *pio;
@@ -2200,62 +2193,28 @@ static inline double __cycles_percent(long long cycles, long long total) {
return (double)(cycles*100) / total;
}
-static inline double __summary_percent(struct event_cycle_summary *s,
+static inline double __summary_percent(struct cycle_summary *s,
struct cycle_framework *f) {
return __cycles_percent(s->cycles, f->total_cycles);
}
-static inline double summary_percent_global(struct event_cycle_summary *s) {
+static inline double summary_percent_global(struct cycle_summary *s) {
return __summary_percent(s, &P.f);
}
-static inline void update_summary(struct event_cycle_summary *s, long long c) {
-/* We don't know ahead of time how many samples there are, and working
- * with dynamic stuff is a pain, and unnecessary. This algorithm will
- * generate a sample set that approximates an even sample. We can
- * then take the percentiles on this, and get an approximate value. */
- if(c) {
- if(opt.sample_size) {
- int lap = (s->cycles_count/opt.sample_size)+1,
- index =s->cycles_count % opt.sample_size;
- if((index - (lap/3))%lap == 0) {
- if(!s->cycles_sample) {
- s->cycles_sample = malloc(sizeof(*s->cycles_sample) * opt.sample_size);
- if(!s->cycles_sample) {
- fprintf(stderr, "%s: malloc failed!\n", __func__);
- error(ERR_SYSTEM, NULL);
- }
- }
- s->cycles_sample[index]=c;
- }
- }
- s->cycles_count++;
- s->cycles += c;
-
- s->interval.count++;
- s->interval.cycles += c;
- }
- s->count++;
-}
-
static inline void update_cycles(struct cycle_summary *s, long long c) {
/* We don't know ahead of time how many samples there are, and working
* with dynamic stuff is a pain, and unnecessary. This algorithm will
* generate a sample set that approximates an even sample. We can
* then take the percentiles on this, and get an approximate value. */
- int lap, index;
+ s->event_count++;
- if ( c == 0 )
- {
- fprintf(warn, "%s: cycles 0! Not updating...\n",
- __func__);
+ if (!c)
return;
- }
-
- if ( opt.sample_size ) {
- lap = (s->count/opt.sample_size)+1;
- index =s->count % opt.sample_size;
-
+
+ if(opt.sample_size) {
+ int lap = (s->count/opt.sample_size)+1,
+ index =s->count % opt.sample_size;
if((index - (lap/3))%lap == 0) {
if(!s->sample) {
s->sample = malloc(sizeof(*s->sample) * opt.sample_size);
@@ -2264,19 +2223,14 @@ static inline void update_cycles(struct cycle_summary *s, long long c) {
error(ERR_SYSTEM, NULL);
}
}
- s->sample[index] = c;
+ s->sample[index]=c;
}
}
-
- if(c > 0) {
- s->cycles += c;
- s->interval.cycles += c;
- } else {
- s->cycles += -c;
- s->interval.cycles += -c;
- }
s->count++;
+ s->cycles += c;
+
s->interval.count++;
+ s->interval.cycles += c;
}
static inline void clear_interval_cycles(struct interval_element *e) {
@@ -2377,29 +2331,29 @@ static inline void print_cycle_summary(struct cycle_summary *s, char *p) {
#define PRINT_SUMMARY(_s, _p...) \
do { \
- if((_s).count) { \
+ if((_s).event_count) { \
if ( opt.sample_size ) { \
unsigned long long p5, p50, p95; \
- int data_size=(_s).cycles_count; \
+ int data_size=(_s).count; \
if(data_size > opt.sample_size) \
data_size=opt.sample_size; \
- p50=percentile((_s).cycles_sample, data_size, 50); \
- p5=percentile((_s).cycles_sample, data_size, 5); \
- p95=percentile((_s).cycles_sample, data_size, 95); \
+ p50=percentile((_s).sample, data_size, 50); \
+ p5=percentile((_s).sample, data_size, 5); \
+ p95=percentile((_s).sample, data_size, 95); \
printf(_p); \
printf(" %7d %5.2lfs %5.2lf%% %5lld cyc {%5lld|%5lld|%5lld}\n", \
- (_s).count, \
+ (_s).event_count, \
((double)(_s).cycles)/opt.cpu_hz, \
summary_percent_global(&(_s)), \
- (_s).cycles_count ? (_s).cycles / (_s).cycles_count:0, \
+ (_s).count ? (_s).cycles / (_s).count:0, \
p5, p50, p95); \
} else { \
printf(_p); \
printf(" %7d %5.2lfs %5.2lf%% %5lld cyc\n", \
- (_s).count, \
+ (_s).event_count, \
((double)(_s).cycles)/opt.cpu_hz, \
summary_percent_global(&(_s)), \
- (_s).cycles_count ? (_s).cycles / (_s).cycles_count:0); \
+ (_s).count ? (_s).cycles / (_s).count:0); \
} \
} \
} while(0)
@@ -2855,7 +2809,7 @@ void update_eip(struct eip_list_struct **head, unsigned long long eip,
eip_list_type[type].update(p, extra);
}
- update_summary(&p->summary, cycles);
+ update_cycles(&p->summary, cycles);
}
int eip_compare(const void *_a, const void *_b) {
@@ -3306,7 +3260,7 @@ void hvm_pf_xen_postprocess(struct hvm_data *h) {
if(opt.summary_info) {
if(e->pf_case)
- update_summary(&h->summary.pf_xen[e->pf_case],
+ update_cycles(&h->summary.pf_xen[e->pf_case],
h->arc_cycles);
else
fprintf(warn, "Strange, pf_case 0!\n");
@@ -3320,17 +3274,17 @@ void hvm_pf_xen_postprocess(struct hvm_data *h) {
break;
case PF_XEN_NON_EMULATE:
if(is_kernel(h->v->guest_paging_levels, h->rip))
- update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_EIP_KERNEL],
+ update_cycles(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_EIP_KERNEL],
h->arc_cycles);
else
- update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_EIP_USER],
+ update_cycles(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_EIP_USER],
h->arc_cycles);
if(is_kernel(h->v->guest_paging_levels, e->va))
- update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_VA_KERNEL],
+ update_cycles(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_VA_KERNEL],
h->arc_cycles);
else
- update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_VA_USER],
+ update_cycles(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_VA_USER],
h->arc_cycles);
}
@@ -3411,7 +3365,7 @@ void hvm_vlapic_vmentry_cleanup(struct vcpu_data *v, tsc_t tsc)
/* FIXME: make general somehow */
if(opt.summary_info)
{
- update_summary(&h->summary.ipi_latency, lat);
+ update_cycles(&h->summary.ipi_latency, lat);
h->summary.ipi_count[vla->outstanding_ipis]++;
}
#endif
@@ -3651,7 +3605,7 @@ void hvm_mmio_assist_postprocess(struct hvm_data *h)
if(opt.summary_info)
{
- update_summary(&h->summary.mmio[reason],
+ update_cycles(&h->summary.mmio[reason],
h->arc_cycles);
}
@@ -3787,7 +3741,7 @@ struct io_address {
struct io_address *next;
unsigned int pa;
unsigned int va;
- struct event_cycle_summary summary[2];
+ struct cycle_summary summary[2];
};
void update_io_address(struct io_address ** list, unsigned int pa, int dir,
@@ -3819,7 +3773,7 @@ void update_io_address(struct io_address ** list, unsigned int pa, int dir,
*list = p;
}
}
- update_summary(&p->summary[dir], arc_cycles);
+ update_cycles(&p->summary[dir], arc_cycles);
}
void hvm_io_address_summary(struct io_address *list, char * s) {
@@ -4139,10 +4093,10 @@ void hvm_cr_write_postprocess(struct hvm_data *h)
if(resyncs > RESYNCS_MAX)
resyncs = RESYNCS_MAX;
- update_summary(&h->summary.cr3_write_resyncs[resyncs],
+ update_cycles(&h->summary.cr3_write_resyncs[resyncs],
h->arc_cycles);
- update_summary(&h->summary.cr_write[3],
+ update_cycles(&h->summary.cr_write[3],
h->arc_cycles);
hvm_update_short_summary(h, HVM_SHORT_SUMMARY_CR3);
@@ -4154,7 +4108,7 @@ void hvm_cr_write_postprocess(struct hvm_data *h)
if(opt.summary_info)
{
if(h->inflight.cr_write.cr < CR_MAX)
- update_summary(&h->summary.cr_write[h->inflight.cr_write.cr],
+ update_cycles(&h->summary.cr_write[h->inflight.cr_write.cr],
h->arc_cycles);
}
@@ -4339,10 +4293,10 @@ void hvm_vmcall_postprocess(struct hvm_data *h)
if(opt.summary)
{
if ( eax < HYPERCALL_MAX )
- update_summary(&h->summary.vmcall[eax],
+ update_cycles(&h->summary.vmcall[eax],
h->arc_cycles);
else
- update_summary(&h->summary.vmcall[HYPERCALL_MAX],
+ update_cycles(&h->summary.vmcall[HYPERCALL_MAX],
h->arc_cycles);
hvm_set_summary_handler(h, hvm_vmcall_summary, NULL);
}
@@ -4683,7 +4637,7 @@ void hvm_generic_postprocess(struct hvm_data *h)
}
if(opt.summary_info) {
- update_summary(&h->summary.generic[evt],
+ update_cycles(&h->summary.generic[evt],
h->arc_cycles);
/* NB that h->exit_reason may be 0, so we offset by 1 */
@@ -5170,7 +5124,7 @@ void hvm_close_vmexit(struct hvm_data *h, tsc_t tsc) {
h->arc_cycles = tsc - h->exit_tsc;
if(opt.summary_info) {
- update_summary(&h->summary.exit_reason[h->exit_reason],
+ update_cycles(&h->summary.exit_reason[h->exit_reason],
h->arc_cycles);
h->summary_info = 1;
}
@@ -5445,26 +5399,26 @@ void shadow_emulate_postprocess(struct hvm_data *h)
h->rip,
h->arc_cycles,
0, NULL);
- update_summary(&h->summary.pf_xen[PF_XEN_EMULATE], h->arc_cycles);
- update_summary(&h->summary.pf_xen_emul[e->pt_level], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen[PF_XEN_EMULATE], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_emul[e->pt_level], h->arc_cycles);
if(h->prealloc_unpin)
- update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PREALLOC_UNPIN], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_emul[PF_XEN_EMUL_PREALLOC_UNPIN], h->arc_cycles);
if(e->flag_prealloc_unhook)
- update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PREALLOC_UNHOOK], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_emul[PF_XEN_EMUL_PREALLOC_UNHOOK], h->arc_cycles);
if(e->flag_early_unshadow)
- update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_EARLY_UNSHADOW], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_emul[PF_XEN_EMUL_EARLY_UNSHADOW], h->arc_cycles);
if(e->flag_set_changed)
- update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_CHANGED], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_CHANGED], h->arc_cycles);
else
- update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_UNCHANGED], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_UNCHANGED], h->arc_cycles);
if(e->flag_set_flush)
- update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_FLUSH], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_FLUSH], h->arc_cycles);
if(e->flag_set_error)
- update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_ERROR], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_ERROR], h->arc_cycles);
if(e->flag_promote)
- update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PROMOTE], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_emul[PF_XEN_EMUL_PROMOTE], h->arc_cycles);
if(e->flag_demote)
- update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_DEMOTE], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_emul[PF_XEN_EMUL_DEMOTE], h->arc_cycles);
/* more summary info */
hvm_update_short_summary(h, HVM_SHORT_SUMMARY_EMULATE);
@@ -5677,10 +5631,10 @@ void shadow_unsync_postprocess(struct hvm_data *h)
h->resyncs);
if(opt.summary_info) {
- update_summary(&h->summary.pf_xen[PF_XEN_EMULATE_UNSYNC],
+ update_cycles(&h->summary.pf_xen[PF_XEN_EMULATE_UNSYNC],
h->arc_cycles);
if(h->resyncs <= 1)
- update_summary(&h->summary.pf_xen_unsync[h->resyncs],
+ update_cycles(&h->summary.pf_xen_unsync[h->resyncs],
h->arc_cycles);
}
}
@@ -5744,36 +5698,36 @@ void shadow_fixup_postprocess(struct hvm_data *h)
if ( opt.summary_info )
{
- update_summary(&h->summary.pf_xen[PF_XEN_FIXUP], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen[PF_XEN_FIXUP], h->arc_cycles);
if(h->prealloc_unpin) {
- update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_PREALLOC_UNPIN], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_PREALLOC_UNPIN], h->arc_cycles);
}
if(e->flag_unsync) {
- update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_UNSYNC], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_UNSYNC], h->arc_cycles);
if(h->resyncs < PF_XEN_FIXUP_UNSYNC_RESYNC_MAX)
- update_summary(&h->summary.pf_xen_fixup_unsync_resync[h->resyncs],
+ update_cycles(&h->summary.pf_xen_fixup_unsync_resync[h->resyncs],
h->arc_cycles);
else
- update_summary(&h->summary.pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX],
+ update_cycles(&h->summary.pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX],
h->arc_cycles);
}
if(e->flag_oos_fixup_add)
- update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_OOS_ADD], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_OOS_ADD], h->arc_cycles);
if(e->flag_oos_fixup_evict)
- update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_OOS_EVICT], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_OOS_EVICT], h->arc_cycles);
if(e->flag_promote)
- update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_PROMOTE], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_PROMOTE], h->arc_cycles);
if(e->flag_wrmap) {
- update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_WRMAP], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_WRMAP], h->arc_cycles);
if(e->flag_wrmap_brute_force || h->wrmap_bf)
- update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_BRUTE_FORCE], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_BRUTE_FORCE], h->arc_cycles);
} else if(e->flag_wrmap_brute_force || h->wrmap_bf) {
fprintf(warn, "Strange: wrmap_bf but not wrmap!\n");
}
if(!(e->flag_promote || h->prealloc_unpin || e->flag_unsync))
- update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_UPDATE_ONLY], h->arc_cycles);
+ update_cycles(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_UPDATE_ONLY], h->arc_cycles);
/* more summary info */
if(e->flag_unsync)
@@ -5894,7 +5848,7 @@ void shadow_mmio_postprocess(struct hvm_data *h)
if ( opt.summary_info )
{
if(e->pf_case)
- update_summary(&h->summary.pf_xen[e->pf_case],
+ update_cycles(&h->summary.pf_xen[e->pf_case],
h->arc_cycles);
else
fprintf(warn, "Strange, pf_case 0!\n");
@@ -5974,7 +5928,7 @@ void shadow_propagate_postprocess(struct hvm_data *h)
if ( opt.summary_info )
{
if(e->pf_case)
- update_summary(&h->summary.pf_xen[e->pf_case],
+ update_cycles(&h->summary.pf_xen[e->pf_case],
h->arc_cycles);
else
fprintf(warn, "Strange, pf_case 0!\n");
@@ -6106,7 +6060,7 @@ void shadow_fault_generic_postprocess(struct hvm_data *h)
}
if(opt.summary_info) {
- update_summary(&h->summary.pf_xen[e->pf_case],
+ update_cycles(&h->summary.pf_xen[e->pf_case],
h->arc_cycles);
hvm_update_short_summary(h, HVM_SHORT_SUMMARY_PROPAGATE);
--
2.1.4
* [PATCH 4/4] tools/xenalyze: Allow automatic resizing of sample buffers
2016-08-08 9:54 [PATCH 1/4] tools/xenalyze: Remove bogus library dependencies George Dunlap
2016-08-08 9:54 ` [PATCH 2/4] tools/xenalyze: Remove weighted cpi summaries George Dunlap
2016-08-08 9:54 ` [PATCH 3/4] tools/xenalyze: Get rid of extraneous data structure George Dunlap
@ 2016-08-08 9:54 ` George Dunlap
2016-08-08 11:05 ` Wei Liu
2016-08-08 17:11 ` anshul makkar
2016-08-08 11:06 ` [PATCH 1/4] tools/xenalyze: Remove bogus library dependencies Wei Liu
2016-08-08 12:25 ` Dario Faggioli
4 siblings, 2 replies; 13+ messages in thread
From: George Dunlap @ 2016-08-08 9:54 UTC (permalink / raw)
To: xen-devel
Cc: Ian Jackson, Dario Faggioli, Wei Liu, George Dunlap,
Anshul Makkar
Rather than have large fixed-size buffers, start with smaller buffers
and allow them to grow as needed (doubling each time), with a fairly
large maximum. Allow this maximum to be set by a command-line
parameter.
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
CC: Ian Jackson <ian.jackson@citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Dario Faggioli <dario.faggioli@citrix.com>
CC: Anshul Makkar <anshul.makkar@citrix.com>
---
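A minimal sketch of the growth policy introduced here (names
hypothetical, not part of the patch): buffers start at the configured
sample size, double whenever they fill up, and are clamped at the
configured maximum, where 0 means "no limit".

    /* Compute the next buffer size: first allocation starts at
     * 'start', later growth doubles, clamped at 'max' (0 = no limit).
     * With start=1024 and max=32M this yields 1024, 2048, 4096, ...
     * up to the cap. */
    static int next_sample_size(int cur, int start, int max)
    {
        int next = cur ? cur << 1 : start;

        if (max && next > max)
            next = max;
        return next;
    }

Once the buffer has hit the cap, the existing "lap" scheme takes over:
with lap = (count/size)+1 and index = count % size, a sample is kept
only when (index - (lap/3)) % lap == 0, so later events overwrite a
progressively sparser subset of slots and the buffer keeps
approximating an even sample of the whole run.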
tools/xentrace/xenalyze.c | 95 +++++++++++++++++++++++++++++++++--------------
1 file changed, 68 insertions(+), 27 deletions(-)
diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
index 455cbdf..a4d8823 100644
--- a/tools/xentrace/xenalyze.c
+++ b/tools/xentrace/xenalyze.c
@@ -44,7 +44,8 @@ struct mread_ctrl;
#define QHZ_FROM_HZ(_hz) (((_hz) << 10)/ 1000000000)
#define ADDR_SPACE_BITS 48
-#define DEFAULT_SAMPLE_SIZE 10240
+#define DEFAULT_SAMPLE_SIZE 1024
+#define DEFAULT_SAMPLE_MAX 1024*1024*32
#define DEFAULT_INTERVAL_LENGTH 1000
struct array_struct {
@@ -187,7 +188,7 @@ struct {
unsigned long long histogram_interrupt_increment;
int interrupt_eip_enumeration_vector;
int default_guest_paging_levels;
- int sample_size;
+ int sample_size, sample_max;
enum error_level tolerance; /* Tolerate up to this level of error */
struct {
tsc_t cycles;
@@ -257,6 +258,7 @@ struct {
.cpu_qhz = QHZ_FROM_HZ(DEFAULT_CPU_HZ),
.default_guest_paging_levels = 2,
.sample_size = DEFAULT_SAMPLE_SIZE,
+ .sample_max = DEFAULT_SAMPLE_MAX,
.tolerance = ERR_SANITY,
.interval = { .msec = DEFAULT_INTERVAL_LENGTH },
};
@@ -275,7 +277,7 @@ struct interval_element {
};
struct cycle_summary {
- int event_count, count;
+ int event_count, count, sample_size;
unsigned long long cycles;
long long *sample;
struct interval_element interval;
@@ -2203,28 +2205,51 @@ static inline double summary_percent_global(struct cycle_summary *s) {
}
static inline void update_cycles(struct cycle_summary *s, long long c) {
-/* We don't know ahead of time how many samples there are, and working
- * with dynamic stuff is a pain, and unnecessary. This algorithm will
- * generate a sample set that approximates an even sample. We can
- * then take the percentiles on this, and get an approximate value. */
s->event_count++;
if (!c)
return;
if(opt.sample_size) {
- int lap = (s->count/opt.sample_size)+1,
- index =s->count % opt.sample_size;
- if((index - (lap/3))%lap == 0) {
- if(!s->sample) {
- s->sample = malloc(sizeof(*s->sample) * opt.sample_size);
- if(!s->sample) {
- fprintf(stderr, "%s: malloc failed!\n", __func__);
- error(ERR_SYSTEM, NULL);
- }
+ if (s->count >= s->sample_size
+ && (s->count == 0
+ || opt.sample_max == 0
+ || s->sample_size < opt.sample_max)) {
+ int new_size;
+ void * new_sample = NULL;
+
+ new_size = s->sample_size << 1;
+
+ if (new_size == 0)
+ new_size = opt.sample_size;
+
+ if (opt.sample_max != 0 && new_size > opt.sample_max)
+ new_size = opt.sample_max;
+
+ //printf("New size: %d\n", new_size);
+
+ new_sample = realloc(s->sample, sizeof(*s->sample) * new_size);
+
+ if (new_sample) {
+ s->sample = new_sample;
+ s->sample_size = new_size;
}
- s->sample[index]=c;
}
+
+ if (s->count < s->sample_size) {
+ s->sample[s->count]=c;
+ } else {
+ /*
+ * If we run out of space for samples, start taking only a
+ * subset of samples.
+ */
+ int lap, index;
+ lap = (s->count/s->sample_size)+1;
+ index =s->count % s->sample_size;
+ if((index - (lap/3))%lap == 0) {
+ s->sample[index]=c;
+ }
+ }
}
s->count++;
s->cycles += c;
@@ -2248,8 +2273,8 @@ static inline void print_cpu_affinity(struct cycle_summary *s, char *p) {
if ( opt.sample_size ) {
long long p5, p50, p95;
int data_size = s->count;
- if(data_size > opt.sample_size)
- data_size = opt.sample_size;
+ if(data_size > s->sample_size)
+ data_size = s->sample_size;
p50 = percentile(s->sample, data_size, 50);
p5 = percentile(s->sample, data_size, 5);
@@ -2280,8 +2305,8 @@ static inline void print_cycle_percent_summary(struct cycle_summary *s,
long long p5, p50, p95;
int data_size = s->count;
- if(data_size > opt.sample_size)
- data_size = opt.sample_size;
+ if(data_size > s->sample_size)
+ data_size = s->sample_size;
p50 = self_weighted_percentile(s->sample, data_size, 50);
p5 = self_weighted_percentile(s->sample, data_size, 5);
@@ -2312,8 +2337,8 @@ static inline void print_cycle_summary(struct cycle_summary *s, char *p) {
long long p5, p50, p95;
int data_size = s->count;
- if(data_size > opt.sample_size)
- data_size = opt.sample_size;
+ if(data_size > s->sample_size)
+ data_size = s->sample_size;
p50 = self_weighted_percentile(s->sample, data_size, 50);
p5 = self_weighted_percentile(s->sample, data_size, 5);
@@ -2335,8 +2360,8 @@ static inline void print_cycle_summary(struct cycle_summary *s, char *p) {
if ( opt.sample_size ) { \
unsigned long long p5, p50, p95; \
int data_size=(_s).count; \
- if(data_size > opt.sample_size) \
- data_size=opt.sample_size; \
+ if(data_size > (_s).sample_size) \
+ data_size=(_s).sample_size; \
p50=percentile((_s).sample, data_size, 50); \
p5=percentile((_s).sample, data_size, 5); \
p95=percentile((_s).sample, data_size, 95); \
@@ -9800,6 +9825,7 @@ enum {
OPT_SHOW_DEFAULT_DOMAIN_SUMMARY,
OPT_MMIO_ENUMERATION_SKIP_VGA,
OPT_SAMPLE_SIZE,
+ OPT_SAMPLE_MAX,
OPT_REPORT_PCPU,
/* Guest info */
OPT_DEFAULT_GUEST_PAGING_LEVELS,
@@ -9982,6 +10008,14 @@ error_t cmd_parser(int key, char *arg, struct argp_state *state)
argp_usage(state);
break;
}
+ case OPT_SAMPLE_MAX:
+ {
+ char * inval;
+ opt.sample_max = (int)strtol(arg, &inval, 0);
+ if( inval == arg )
+ argp_usage(state);
+ break;
+ }
case OPT_MMIO_ENUMERATION_SKIP_VGA:
{
char * inval;
@@ -10519,8 +10553,15 @@ const struct argp_option cmd_opts[] = {
.key = OPT_SAMPLE_SIZE,
.arg = "size",
.group = OPT_GROUP_SUMMARY,
- .doc = "Keep [size] samples for percentile purposes. Enter 0 to " \
- "disable. Default 10240.", },
+ .doc = "Start with [size] samples for percentile purposes. Enter 0 to" \
+ "disable. Default 1024.", },
+
+ { .name = "sample-max",
+ .key = OPT_SAMPLE_MAX,
+ .arg = "size",
+ .group = OPT_GROUP_SUMMARY,
+ .doc = "Do not allow sample to grow beyond [size] samples for percentile"\
+ " purposes. Enter 0 for no limit.", },
{ .name = "summary",
.key = OPT_SUMMARY,
--
2.1.4