From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: Jan Beulich <JBeulich@suse.com>
Cc: Keir Fraser <keir@xen.org>,
Ian Campbell <ian.campbell@citrix.com>,
Ian Jackson <Ian.Jackson@eu.citrix.com>,
Donald D Dugger <donald.d.dugger@intel.com>,
Jun Nakajima <jun.nakajima@intel.com>,
xen-devel <xen-devel@lists.xenproject.org>
Subject: Re: [PATCH 2/3] x86/idle: update to include further package/core residency MSRs
Date: Wed, 05 Mar 2014 10:07:21 -0500
Message-ID: <53173DA9.2030103@oracle.com>
In-Reply-To: <53170C650200007800121216@nat28.tlf.novell.com>
On 03/05/2014 05:37 AM, Jan Beulich wrote:
> With the number of these growing it becomes increasingly desirable to
> not repeatedly alter the sysctl interface to accommodate them. Replace
> the explicit listing of numbered states by arrays, unused fields of
> which will remain untouched by the hypercall.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- 2014-02-13.orig/tools/libxc/xc_pm.c 2014-03-04 17:43:06.000000000 +0100
> +++ 2014-02-13/tools/libxc/xc_pm.c 2014-03-05 08:54:58.000000000 +0100
> @@ -123,46 +123,90 @@ int xc_pm_get_max_cx(xc_interface *xch,
>
> int xc_pm_get_cxstat(xc_interface *xch, int cpuid, struct xc_cx_stat *cxpt)
> {
> - DECLARE_SYSCTL;
> - DECLARE_NAMED_HYPERCALL_BOUNCE(triggers, cxpt->triggers, 0, XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
> - DECLARE_NAMED_HYPERCALL_BOUNCE(residencies, cxpt->residencies, 0, XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
> + uint64_t pc[7], cc[7];
Don't you need pc[10]? There seem to be PC8-PC10 states (at least there
are references to them below for Haswell).
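I.e. something like this (just a sketch, assuming the 1-biased indexing
used elsewhere in the patch, so PCn lands in pc[n-1]):

    uint64_t pc[10], cc[7]; /* pc[9] would then hold the PC10 residency */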
> + struct xc_cx_stat_v2 cxpt2 = {
> + .triggers = cxpt->triggers,
> + .residencies = cxpt->residencies,
> + .nr_pc = sizeof(pc) / sizeof(*pc),
> + .nr_cc = sizeof(cc) / sizeof(*cc),
> + .pc = pc,
> + .cc = cc
> + };
> int max_cx, ret;
>
> if( !cxpt->triggers || !cxpt->residencies )
> return -EINVAL;
>
> if ( (ret = xc_pm_get_max_cx(xch, cpuid, &max_cx)) )
> - goto unlock_0;
> + return ret;
>
> - HYPERCALL_BOUNCE_SET_SIZE(triggers, max_cx * sizeof(uint64_t));
> - HYPERCALL_BOUNCE_SET_SIZE(residencies, max_cx * sizeof(uint64_t));
> + cxpt2.nr = max_cx;
> + ret = xc_pm_get_cx_stat(xch, cpuid, &cxpt2);
Why are you not returning on error here?
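I'd have expected something mirroring the xc_pm_get_max_cx() check above
(just a sketch):

    if ( (ret = xc_pm_get_cx_stat(xch, cpuid, &cxpt2)) )
        return ret;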
> +
> + cxpt->nr = cxpt2.nr;
> + cxpt->last = cxpt2.last;
> + cxpt->idle_time = cxpt2.idle_time;
> + cxpt->pc2 = pc[1];
> + cxpt->pc3 = pc[2];
> + cxpt->pc6 = pc[5];
> + cxpt->pc7 = pc[6];
> + cxpt->cc3 = cc[2];
> + cxpt->cc6 = cc[5];
> + cxpt->cc7 = cc[6];
> +
> + return ret;
> +}
> +
> +int xc_pm_get_cx_stat(xc_interface *xch, int cpuid, struct xc_cx_stat_v2 *cxpt)
> +{
> + DECLARE_SYSCTL;
> + DECLARE_NAMED_HYPERCALL_BOUNCE(triggers, cxpt->triggers,
> + cxpt->nr * sizeof(*cxpt->triggers),
> + XC_HYPERCALL_BUFFER_BOUNCE_OUT);
> + DECLARE_NAMED_HYPERCALL_BOUNCE(residencies, cxpt->residencies,
> + cxpt->nr * sizeof(*cxpt->residencies),
> + XC_HYPERCALL_BUFFER_BOUNCE_OUT);
> + DECLARE_NAMED_HYPERCALL_BOUNCE(pc, cxpt->pc,
> + cxpt->nr_pc * sizeof(*cxpt->pc),
> + XC_HYPERCALL_BUFFER_BOUNCE_OUT);
> + DECLARE_NAMED_HYPERCALL_BOUNCE(cc, cxpt->cc,
> + cxpt->nr_cc * sizeof(*cxpt->cc),
> + XC_HYPERCALL_BUFFER_BOUNCE_OUT);
> + int ret = -1;
>
> - ret = -1;
> if ( xc_hypercall_bounce_pre(xch, triggers) )
> goto unlock_0;
> if ( xc_hypercall_bounce_pre(xch, residencies) )
> goto unlock_1;
> + if ( xc_hypercall_bounce_pre(xch, pc) )
> + goto unlock_2;
> + if ( xc_hypercall_bounce_pre(xch, cc) )
> + goto unlock_3;
>
> sysctl.cmd = XEN_SYSCTL_get_pmstat;
> sysctl.u.get_pmstat.type = PMSTAT_get_cxstat;
> sysctl.u.get_pmstat.cpuid = cpuid;
> + sysctl.u.get_pmstat.u.getcx.nr = cxpt->nr;
> + sysctl.u.get_pmstat.u.getcx.nr_pc = cxpt->nr_pc;
> + sysctl.u.get_pmstat.u.getcx.nr_cc = cxpt->nr_cc;
> set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.triggers, triggers);
> set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.residencies, residencies);
> + set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.pc, pc);
> + set_xen_guest_handle(sysctl.u.get_pmstat.u.getcx.cc, cc);
>
> if ( (ret = xc_sysctl(xch, &sysctl)) )
> - goto unlock_2;
> + goto unlock_4;
>
> cxpt->nr = sysctl.u.get_pmstat.u.getcx.nr;
> cxpt->last = sysctl.u.get_pmstat.u.getcx.last;
> cxpt->idle_time = sysctl.u.get_pmstat.u.getcx.idle_time;
> - cxpt->pc2 = sysctl.u.get_pmstat.u.getcx.pc2;
> - cxpt->pc3 = sysctl.u.get_pmstat.u.getcx.pc3;
> - cxpt->pc6 = sysctl.u.get_pmstat.u.getcx.pc6;
> - cxpt->pc7 = sysctl.u.get_pmstat.u.getcx.pc7;
> - cxpt->cc3 = sysctl.u.get_pmstat.u.getcx.cc3;
> - cxpt->cc6 = sysctl.u.get_pmstat.u.getcx.cc6;
> - cxpt->cc7 = sysctl.u.get_pmstat.u.getcx.cc7;
> + cxpt->nr_pc = sysctl.u.get_pmstat.u.getcx.nr_pc;
> + cxpt->nr_cc = sysctl.u.get_pmstat.u.getcx.nr_cc;
>
> +unlock_4:
> + xc_hypercall_bounce_post(xch, cc);
> +unlock_3:
> + xc_hypercall_bounce_post(xch, pc);
> unlock_2:
> xc_hypercall_bounce_post(xch, residencies);
> unlock_1:
> --- 2014-02-13.orig/tools/libxc/xenctrl.h 2014-03-04 17:43:06.000000000 +0100
> +++ 2014-02-13/tools/libxc/xenctrl.h 2014-03-04 17:50:49.000000000 +0100
> @@ -1934,7 +1934,7 @@ int xc_pm_get_max_px(xc_interface *xch,
> int xc_pm_get_pxstat(xc_interface *xch, int cpuid, struct xc_px_stat *pxpt);
> int xc_pm_reset_pxstat(xc_interface *xch, int cpuid);
>
> -struct xc_cx_stat {
> +struct xc_cx_stat { /* DEPRECATED (use v2 below instead)! */
> uint32_t nr; /* entry nr in triggers & residencies, including C0 */
> uint32_t last; /* last Cx state */
> uint64_t idle_time; /* idle time from boot */
> @@ -1950,8 +1950,22 @@ struct xc_cx_stat {
> };
> typedef struct xc_cx_stat xc_cx_stat_t;
>
> +struct xc_cx_stat_v2 {
> + uint32_t nr; /* entry nr in triggers[]/residencies[], incl C0 */
> + uint32_t last; /* last Cx state */
> + uint64_t idle_time; /* idle time from boot */
> + uint64_t *triggers; /* Cx trigger counts */
> + uint64_t *residencies; /* Cx residencies */
> + uint32_t nr_pc; /* entry nr in pc[] */
> + uint32_t nr_cc; /* entry nr in cc[] */
Are these the number of entries in the respective array, or the largest
entry number? The "entry nr" wording in the comments is ambiguous.
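If the former is meant, clearer comments might be:

    uint32_t nr_pc; /* number of entries in pc[] */
    uint32_t nr_cc; /* number of entries in cc[] */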
> + uint64_t *pc; /* 1-biased indexing (i.e. excl C0) */
> + uint64_t *cc; /* 1-biased indexing (i.e. excl C0) */
> +};
> +typedef struct xc_cx_stat_v2 xc_cx_stat_v2_t;
> +
> int xc_pm_get_max_cx(xc_interface *xch, int cpuid, int *max_cx);
> int xc_pm_get_cxstat(xc_interface *xch, int cpuid, struct xc_cx_stat *cxpt);
> +int xc_pm_get_cx_stat(xc_interface *xch, int cpuid, struct xc_cx_stat_v2 *);
You forgot the last parameter's name.
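Presumably (picking cxpt to match xc_pm_get_cxstat() above):

    int xc_pm_get_cx_stat(xc_interface *xch, int cpuid, struct xc_cx_stat_v2 *cxpt);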
-boris
> int xc_pm_reset_cxstat(xc_interface *xch, int cpuid);
>
> int xc_cpu_online(xc_interface *xch, int cpu);
> --- 2014-02-13.orig/xen/arch/x86/acpi/cpu_idle.c 2014-03-04 17:43:06.000000000 +0100
> +++ 2014-02-13/xen/arch/x86/acpi/cpu_idle.c 2014-03-04 17:38:39.000000000 +0100
> @@ -62,13 +62,17 @@
>
> #define GET_HW_RES_IN_NS(msr, val) \
> do { rdmsrl(msr, val); val = tsc_ticks2ns(val); } while( 0 )
> -#define GET_PC2_RES(val) GET_HW_RES_IN_NS(0x60D, val) /* SNB only */
> +#define GET_PC2_RES(val) GET_HW_RES_IN_NS(0x60D, val) /* SNB onwards */
> #define GET_PC3_RES(val) GET_HW_RES_IN_NS(0x3F8, val)
> #define GET_PC6_RES(val) GET_HW_RES_IN_NS(0x3F9, val)
> #define GET_PC7_RES(val) GET_HW_RES_IN_NS(0x3FA, val)
> +#define GET_PC8_RES(val) GET_HW_RES_IN_NS(0x630, val) /* some Haswells only */
> +#define GET_PC9_RES(val) GET_HW_RES_IN_NS(0x631, val) /* some Haswells only */
> +#define GET_PC10_RES(val) GET_HW_RES_IN_NS(0x632, val) /* some Haswells only */
> +#define GET_CC1_RES(val) GET_HW_RES_IN_NS(0x660, val) /* Silvermont only */
> #define GET_CC3_RES(val) GET_HW_RES_IN_NS(0x3FC, val)
> #define GET_CC6_RES(val) GET_HW_RES_IN_NS(0x3FD, val)
> -#define GET_CC7_RES(val) GET_HW_RES_IN_NS(0x3FE, val) /* SNB only */
> +#define GET_CC7_RES(val) GET_HW_RES_IN_NS(0x3FE, val) /* SNB onwards */
>
> static void lapic_timer_nop(void) { }
> void (*__read_mostly lapic_timer_off)(void);
> @@ -111,8 +115,13 @@ struct hw_residencies
> {
> uint64_t pc2;
> uint64_t pc3;
> + uint64_t pc4;
> uint64_t pc6;
> uint64_t pc7;
> + uint64_t pc8;
> + uint64_t pc9;
> + uint64_t pc10;
> + uint64_t cc1;
> uint64_t cc3;
> uint64_t cc6;
> uint64_t cc7;
> @@ -128,6 +137,12 @@ static void do_get_hw_residencies(void *
>
> switch ( c->x86_model )
> {
> + /* 4th generation Intel Core (Haswell) */
> + case 0x45:
> + GET_PC8_RES(hw_res->pc8);
> + GET_PC9_RES(hw_res->pc9);
> + GET_PC10_RES(hw_res->pc10);
> + /* fall through */
> /* Sandy bridge */
> case 0x2A:
> case 0x2D:
> @@ -137,7 +152,6 @@ static void do_get_hw_residencies(void *
> /* Haswell */
> case 0x3C:
> case 0x3F:
> - case 0x45:
> case 0x46:
> /* future */
> case 0x3D:
> @@ -160,6 +174,22 @@ static void do_get_hw_residencies(void *
> GET_CC3_RES(hw_res->cc3);
> GET_CC6_RES(hw_res->cc6);
> break;
> + /* various Atoms */
> + case 0x27:
> + GET_PC3_RES(hw_res->pc2); /* abusing GET_PC3_RES */
> + GET_PC6_RES(hw_res->pc4); /* abusing GET_PC6_RES */
> + GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */
> + break;
> + /* Silvermont */
> + case 0x37:
> + case 0x4A:
> + case 0x4D:
> + case 0x5A:
> + case 0x5D:
> + GET_PC7_RES(hw_res->pc6); /* abusing GET_PC7_RES */
> + GET_CC1_RES(hw_res->cc1);
> + GET_CC6_RES(hw_res->cc6);
> + break;
> }
> }
>
> @@ -179,10 +209,16 @@ static void print_hw_residencies(uint32_
>
> get_hw_residencies(cpu, &hw_res);
>
> - printk("PC2[%"PRId64"] PC3[%"PRId64"] PC6[%"PRId64"] PC7[%"PRId64"]\n",
> - hw_res.pc2, hw_res.pc3, hw_res.pc6, hw_res.pc7);
> - printk("CC3[%"PRId64"] CC6[%"PRId64"] CC7[%"PRId64"]\n",
> - hw_res.cc3, hw_res.cc6,hw_res.cc7);
> + printk("PC2[%"PRIu64"] PC%d[%"PRIu64"] PC6[%"PRIu64"] PC7[%"PRIu64"]\n",
> + hw_res.pc2,
> + hw_res.pc4 ? 4 : 3, hw_res.pc4 ?: hw_res.pc3,
> + hw_res.pc6, hw_res.pc7);
> + if ( hw_res.pc8 | hw_res.pc9 | hw_res.pc10 )
> + printk("PC8[%"PRIu64"] PC9[%"PRIu64"] PC10[%"PRIu64"]\n",
> + hw_res.pc8, hw_res.pc9, hw_res.pc10);
> + printk("CC%d[%"PRIu64"] CC6[%"PRIu64"] CC7[%"PRIu64"]\n",
> + hw_res.cc1 ? 1 : 3, hw_res.cc1 ?: hw_res.cc3,
> + hw_res.cc6, hw_res.cc7);
> }
>
> static char* acpi_cstate_method_name[] =
> @@ -1097,19 +1133,21 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
> struct acpi_processor_power *power = processor_powers[cpuid];
> uint64_t idle_usage = 0, idle_res = 0;
> uint64_t usage[ACPI_PROCESSOR_MAX_POWER], res[ACPI_PROCESSOR_MAX_POWER];
> - int i;
> - struct hw_residencies hw_res;
> + unsigned int i, nr, nr_pc = 0, nr_cc = 0;
>
> if ( power == NULL )
> {
> stat->last = 0;
> stat->nr = 0;
> stat->idle_time = 0;
> + stat->nr_pc = 0;
> + stat->nr_cc = 0;
> return 0;
> }
>
> stat->last = power->last_state ? power->last_state->idx : 0;
> stat->idle_time = get_cpu_idle_time(cpuid);
> + nr = min(stat->nr, power->count);
>
> /* mimic the stat when detail info hasn't been registered by dom0 */
> if ( pm_idle_save == NULL )
> @@ -1118,14 +1156,14 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
>
> usage[1] = idle_usage = 1;
> res[1] = idle_res = stat->idle_time;
> -
> - memset(&hw_res, 0, sizeof(hw_res));
> }
> else
> {
> + struct hw_residencies hw_res;
> +
> stat->nr = power->count;
>
> - for ( i = 1; i < power->count; i++ )
> + for ( i = 1; i < nr; i++ )
> {
> spin_lock_irq(&power->stat_lock);
> usage[i] = power->states[i].usage;
> @@ -1137,22 +1175,42 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
> }
>
> get_hw_residencies(cpuid, &hw_res);
> +
> +#define PUT_xC(what, n) do { \
> + if ( stat->nr_##what >= n && \
> + copy_to_guest_offset(stat->what, n - 1, &hw_res.what##n, 1) ) \
> + return -EFAULT; \
> + if ( hw_res.what##n ) \
> + nr_##what = n; \
> + } while ( 0 )
> +#define PUT_PC(n) PUT_xC(pc, n)
> + PUT_PC(2);
> + PUT_PC(3);
> + PUT_PC(4);
> + PUT_PC(6);
> + PUT_PC(7);
> + PUT_PC(8);
> + PUT_PC(9);
> + PUT_PC(10);
> +#undef PUT_PC
> +#define PUT_CC(n) PUT_xC(cc, n)
> + PUT_CC(1);
> + PUT_CC(3);
> + PUT_CC(6);
> + PUT_CC(7);
> +#undef PUT_CC
> +#undef PUT_xC
> }
>
> usage[0] = idle_usage;
> res[0] = NOW() - idle_res;
>
> - if ( copy_to_guest(stat->triggers, usage, stat->nr) ||
> - copy_to_guest(stat->residencies, res, stat->nr) )
> + if ( copy_to_guest(stat->triggers, usage, nr) ||
> + copy_to_guest(stat->residencies, res, nr) )
> return -EFAULT;
>
> - stat->pc2 = hw_res.pc2;
> - stat->pc3 = hw_res.pc3;
> - stat->pc6 = hw_res.pc6;
> - stat->pc7 = hw_res.pc7;
> - stat->cc3 = hw_res.cc3;
> - stat->cc6 = hw_res.cc6;
> - stat->cc7 = hw_res.cc7;
> + stat->nr_pc = nr_pc;
> + stat->nr_cc = nr_cc;
>
> return 0;
> }
> --- 2014-02-13.orig/xen/include/public/sysctl.h 2014-03-04 17:43:06.000000000 +0100
> +++ 2014-02-13/xen/include/public/sysctl.h 2014-03-04 17:34:15.000000000 +0100
> @@ -34,7 +34,7 @@
> #include "xen.h"
> #include "domctl.h"
>
> -#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000A
> +#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000B
>
> /*
> * Read console content from Xen buffer ring.
> @@ -226,13 +226,10 @@ struct pm_cx_stat {
> uint64_aligned_t idle_time; /* idle time from boot */
> XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */
> XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */
> - uint64_aligned_t pc2;
> - uint64_aligned_t pc3;
> - uint64_aligned_t pc6;
> - uint64_aligned_t pc7;
> - uint64_aligned_t cc3;
> - uint64_aligned_t cc6;
> - uint64_aligned_t cc7;
> + uint32_t nr_pc; /* entry nr in pc[] */
> + uint32_t nr_cc; /* entry nr in cc[] */
> + XEN_GUEST_HANDLE_64(uint64) pc; /* 1-biased indexing */
> + XEN_GUEST_HANDLE_64(uint64) cc; /* 1-biased indexing */
> };
>
> struct xen_sysctl_get_pmstat {
>