From: Andrew Cooper <andrew.cooper3@citrix.com>
To: dongxiao.xu@intel.com
Cc: xen-devel@lists.xen.org
Subject: Re: [PATCH v2 6/8] x86: get per domain CQM information
Date: Thu, 21 Nov 2013 14:09:12 +0000 [thread overview]
Message-ID: <528E1408.6090205@citrix.com> (raw)
In-Reply-To: <1385018444-104477-7-git-send-email-dongxiao.xu@intel.com>
On 21/11/13 07:20, dongxiao.xu@intel.com wrote:
> From: Dongxiao Xu <dongxiao.xu@intel.com>
>
> Retrive CQM information for certain domain, which reflects the L3 cache
> occupancy for a socket.
>
> Signed-off-by: Jiongxi Li <jiongxi.li@intel.com>
> Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
> ---
> xen/arch/x86/pqos.c | 57 ++++++++++++++++++++++++++++++++++
> xen/arch/x86/sysctl.c | 64 +++++++++++++++++++++++++++++++++++++++
> xen/include/asm-x86/msr-index.h | 4 +++
> xen/include/asm-x86/pqos.h | 14 +++++++++
> xen/include/public/domctl.h | 14 +++++++++
> xen/include/public/sysctl.h | 15 +++++++++
> 6 files changed, 168 insertions(+)
>
> diff --git a/xen/arch/x86/pqos.c b/xen/arch/x86/pqos.c
> index e294799..d95784c 100644
> --- a/xen/arch/x86/pqos.c
> +++ b/xen/arch/x86/pqos.c
> @@ -19,14 +19,30 @@
> * Place - Suite 330, Boston, MA 02111-1307 USA.
> */
> #include <asm/processor.h>
> +#include <asm/msr.h>
> #include <xen/init.h>
> #include <xen/spinlock.h>
> #include <xen/sched.h>
> +#include <public/domctl.h>
> #include <asm/pqos.h>
>
> static bool_t pqos_enabled = 1;
> boolean_param("pqos", pqos_enabled);
>
> +static void read_qm_data(void *arg)
> +{
> + struct qm_element *qm_element = arg;
> +
> + wrmsr(MSR_IA32_QOSEVTSEL, qm_element->evtid, qm_element->rmid);
> + rdmsrl(MSR_IA32_QMC, qm_element->qm_data);
> +}
> +
> +static void get_generic_qm_info(struct qm_element *qm_element)
> +{
> + unsigned int cpu = qm_element->cpu;
> + on_selected_cpus(cpumask_of(cpu), read_qm_data, qm_element, 1);
> +}
> +
> unsigned int cqm_res_count = 0;
> unsigned int cqm_upscaling_factor = 0;
> bool_t cqm_enabled = 0;
> @@ -86,6 +102,23 @@ bool_t system_supports_cqm(void)
> return cqm_enabled;
> }
>
> +unsigned int get_cqm_count(void)
> +{
> + return cqm_res_count;
> +}
> +
> +unsigned int get_cqm_avail(void)
> +{
> + unsigned int cqm_avail = 0;
> + int i;
unsigned int please. If cqm_res_count has its top bit set, the
following loop may never terminate.
> +
> + for ( i = 0; i < cqm_res_count; i++ )
> + if ( !cqm_res_array[i].inuse )
> + cqm_avail++;
> +
> + return cqm_avail;
> +}
> +
> int alloc_cqm_rmid(struct domain *d)
> {
> int rmid, rc = 0;
> @@ -137,6 +170,30 @@ void free_cqm_rmid(struct domain *d)
> d->arch.pqos_cqm_rmid = 0;
> }
>
> +void get_cqm_info(uint32_t rmid, cpumask_t cpu_cqmdata_map,
A cpumask_t is already quite large, and will get larger in the future.
Pass by pointer please.
> + struct xen_domctl_getdomcqminfo *info)
> +{
> + struct qm_element element;
> + unsigned int cpu, i;
> +
> + for_each_cpu ( cpu, &cpu_cqmdata_map )
> + {
> + element.cpu = cpu;
> + element.rmid = rmid;
> + element.evtid = QOS_MONITOR_EVTID_L3;
> +
> + get_generic_qm_info(&element);
> +
> + i = cpu_to_socket(cpu);
cpu_to_socket() can return BAD_APICID.
> + info->socket_cqmdata[i].valid =
> + (element.qm_data & IA32_QM_CTR_ERROR_MASK) ? 0 : 1;
info->socket_cqmdata[i].valid = !(element.qm_data & IA32_QM_CTR_ERROR_MASK);
> + if ( info->socket_cqmdata[i].valid )
> + info->socket_cqmdata[i].l3c_occupancy = element.qm_data * cqm_upscaling_factor;
> + else
> + info->socket_cqmdata[i].l3c_occupancy = 0;
> + }
> +}
> +
> /*
> * Local variables:
> * mode: C
> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
> index 15d4b91..d631769 100644
> --- a/xen/arch/x86/sysctl.c
> +++ b/xen/arch/x86/sysctl.c
> @@ -28,6 +28,7 @@
> #include <xen/nodemask.h>
> #include <xen/cpu.h>
> #include <xsm/xsm.h>
> +#include <asm/pqos.h>
>
> #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
>
> @@ -101,6 +102,69 @@ long arch_do_sysctl(
> }
> break;
>
> + case XEN_SYSCTL_getdomcqminfolist:
> + {
This whole hypercall makes me somewhat uneasy.
> + struct domain *d;
> + struct xen_domctl_getdomcqminfo info;
> + uint32_t resource_count;
> + uint32_t resource_avail;
> + uint32_t num_domains = 0;
> + cpumask_t cpu_cqmdata_map;
> + DECLARE_BITMAP(sockets, QOS_MAX_SOCKETS);
> + unsigned int cpu;
> +
> + if ( !system_supports_cqm() )
> + {
> + ret = -ENODEV;
> + break;
> + }
> +
> + resource_count = get_cqm_count();
> + resource_avail = get_cqm_avail();
> +
> + cpumask_clear(&cpu_cqmdata_map);
> + bitmap_zero(sockets, QOS_MAX_SOCKETS);
> + for_each_online_cpu(cpu)
> + {
> + int i = cpu_to_socket(cpu);
> + if ( test_and_set_bit(i, sockets) )
> + continue;
> + cpumask_set_cpu(cpu, &cpu_cqmdata_map);
> + }
What is this doing? It appears to be finding the first cpu on each socket.
> +
> + rcu_read_lock(&domlist_read_lock);
> + for_each_domain ( d )
> + {
> + if ( d->domain_id < sysctl->u.getdomaininfolist.first_domain )
> + continue;
> + if ( num_domains == sysctl->u.getdomaininfolist.max_domains )
> + break;
> + if ( d->arch.pqos_cqm_rmid <= 0 )
> + continue;
Is there any case where pqos_cqm_rmid can be negative? alloc_cqm_rmid()
never assigns a negative number now in v2, in which case
d->arch.pqos_cqm_rmid can probably be unsigned (and related int rmid's
can be similarly promoted to unsigned)
> + memset(&info, 0, sizeof(struct xen_domctl_getdomcqminfo));
> + info.domain = d->domain_id;
> + get_cqm_info(d->arch.pqos_cqm_rmid, cpu_cqmdata_map, &info);
So for a domain the hypercallee is interested in, we get its rmid, and
ask get_cqm_info() to individually IPI each one cpu from a socket to
fill in the info field?
The IPIs are quite expensive, and this system will currently monopolise
the first cpu on each socket.
> +
> + if ( copy_to_guest_offset(sysctl->u.getdomcqminfolist.buffer,
> + num_domains, &info, 1) )
> + {
> + ret = -EFAULT;
> + break;
> + }
> +
> + num_domains++;
So this loop is primarily bounded by the number of domains, where each
domain with a valid rmid will result in a spate of IPI?
This looks like it needs hypercall continuation logic.
Also, how well does this intersect with updating the rmid assignment?
> + }
> + rcu_read_unlock(&domlist_read_lock);
> +
> + sysctl->u.getdomcqminfolist.num_domains = num_domains;
> + sysctl->u.getdomcqminfolist.resource_count = resource_count;
> + sysctl->u.getdomcqminfolist.resource_avail = resource_avail;
> +
> + if ( copy_to_guest(u_sysctl, sysctl, 1) )
> + ret = -EFAULT;
> + }
> + break;
break should be inside the brace.
> +
> default:
> ret = -ENOSYS;
> break;
> diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
> index e597a28..46ef165 100644
> --- a/xen/include/asm-x86/msr-index.h
> +++ b/xen/include/asm-x86/msr-index.h
> @@ -488,4 +488,8 @@
> /* Geode defined MSRs */
> #define MSR_GEODE_BUSCONT_CONF0 0x00001900
>
> +/* Platform QoS register */
> +#define MSR_IA32_QOSEVTSEL 0x00000c8d
> +#define MSR_IA32_QMC 0x00000c8e
> +
> #endif /* __ASM_MSR_INDEX_H */
> diff --git a/xen/include/asm-x86/pqos.h b/xen/include/asm-x86/pqos.h
> index 88de139..5c86c5d 100644
> --- a/xen/include/asm-x86/pqos.h
> +++ b/xen/include/asm-x86/pqos.h
> @@ -27,15 +27,29 @@
> /* QoS Monitoring Event ID */
> #define QOS_MONITOR_EVTID_L3 0x1
>
> +/* IA32_QM_CTR */
> +#define IA32_QM_CTR_ERROR_MASK (0x3ul << 62)
> +
> struct cqm_res_struct {
> domid_t domain_id;
> bool_t inuse;
> };
>
> +struct qm_element {
> + uint64_t qm_data;
> + uint32_t cpu;
> + uint32_t rmid;
> + uint8_t evtid;
> +};
> +
> void init_platform_qos(void);
>
> bool_t system_supports_cqm(void);
> int alloc_cqm_rmid(struct domain *);
> void free_cqm_rmid(struct domain *);
> +unsigned int get_cqm_count(void);
> +unsigned int get_cqm_avail(void);
> +void get_cqm_info(uint32_t rmid, cpumask_t cpu_cqmdata_map,
> + struct xen_domctl_getdomcqminfo *info);
>
> #endif
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index f5d7062..fe8b37f 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -883,6 +883,20 @@ struct xen_domctl_qos_resource {
> typedef struct xen_domctl_qos_resource xen_domctl_qos_resource_t;
> DEFINE_XEN_GUEST_HANDLE(xen_domctl_qos_resource_t);
>
> +struct xen_socket_cqmdata {
> + uint64_t l3c_occupancy;
> + uint8_t valid;
> +};
> +
> +struct xen_domctl_getdomcqminfo {
> + /* OUT variables. */
> + domid_t domain;
> +#define QOS_MAX_SOCKETS 128
Baking this into the ABI seems short sighted, and in this specific case
looks to blow the 128 byte union size in a domctl structure.
The toolstack should be able to find the number of sockets on the
system, and provide a GUEST_HANDLE to an array of socket_cqmdata's of
the appropriate length.
> + struct xen_socket_cqmdata socket_cqmdata[QOS_MAX_SOCKETS];
> +};
> +typedef struct xen_domctl_getdomcqminfo xen_domctl_getdomcqminfo_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomcqminfo_t);
> +
> struct xen_domctl {
> uint32_t cmd;
> #define XEN_DOMCTL_createdomain 1
> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> index 8437d31..0def306 100644
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -632,6 +632,19 @@ struct xen_sysctl_coverage_op {
> typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t;
> DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t);
>
> +/* XEN_SYSCTL_getdomcqminfolist */
> +struct xen_sysctl_getdomcqminfolist {
> + /* IN variables. */
> + domid_t first_domain;
> + uint32_t max_domains;
> + XEN_GUEST_HANDLE_64(xen_domctl_getdomcqminfo_t) buffer;
> + /* OUT variables. */
> + uint32_t num_domains;
num_domains and max_domains can be folded together as both an in and an
out parameter. Also, "max_domains" is confusingly close to "max_domain"
at a glance.
~Andrew
> + uint32_t resource_count;
> + uint32_t resource_avail;
> +};
> +typedef struct xen_sysctl_getdomcqminfolist xen_sysctl_getdomcqminfolist_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomcqminfolist_t);
>
> struct xen_sysctl {
> uint32_t cmd;
> @@ -654,6 +667,7 @@ struct xen_sysctl {
> #define XEN_SYSCTL_cpupool_op 18
> #define XEN_SYSCTL_scheduler_op 19
> #define XEN_SYSCTL_coverage_op 20
> +#define XEN_SYSCTL_getdomcqminfolist 21
> uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
> union {
> struct xen_sysctl_readconsole readconsole;
> @@ -675,6 +689,7 @@ struct xen_sysctl {
> struct xen_sysctl_cpupool_op cpupool_op;
> struct xen_sysctl_scheduler_op scheduler_op;
> struct xen_sysctl_coverage_op coverage_op;
> + struct xen_sysctl_getdomcqminfolist getdomcqminfolist;
> uint8_t pad[128];
> } u;
> };
next prev parent reply other threads:[~2013-11-21 14:09 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-11-21 7:20 [PATCH v2 0/8] enable Cache QoS Monitoring (CQM) feature dongxiao.xu
2013-11-21 7:20 ` [PATCH v2 1/8] x86: detect and initialize Cache QoS Monitoring feature dongxiao.xu
2013-11-21 12:14 ` Andrew Cooper
2013-11-21 12:19 ` Andrew Cooper
2013-11-25 3:06 ` Xu, Dongxiao
2013-11-25 15:40 ` Andrew Cooper
2013-11-25 8:57 ` Xu, Dongxiao
2013-11-25 15:58 ` Andrew Cooper
2013-11-21 7:20 ` [PATCH v2 2/8] x86: handle CQM resource when creating/destroying guests dongxiao.xu
2013-11-21 12:33 ` Andrew Cooper
2013-11-25 3:21 ` Xu, Dongxiao
2013-11-25 16:02 ` Andrew Cooper
2013-11-21 7:20 ` [PATCH v2 3/8] tools: " dongxiao.xu
2013-11-21 7:20 ` [PATCH v2 4/8] x86: dynamically attach/detach CQM service for a guest dongxiao.xu
2013-11-21 12:50 ` Andrew Cooper
2013-11-25 3:26 ` Xu, Dongxiao
2013-11-25 16:05 ` Andrew Cooper
2013-11-25 21:06 ` Konrad Rzeszutek Wilk
2013-11-21 7:20 ` [PATCH v2 5/8] tools: " dongxiao.xu
2013-11-25 21:00 ` Konrad Rzeszutek Wilk
2013-11-25 21:01 ` Konrad Rzeszutek Wilk
2013-11-21 7:20 ` [PATCH v2 6/8] x86: get per domain CQM information dongxiao.xu
2013-11-21 14:09 ` Andrew Cooper [this message]
2013-11-25 6:20 ` Xu, Dongxiao
2013-11-25 16:28 ` Andrew Cooper
2013-11-21 7:20 ` [PATCH v2 7/8] tools: " dongxiao.xu
2013-11-21 7:20 ` [PATCH v2 8/8] x86: enable CQM monitoring for each domain RMID dongxiao.xu
2013-11-21 14:19 ` Andrew Cooper
2013-11-25 7:22 ` Xu, Dongxiao
2013-11-25 16:32 ` Andrew Cooper
2013-11-21 14:36 ` [PATCH v2 0/8] enable Cache QoS Monitoring (CQM) feature Andrew Cooper
2013-11-25 7:24 ` Xu, Dongxiao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=528E1408.6090205@citrix.com \
--to=andrew.cooper3@citrix.com \
--cc=dongxiao.xu@intel.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).