From: Andrew Cooper <andrew.cooper3@citrix.com>
To: dongxiao.xu@intel.com
Cc: xen-devel@lists.xen.org
Subject: Re: [PATCH v2 6/8] x86: get per domain CQM information
Date: Thu, 21 Nov 2013 14:09:12 +0000 [thread overview]
Message-ID: <528E1408.6090205@citrix.com> (raw)
In-Reply-To: <1385018444-104477-7-git-send-email-dongxiao.xu@intel.com>
On 21/11/13 07:20, dongxiao.xu@intel.com wrote:
> From: Dongxiao Xu <dongxiao.xu@intel.com>
>
> Retrive CQM information for certain domain, which reflects the L3 cache
> occupancy for a socket.
>
> Signed-off-by: Jiongxi Li <jiongxi.li@intel.com>
> Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
> ---
> xen/arch/x86/pqos.c | 57 ++++++++++++++++++++++++++++++++++
> xen/arch/x86/sysctl.c | 64 +++++++++++++++++++++++++++++++++++++++
> xen/include/asm-x86/msr-index.h | 4 +++
> xen/include/asm-x86/pqos.h | 14 +++++++++
> xen/include/public/domctl.h | 14 +++++++++
> xen/include/public/sysctl.h | 15 +++++++++
> 6 files changed, 168 insertions(+)
>
> diff --git a/xen/arch/x86/pqos.c b/xen/arch/x86/pqos.c
> index e294799..d95784c 100644
> --- a/xen/arch/x86/pqos.c
> +++ b/xen/arch/x86/pqos.c
> @@ -19,14 +19,30 @@
> * Place - Suite 330, Boston, MA 02111-1307 USA.
> */
> #include <asm/processor.h>
> +#include <asm/msr.h>
> #include <xen/init.h>
> #include <xen/spinlock.h>
> #include <xen/sched.h>
> +#include <public/domctl.h>
> #include <asm/pqos.h>
>
> static bool_t pqos_enabled = 1;
> boolean_param("pqos", pqos_enabled);
>
> +static void read_qm_data(void *arg)
> +{
> + struct qm_element *qm_element = arg;
> +
> + wrmsr(MSR_IA32_QOSEVTSEL, qm_element->evtid, qm_element->rmid);
> + rdmsrl(MSR_IA32_QMC, qm_element->qm_data);
> +}
> +
> +static void get_generic_qm_info(struct qm_element *qm_element)
> +{
> + unsigned int cpu = qm_element->cpu;
> + on_selected_cpus(cpumask_of(cpu), read_qm_data, qm_element, 1);
> +}
> +
> unsigned int cqm_res_count = 0;
> unsigned int cqm_upscaling_factor = 0;
> bool_t cqm_enabled = 0;
> @@ -86,6 +102,23 @@ bool_t system_supports_cqm(void)
> return cqm_enabled;
> }
>
> +unsigned int get_cqm_count(void)
> +{
> + return cqm_res_count;
> +}
> +
> +unsigned int get_cqm_avail(void)
> +{
> + unsigned int cqm_avail = 0;
> + int i;
unsigned int please. If cqm_res_count has its top bit set, the
following loop may never terminate.
> +
> + for ( i = 0; i < cqm_res_count; i++ )
> + if ( !cqm_res_array[i].inuse )
> + cqm_avail++;
> +
> + return cqm_avail;
> +}
> +
> int alloc_cqm_rmid(struct domain *d)
> {
> int rmid, rc = 0;
> @@ -137,6 +170,30 @@ void free_cqm_rmid(struct domain *d)
> d->arch.pqos_cqm_rmid = 0;
> }
>
> +void get_cqm_info(uint32_t rmid, cpumask_t cpu_cqmdata_map,
A cpumask_t is already quite large, and will get larger in the future.
Pass by pointer please.
> + struct xen_domctl_getdomcqminfo *info)
> +{
> + struct qm_element element;
> + unsigned int cpu, i;
> +
> + for_each_cpu ( cpu, &cpu_cqmdata_map )
> + {
> + element.cpu = cpu;
> + element.rmid = rmid;
> + element.evtid = QOS_MONITOR_EVTID_L3;
> +
> + get_generic_qm_info(&element);
> +
> + i = cpu_to_socket(cpu);
cpu_to_socket() can return BAD_APICID.
> + info->socket_cqmdata[i].valid =
> + (element.qm_data & IA32_QM_CTR_ERROR_MASK) ? 0 : 1;
info->socket_cqmdata[i].valid = !(element.qm_data & IA32_QM_CTR_ERROR_MASK);
> + if ( info->socket_cqmdata[i].valid )
> + info->socket_cqmdata[i].l3c_occupancy = element.qm_data * cqm_upscaling_factor;
> + else
> + info->socket_cqmdata[i].l3c_occupancy = 0;
> + }
> +}
> +
> /*
> * Local variables:
> * mode: C
> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
> index 15d4b91..d631769 100644
> --- a/xen/arch/x86/sysctl.c
> +++ b/xen/arch/x86/sysctl.c
> @@ -28,6 +28,7 @@
> #include <xen/nodemask.h>
> #include <xen/cpu.h>
> #include <xsm/xsm.h>
> +#include <asm/pqos.h>
>
> #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
>
> @@ -101,6 +102,69 @@ long arch_do_sysctl(
> }
> break;
>
> + case XEN_SYSCTL_getdomcqminfolist:
> + {
This whole hypercall makes me somewhat uneasy.
> + struct domain *d;
> + struct xen_domctl_getdomcqminfo info;
> + uint32_t resource_count;
> + uint32_t resource_avail;
> + uint32_t num_domains = 0;
> + cpumask_t cpu_cqmdata_map;
> + DECLARE_BITMAP(sockets, QOS_MAX_SOCKETS);
> + unsigned int cpu;
> +
> + if ( !system_supports_cqm() )
> + {
> + ret = -ENODEV;
> + break;
> + }
> +
> + resource_count = get_cqm_count();
> + resource_avail = get_cqm_avail();
> +
> + cpumask_clear(&cpu_cqmdata_map);
> + bitmap_zero(sockets, QOS_MAX_SOCKETS);
> + for_each_online_cpu(cpu)
> + {
> + int i = cpu_to_socket(cpu);
> + if ( test_and_set_bit(i, sockets) )
> + continue;
> + cpumask_set_cpu(cpu, &cpu_cqmdata_map);
> + }
What is this doing? It appears to be finding the first cpu on each socket.
> +
> + rcu_read_lock(&domlist_read_lock);
> + for_each_domain ( d )
> + {
> + if ( d->domain_id < sysctl->u.getdomaininfolist.first_domain )
> + continue;
> + if ( num_domains == sysctl->u.getdomaininfolist.max_domains )
> + break;
> + if ( d->arch.pqos_cqm_rmid <= 0 )
> + continue;
Is there any case where pqos_cqm_rmid can be negative? alloc_cqm_rmid()
never assigns a negative number now in v2, in which case
d->arch.pqos_cqm_rmid can probably be unsigned (and related int rmid's
can be similarly promoted to unsigned)
> + memset(&info, 0, sizeof(struct xen_domctl_getdomcqminfo));
> + info.domain = d->domain_id;
> + get_cqm_info(d->arch.pqos_cqm_rmid, cpu_cqmdata_map, &info);
So for a domain the hypercallee is interested in, we get its rmid, and
ask get_cqm_info() to individually IPI each one cpu from a socket to
fill in the info field?
The IPIs are quite expensive, and this system will currently monopolise
the first cpu on each socket.
> +
> + if ( copy_to_guest_offset(sysctl->u.getdomcqminfolist.buffer,
> + num_domains, &info, 1) )
> + {
> + ret = -EFAULT;
> + break;
> + }
> +
> + num_domains++;
So this loop is primarily bounded by the number of domains, where each
domain with a valid rmid will result in a spate of IPI?
This looks like it needs hypercall continuation logic.
Also, how well does this intersect with updating the rmid assignment?
> + }
> + rcu_read_unlock(&domlist_read_lock);
> +
> + sysctl->u.getdomcqminfolist.num_domains = num_domains;
> + sysctl->u.getdomcqminfolist.resource_count = resource_count;
> + sysctl->u.getdomcqminfolist.resource_avail = resource_avail;
> +
> + if ( copy_to_guest(u_sysctl, sysctl, 1) )
> + ret = -EFAULT;
> + }
> + break;
break should be inside the brace.
> +
> default:
> ret = -ENOSYS;
> break;
> diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
> index e597a28..46ef165 100644
> --- a/xen/include/asm-x86/msr-index.h
> +++ b/xen/include/asm-x86/msr-index.h
> @@ -488,4 +488,8 @@
> /* Geode defined MSRs */
> #define MSR_GEODE_BUSCONT_CONF0 0x00001900
>
> +/* Platform QoS register */
> +#define MSR_IA32_QOSEVTSEL 0x00000c8d
> +#define MSR_IA32_QMC 0x00000c8e
> +
> #endif /* __ASM_MSR_INDEX_H */
> diff --git a/xen/include/asm-x86/pqos.h b/xen/include/asm-x86/pqos.h
> index 88de139..5c86c5d 100644
> --- a/xen/include/asm-x86/pqos.h
> +++ b/xen/include/asm-x86/pqos.h
> @@ -27,15 +27,29 @@
> /* QoS Monitoring Event ID */
> #define QOS_MONITOR_EVTID_L3 0x1
>
> +/* IA32_QM_CTR */
> +#define IA32_QM_CTR_ERROR_MASK (0x3ul << 62)
> +
> struct cqm_res_struct {
> domid_t domain_id;
> bool_t inuse;
> };
>
> +struct qm_element {
> + uint64_t qm_data;
> + uint32_t cpu;
> + uint32_t rmid;
> + uint8_t evtid;
> +};
> +
> void init_platform_qos(void);
>
> bool_t system_supports_cqm(void);
> int alloc_cqm_rmid(struct domain *);
> void free_cqm_rmid(struct domain *);
> +unsigned int get_cqm_count(void);
> +unsigned int get_cqm_avail(void);
> +void get_cqm_info(uint32_t rmid, cpumask_t cpu_cqmdata_map,
> + struct xen_domctl_getdomcqminfo *info);
>
> #endif
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index f5d7062..fe8b37f 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -883,6 +883,20 @@ struct xen_domctl_qos_resource {
> typedef struct xen_domctl_qos_resource xen_domctl_qos_resource_t;
> DEFINE_XEN_GUEST_HANDLE(xen_domctl_qos_resource_t);
>
> +struct xen_socket_cqmdata {
> + uint64_t l3c_occupancy;
> + uint8_t valid;
> +};
> +
> +struct xen_domctl_getdomcqminfo {
> + /* OUT variables. */
> + domid_t domain;
> +#define QOS_MAX_SOCKETS 128
Baking this into the ABI seems short sighted, and in this specific case
looks to blow the 128 byte union size in a domctl structure.
The toolstack should be able to find the number of sockets on the
system, and provide a GUEST_HANDLE to an array of socket_cqmdata's of
the appropriate length.
> + struct xen_socket_cqmdata socket_cqmdata[QOS_MAX_SOCKETS];
> +};
> +typedef struct xen_domctl_getdomcqminfo xen_domctl_getdomcqminfo_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomcqminfo_t);
> +
> struct xen_domctl {
> uint32_t cmd;
> #define XEN_DOMCTL_createdomain 1
> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> index 8437d31..0def306 100644
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -632,6 +632,19 @@ struct xen_sysctl_coverage_op {
> typedef struct xen_sysctl_coverage_op xen_sysctl_coverage_op_t;
> DEFINE_XEN_GUEST_HANDLE(xen_sysctl_coverage_op_t);
>
> +/* XEN_SYSCTL_getdomcqminfolist */
> +struct xen_sysctl_getdomcqminfolist {
> + /* IN variables. */
> + domid_t first_domain;
> + uint32_t max_domains;
> + XEN_GUEST_HANDLE_64(xen_domctl_getdomcqminfo_t) buffer;
> + /* OUT variables. */
> + uint32_t num_domains;
num_domains and max_domains can be folded together as both an in and an
out parameter. Also, "max_domains" is confusingly close to "max_domain"
at a glance.
~Andrew
> + uint32_t resource_count;
> + uint32_t resource_avail;
> +};
> +typedef struct xen_sysctl_getdomcqminfolist xen_sysctl_getdomcqminfolist_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomcqminfolist_t);
>
> struct xen_sysctl {
> uint32_t cmd;
> @@ -654,6 +667,7 @@ struct xen_sysctl {
> #define XEN_SYSCTL_cpupool_op 18
> #define XEN_SYSCTL_scheduler_op 19
> #define XEN_SYSCTL_coverage_op 20
> +#define XEN_SYSCTL_getdomcqminfolist 21
> uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
> union {
> struct xen_sysctl_readconsole readconsole;
> @@ -675,6 +689,7 @@ struct xen_sysctl {
> struct xen_sysctl_cpupool_op cpupool_op;
> struct xen_sysctl_scheduler_op scheduler_op;
> struct xen_sysctl_coverage_op coverage_op;
> + struct xen_sysctl_getdomcqminfolist getdomcqminfolist;
> uint8_t pad[128];
> } u;
> };
next prev parent reply other threads:[~2013-11-21 14:09 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-11-21 7:20 [PATCH v2 0/8] enable Cache QoS Monitoring (CQM) feature dongxiao.xu
2013-11-21 7:20 ` [PATCH v2 1/8] x86: detect and initialize Cache QoS Monitoring feature dongxiao.xu
2013-11-21 12:14 ` Andrew Cooper
2013-11-21 12:19 ` Andrew Cooper
2013-11-25 3:06 ` Xu, Dongxiao
2013-11-25 15:40 ` Andrew Cooper
2013-11-25 8:57 ` Xu, Dongxiao
2013-11-25 15:58 ` Andrew Cooper
2013-11-21 7:20 ` [PATCH v2 2/8] x86: handle CQM resource when creating/destroying guests dongxiao.xu
2013-11-21 12:33 ` Andrew Cooper
2013-11-25 3:21 ` Xu, Dongxiao
2013-11-25 16:02 ` Andrew Cooper
2013-11-21 7:20 ` [PATCH v2 3/8] tools: " dongxiao.xu
2013-11-21 7:20 ` [PATCH v2 4/8] x86: dynamically attach/detach CQM service for a guest dongxiao.xu
2013-11-21 12:50 ` Andrew Cooper
2013-11-25 3:26 ` Xu, Dongxiao
2013-11-25 16:05 ` Andrew Cooper
2013-11-25 21:06 ` Konrad Rzeszutek Wilk
2013-11-21 7:20 ` [PATCH v2 5/8] tools: " dongxiao.xu
2013-11-25 21:00 ` Konrad Rzeszutek Wilk
2013-11-25 21:01 ` Konrad Rzeszutek Wilk
2013-11-21 7:20 ` [PATCH v2 6/8] x86: get per domain CQM information dongxiao.xu
2013-11-21 14:09 ` Andrew Cooper [this message]
2013-11-25 6:20 ` Xu, Dongxiao
2013-11-25 16:28 ` Andrew Cooper
2013-11-21 7:20 ` [PATCH v2 7/8] tools: " dongxiao.xu
2013-11-21 7:20 ` [PATCH v2 8/8] x86: enable CQM monitoring for each domain RMID dongxiao.xu
2013-11-21 14:19 ` Andrew Cooper
2013-11-25 7:22 ` Xu, Dongxiao
2013-11-25 16:32 ` Andrew Cooper
2013-11-21 14:36 ` [PATCH v2 0/8] enable Cache QoS Monitoring (CQM) feature Andrew Cooper
2013-11-25 7:24 ` Xu, Dongxiao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=528E1408.6090205@citrix.com \
--to=andrew.cooper3@citrix.com \
--cc=dongxiao.xu@intel.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.