From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Elena Ufimtseva <ufimtseva@gmail.com>
Cc: keir@xen.org, Ian.Campbell@citrix.com,
stefano.stabellini@eu.citrix.com, george.dunlap@eu.citrix.com,
msw@linux.com, dario.faggioli@citrix.com, lccycc123@gmail.com,
ian.jackson@eu.citrix.com, xen-devel@lists.xen.org,
JBeulich@suse.com
Subject: Re: [PATCH v4 3/7] xl: vnuma memory parsing and supplement functions
Date: Mon, 16 Dec 2013 14:57:11 -0500 [thread overview]
Message-ID: <20131216195711.GA29733@phenom.dumpdata.com> (raw)
In-Reply-To: <1386136035-19544-4-git-send-email-ufimtseva@gmail.com>
On Wed, Dec 04, 2013 at 12:47:11AM -0500, Elena Ufimtseva wrote:
> Parses vnuma topoplogy number of nodes and memory
> ranges. If not defined, initializes vnuma with
> only one node and default topology.
>
> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
> ---
> Changes since v3:
> - added subop hypercall to retrive number of vnodes
> and vcpus for domain to make correct allocations before
> requesting vnuma topology.
> ---
> tools/libxl/libxl_types.idl | 6 +-
> tools/libxl/libxl_vnuma.h | 11 ++
> tools/libxl/xl_cmdimpl.c | 234 +++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 250 insertions(+), 1 deletion(-)
> create mode 100644 tools/libxl/libxl_vnuma.h
>
> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
> index cba8eff..ba46f58 100644
> --- a/tools/libxl/libxl_types.idl
> +++ b/tools/libxl/libxl_types.idl
> @@ -311,7 +311,11 @@ libxl_domain_build_info = Struct("domain_build_info",[
> ("disable_migrate", libxl_defbool),
> ("cpuid", libxl_cpuid_policy_list),
> ("blkdev_start", string),
> -
> + ("vnuma_memszs", Array(uint64, "nr_vnodes")),
> + ("vcpu_to_vnode", Array(uint32, "nr_vnodemap")),
> + ("vdistance", Array(uint32, "nr_vdist")),
> + ("vnode_to_pnode", Array(uint32, "nr_vnode_to_pnode")),
> + ("vnuma_placement", libxl_defbool),
Are those 'v' prefixes needed?
> ("device_model_version", libxl_device_model_version),
> ("device_model_stubdomain", libxl_defbool),
> # if you set device_model you must set device_model_version too
> diff --git a/tools/libxl/libxl_vnuma.h b/tools/libxl/libxl_vnuma.h
> new file mode 100644
> index 0000000..f1568ae
> --- /dev/null
> +++ b/tools/libxl/libxl_vnuma.h
> @@ -0,0 +1,11 @@
> +#include "libxl_osdeps.h" /* must come before any other headers */
> +
> +#define VNUMA_NO_NODE ~((unsigned int)0)
> +
> +/*
> + * Max vNUMA node size in Mb is taken 64Mb even now Linux lets
> + * 32Mb, thus letting some slack. Will be modified to match Linux.
-EPARSE :-)
> + */
> +#define MIN_VNODE_SIZE 64U
> +
> +#define MAX_VNUMA_NODES (unsigned int)1 << 10
> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
> index 341863e..c79e73e 100644
> --- a/tools/libxl/xl_cmdimpl.c
> +++ b/tools/libxl/xl_cmdimpl.c
> @@ -40,6 +40,7 @@
> #include "libxl_json.h"
> #include "libxlutil.h"
> #include "xl.h"
> +#include "libxl_vnuma.h"
>
> #define CHK_ERRNO( call ) ({ \
> int chk_errno = (call); \
> @@ -622,6 +623,134 @@ vcpp_out:
> return rc;
> }
>
> +/* Should exit after calling this */
/* Caller should exit after calling this. */
> +static void vnuma_info_release(libxl_domain_build_info *info)
> +{
> + free(info->vnuma_memszs);
> + free(info->vdistance);
> + free(info->vcpu_to_vnode);
> + free(info->vnode_to_pnode);
> + info->nr_vnodes = 0;
> +}
> +
> +static void vdistance_default(unsigned int *vdistance,
> + unsigned int nr_vnodes,
> + unsigned int samenode,
> + unsigned int othernode)
> +{
> + int i, j;
> + for (i = 0; i < nr_vnodes; i++)
For clarity I would replace 'i' with 'idx' and 'j' with
'slot'.
Or leave 'j' alone, but just change 'i' to 'idx'
> + for (j = 0; j < nr_vnodes; j++)
> + *(vdistance + j * nr_vnodes + i) = i == j ? samenode : othernode;
> +}
> +
> +static void vcputovnode_default(unsigned int *vcpu_to_vnode,
> + unsigned int nr_vnodes,
> + unsigned int max_vcpus)
> +{
> + int i;
> + for(i = 0; i < max_vcpus; i++)
You editor ate a space :-)
> + vcpu_to_vnode[i] = i % nr_vnodes;
> +}
> +
> +/* Split domain memory between vNUMA nodes equally */
> +static int split_vnumamem(libxl_domain_build_info *b_info)
> +{
> + unsigned long long vnodemem = 0;
> + unsigned long n;
> + unsigned int i;
> +
> + /* In MBytes */
> + vnodemem = (b_info->max_memkb >> 10) / b_info->nr_vnodes;
If b_info->nr_vnodes is zero you are going to have a problem.
> + if (vnodemem < MIN_VNODE_SIZE)
> + return -1;
> + /* reminder in MBytes */
> + n = (b_info->max_memkb >> 10) % b_info->nr_vnodes;
> + /* get final sizes in MBytes */
> + for(i = 0; i < (b_info->nr_vnodes - 1); i++)
Your editor is really hungry. Another space!
> + b_info->vnuma_memszs[i] = vnodemem;
> + /* add the reminder to the last node */
> + b_info->vnuma_memszs[i] = vnodemem + n;
> + return 0;
> +}
> +
> +static void vnode_to_pnode_default(unsigned int *vnode_to_pnode,
> + unsigned int nr_vnodes)
> +{
> + unsigned int i;
> + for (i = 0; i < nr_vnodes; i++)
> + vnode_to_pnode[i] = VNUMA_NO_NODE;
> +}
> +
> +/*
> + * vNUMA zero config initialization for every pv domain that has
> + * no vnuma defined in config file.
> + */
> +static int vnuma_zero_config(libxl_domain_build_info *b_info)
> +{
> + b_info->nr_vnodes = 1;
> + /* dont leak memory with realloc */
? Right?
> + unsigned int *vdist, *vntop, *vcputov;
> + uint64_t *memsz;
> +
> + /* all memory goes to this one vnode */
> + memsz = b_info->vnuma_memszs;
> + b_info->vnuma_memszs = (uint64_t *)realloc(b_info->vnuma_memszs,
> + b_info->nr_vnodes *
> + sizeof(*b_info->vnuma_memszs));
> + if (b_info->vnuma_memszs == NULL) {
> + b_info->vnuma_memszs = memsz;
> + goto bad_vnumazerocfg;
> + }
> + b_info->vnuma_memszs[0] = b_info->max_memkb >> 10;
> +
> + /* all vcpus assigned to this vnode */
> + vcputov = b_info->vcpu_to_vnode;
Perhaps call it 'old_vnode' ?
> + b_info->vcpu_to_vnode = (unsigned int *)realloc(
> + b_info->vcpu_to_vnode,
> + b_info->max_vcpus *
> + sizeof(*b_info->vcpu_to_vnode));
> + if (b_info->vcpu_to_vnode == NULL) {
> + b_info->vcpu_to_vnode = vcputov;
> + goto bad_vnumazerocfg;
> + }
> + vcputovnode_default(b_info->vcpu_to_vnode,
> + b_info->nr_vnodes,
> + b_info->max_vcpus);
> +
> + /* default vdistance 10 */
> + vdist = b_info->vdistance;
And 'old_dist' ?
> + b_info->vdistance = (unsigned int *)realloc(b_info->vdistance,
> + b_info->nr_vnodes * b_info->nr_vnodes *
> + sizeof(*b_info->vdistance));
> + if (b_info->vdistance == NULL) {
> + b_info->vdistance = vdist;
> + goto bad_vnumazerocfg;
> + }
> + vdistance_default(b_info->vdistance, b_info->nr_vnodes, 10, 10);
> +
> + /* VNUMA_NO_NODE for vnode_to_pnode */
> + vntop = b_info->vnode_to_pnode;
'vntop'? old_pnode?
> + b_info->vnode_to_pnode = (unsigned int *)realloc(b_info->vnode_to_pnode,
> + b_info->nr_vnodes *
> + sizeof(*b_info->vnode_to_pnode));
> + if (b_info->vnode_to_pnode == NULL) {
> + b_info->vnode_to_pnode = vntop;
> + goto bad_vnumazerocfg;
> + }
> + vnode_to_pnode_default(b_info->vnode_to_pnode, b_info->nr_vnodes);
> +
> + /*
> + * will be placed to some physical nodes defined by automatic
> + * numa placement or VNUMA_NO_NODE will not request exact node
> + */
> + libxl_defbool_set(&b_info->vnuma_placement, true);
> + return 0;
> +
> +bad_vnumazerocfg:
Actually your code could make this a bit simpler.
p = realloc(...)
if (p)
b_info->vcpu_to_vnode = p;
else
goto err_out;
p = realloc(..)
if (p)
b_info->vcpu_to_vnode = p;
That way you don't have those 'old_vnode' or 'vntop'
and only assign the new value if it worked - instead of
doing the rewind.
bad_vnumazercfg:
> + return -1;
> +}
> +
> static void parse_config_data(const char *config_source,
> const char *config_data,
> int config_len,
> @@ -960,6 +1089,11 @@ static void parse_config_data(const char *config_source,
> char *cmdline = NULL;
> const char *root = NULL, *extra = "";
>
> + XLU_ConfigList *vnumamemcfg;
> + int nr_vnuma_regions;
> + unsigned long long vnuma_memparsed = 0;
> + unsigned long ul;
> +
> xlu_cfg_replace_string (config, "kernel", &b_info->u.pv.kernel, 0);
>
> xlu_cfg_get_string (config, "root", &root, 0);
> @@ -977,6 +1111,106 @@ static void parse_config_data(const char *config_source,
> exit(1);
> }
>
> + libxl_defbool_set(&b_info->vnuma_placement, false);
> +
> + if (!xlu_cfg_get_long (config, "vnodes", &l, 0)) {
> + if (l > MAX_VNUMA_NODES) {
> + fprintf(stderr, "Too many vnuma nodes, max %d is allowed.\n", MAX_VNUMA_NODES);
> + exit(1);
> + }
> +
> + b_info->nr_vnodes = l;
> +
> + xlu_cfg_get_defbool(config, "vnuma_autoplacement", &b_info->vnuma_placement, 0);
> +
> + if (b_info->nr_vnodes != 0 && b_info->max_vcpus >= b_info->nr_vnodes) {
> + if (!xlu_cfg_get_list(config, "vnumamem",
> + &vnumamemcfg, &nr_vnuma_regions, 0)) {
> +
> + if (nr_vnuma_regions != b_info->nr_vnodes) {
> + fprintf(stderr, "Number of numa regions is incorrect.\n");
You could make this be:
"Number of numa regions (vnumamem=%d) is incorrect (should be %d)"
> + exit(1);
> + }
> +
> + b_info->vnuma_memszs = calloc(b_info->nr_vnodes,
> + sizeof(*b_info->vnuma_memszs));
> + if (b_info->vnuma_memszs == NULL) {
> + fprintf(stderr, "unable to allocate memory for vnuma ranges.\n");
Unable
> + exit(1);
> + }
> +
> + char *ep;
> + /*
> + * Will parse only nr_vnodes times, even if we have more/less regions.
> + * Take care of it later if less or discard if too many regions.
> + */
> + for (i = 0; i < b_info->nr_vnodes; i++) {
> + buf = xlu_cfg_get_listitem(vnumamemcfg, i);
> + if (!buf) {
> + fprintf(stderr,
> + "xl: Unable to get element %d in vnuma memroy list.\n", i);
s/memroy/memory/
> + break;
> + }
> + ul = strtoul(buf, &ep, 10);
> + if (ep == buf) {
> + fprintf(stderr,
> + "xl: Invalid argument parsing vnumamem: %s.\n", buf);
> + break;
> + }
> +
> + /* 32Mb is a min size for a node, taken from Linux */
> + if (ul >= UINT32_MAX || ul < MIN_VNODE_SIZE) {
> + fprintf(stderr, "xl: vnuma memory %lu is not withing %u - %u range.\n",
s/withing/within/
> + ul, MIN_VNODE_SIZE, UINT32_MAX);
> + break;
> + }
> +
> + /* memory in MBytes */
> + b_info->vnuma_memszs[i] = ul;
> + }
> +
> + /* Total memory for vNUMA parsed to verify */
> + for(i = 0; i < nr_vnuma_regions; i++)
Editor hungry :-)
> + vnuma_memparsed = vnuma_memparsed + (b_info->vnuma_memszs[i]);
> +
> + /* Amount of memory for vnodes same as total? */
> + if((vnuma_memparsed << 10) != (b_info->max_memkb)) {
Ditto.
> + fprintf(stderr, "xl: vnuma memory is not the same as initial domain memory size.\n");
s/initial domain/domain/
> + vnuma_info_release(b_info);
> + exit(1);
> + }
> + } else {
> + b_info->vnuma_memszs = calloc(b_info->nr_vnodes,
> + sizeof(*b_info->vnuma_memszs));
> + if (b_info->vnuma_memszs == NULL) {
> + vnuma_info_release(b_info);
> + fprintf(stderr, "unable to allocate memory for vnuma ranges.\n");
> + exit(1);
> + }
> +
> + fprintf(stderr, "WARNING: vNUMA memory ranges were not specified.\n");
> + fprintf(stderr, "Using default equal vnode memory size %lu Kbytes to cover %lu Kbytes.\n",
> + b_info->max_memkb / b_info->nr_vnodes, b_info->max_memkb);
> +
> + if (split_vnumamem(b_info) < 0) {
> + fprintf(stderr, "Could not split vnuma memory into equal chunks.\n");
> + vnuma_info_release(b_info);
> + exit(1);
> + }
> + }
> + }
> + else
> + if (vnuma_zero_config(b_info)) {
> + vnuma_info_release(b_info);
> + exit(1);
> + }
> + }
> + else
> + if (vnuma_zero_config(b_info)) {
> + vnuma_info_release(b_info);
> + exit(1);
> + }
> +
> xlu_cfg_replace_string (config, "bootloader", &b_info->u.pv.bootloader, 0);
> switch (xlu_cfg_get_list_as_string_list(config, "bootloader_args",
> &b_info->u.pv.bootloader_args, 1))
> --
> 1.7.10.4
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel
next prev parent reply other threads:[~2013-12-16 19:57 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-04 5:47 [PATCH v4 0/7] vNUMA introduction Elena Ufimtseva
2013-12-04 5:47 ` [PATCH v4 1/7] xen: vNUMA support for PV guests Elena Ufimtseva
2013-12-04 11:34 ` Jan Beulich
2013-12-04 18:02 ` Elena Ufimtseva
2013-12-04 5:47 ` [PATCH v4 2/7] libxc: Plumb Xen with vNUMA topology for domain Elena Ufimtseva
2013-12-16 19:16 ` Konrad Rzeszutek Wilk
2013-12-04 5:47 ` [PATCH v4 3/7] xl: vnuma memory parsing and supplement functions Elena Ufimtseva
2013-12-16 19:57 ` Konrad Rzeszutek Wilk [this message]
2013-12-04 5:47 ` [PATCH v4 4/7] xl: vnuma distance, vcpu and pnode masks parser Elena Ufimtseva
2013-12-04 5:47 ` [PATCH v4 5/7] libxc: vnuma memory domain allocation Elena Ufimtseva
2013-12-04 5:47 ` [PATCH v4 6/7] libxl: vNUMA supporting interface Elena Ufimtseva
2013-12-04 5:47 ` [PATCH v4 7/7] xen: adds vNUMA info debug-key u Elena Ufimtseva
2013-12-04 11:23 ` Jan Beulich
2014-02-13 12:49 ` [PATCH v4 0/7] vNUMA introduction Li Yechen
2014-02-13 16:26 ` Elena Ufimtseva
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20131216195711.GA29733@phenom.dumpdata.com \
--to=konrad.wilk@oracle.com \
--cc=Ian.Campbell@citrix.com \
--cc=JBeulich@suse.com \
--cc=dario.faggioli@citrix.com \
--cc=george.dunlap@eu.citrix.com \
--cc=ian.jackson@eu.citrix.com \
--cc=keir@xen.org \
--cc=lccycc123@gmail.com \
--cc=msw@linux.com \
--cc=stefano.stabellini@eu.citrix.com \
--cc=ufimtseva@gmail.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.