Re: [PATCH v4 3/7] xl: vnuma memory parsing and supplement functions

xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed

From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Elena Ufimtseva <ufimtseva@gmail.com>
Cc: keir@xen.org, Ian.Campbell@citrix.com,
	stefano.stabellini@eu.citrix.com, george.dunlap@eu.citrix.com,
	msw@linux.com, dario.faggioli@citrix.com, lccycc123@gmail.com,
	ian.jackson@eu.citrix.com, xen-devel@lists.xen.org,
	JBeulich@suse.com
Subject: Re: [PATCH v4 3/7] xl: vnuma memory parsing and supplement functions
Date: Mon, 16 Dec 2013 14:57:11 -0500	[thread overview]
Message-ID: <20131216195711.GA29733@phenom.dumpdata.com> (raw)
In-Reply-To: <1386136035-19544-4-git-send-email-ufimtseva@gmail.com>

On Wed, Dec 04, 2013 at 12:47:11AM -0500, Elena Ufimtseva wrote:
> Parses vnuma topoplogy number of nodes and memory
> ranges. If not defined, initializes vnuma with
> only one node and default topology.
> 
> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
> ---
> Changes since v3:
> - added subop hypercall to retrive number of vnodes
> and vcpus for domain to make correct allocations before
> requesting vnuma topology.
> ---
>  tools/libxl/libxl_types.idl |    6 +-
>  tools/libxl/libxl_vnuma.h   |   11 ++
>  tools/libxl/xl_cmdimpl.c    |  234 +++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 250 insertions(+), 1 deletion(-)
>  create mode 100644 tools/libxl/libxl_vnuma.h
> 
> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
> index cba8eff..ba46f58 100644
> --- a/tools/libxl/libxl_types.idl
> +++ b/tools/libxl/libxl_types.idl
> @@ -311,7 +311,11 @@ libxl_domain_build_info = Struct("domain_build_info",[
>      ("disable_migrate", libxl_defbool),
>      ("cpuid",           libxl_cpuid_policy_list),
>      ("blkdev_start",    string),
> -    
> +    ("vnuma_memszs",    Array(uint64, "nr_vnodes")),
> +    ("vcpu_to_vnode",   Array(uint32, "nr_vnodemap")),
> +    ("vdistance",       Array(uint32, "nr_vdist")),
> +    ("vnode_to_pnode",  Array(uint32, "nr_vnode_to_pnode")),
> +    ("vnuma_placement", libxl_defbool),

Are those 'v' prefixes needed?

>      ("device_model_version", libxl_device_model_version),
>      ("device_model_stubdomain", libxl_defbool),
>      # if you set device_model you must set device_model_version too
> diff --git a/tools/libxl/libxl_vnuma.h b/tools/libxl/libxl_vnuma.h
> new file mode 100644
> index 0000000..f1568ae
> --- /dev/null
> +++ b/tools/libxl/libxl_vnuma.h
> @@ -0,0 +1,11 @@
> +#include "libxl_osdeps.h" /* must come before any other headers */
> +
> +#define VNUMA_NO_NODE ~((unsigned int)0)
> +
> +/*
> + * Max vNUMA node size in Mb is taken 64Mb even now Linux lets
> + * 32Mb, thus letting some slack. Will be modified to match Linux.

-EPARSE :-)

> + */
> +#define MIN_VNODE_SIZE  64U
> +
> +#define MAX_VNUMA_NODES (unsigned int)1 << 10
> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
> index 341863e..c79e73e 100644
> --- a/tools/libxl/xl_cmdimpl.c
> +++ b/tools/libxl/xl_cmdimpl.c
> @@ -40,6 +40,7 @@
>  #include "libxl_json.h"
>  #include "libxlutil.h"
>  #include "xl.h"
> +#include "libxl_vnuma.h"
>  
>  #define CHK_ERRNO( call ) ({                                            \
>          int chk_errno = (call);                                         \
> @@ -622,6 +623,134 @@ vcpp_out:
>      return rc;
>  }
>  
> +/* Should exit after calling this */

/* Caller should exit after calling this. */

> +static void vnuma_info_release(libxl_domain_build_info *info)
> +{
> +    free(info->vnuma_memszs);
> +    free(info->vdistance);
> +    free(info->vcpu_to_vnode);
> +    free(info->vnode_to_pnode);
> +    info->nr_vnodes = 0;
> +}
> +
> +static void vdistance_default(unsigned int *vdistance,
> +                                unsigned int nr_vnodes,
> +                                unsigned int samenode,
> +                                unsigned int othernode)
> +{
> +    int i, j;
> +    for (i = 0; i < nr_vnodes; i++)

For clarity I would replace 'i' with 'idx' and 'j' with
'slot'.

Or leave 'j' alone, but just change 'i' to 'idx'

> +        for (j = 0; j < nr_vnodes; j++)
> +            *(vdistance + j * nr_vnodes + i) = i == j ? samenode : othernode;
> +}
> +
> +static void vcputovnode_default(unsigned int *vcpu_to_vnode,
> +                                unsigned int nr_vnodes,
> +                                unsigned int max_vcpus)
> +{
> +    int i;
> +    for(i = 0; i < max_vcpus; i++)

You editor ate a space :-)

> +        vcpu_to_vnode[i] = i % nr_vnodes;
> +}
> +
> +/* Split domain memory between vNUMA nodes equally */
> +static int split_vnumamem(libxl_domain_build_info *b_info)
> +{
> +    unsigned long long vnodemem = 0;
> +    unsigned long n;
> +    unsigned int i;
> +
> +    /* In MBytes */
> +    vnodemem = (b_info->max_memkb >> 10) / b_info->nr_vnodes;

If b_info->nr_vnodes is zero you are going to have a problem.

> +    if (vnodemem < MIN_VNODE_SIZE)
> +        return -1;
> +    /* reminder in MBytes */
> +    n = (b_info->max_memkb >> 10) % b_info->nr_vnodes;
> +    /* get final sizes in MBytes */
> +    for(i = 0; i < (b_info->nr_vnodes - 1); i++)

Your editor is really hungry. Another space!

> +        b_info->vnuma_memszs[i] = vnodemem;
> +    /* add the reminder to the last node */
> +    b_info->vnuma_memszs[i] = vnodemem + n;
> +    return 0;
> +}
> +
> +static void vnode_to_pnode_default(unsigned int *vnode_to_pnode,
> +                                   unsigned int nr_vnodes)
> +{
> +    unsigned int i;
> +    for (i = 0; i < nr_vnodes; i++)
> +        vnode_to_pnode[i] = VNUMA_NO_NODE;
> +}
> +
> +/*
> + * vNUMA zero config initialization for every pv domain that has
> + * no vnuma defined in config file.
> + */
> +static int vnuma_zero_config(libxl_domain_build_info *b_info)
> +{
> +    b_info->nr_vnodes = 1;
> +    /* dont leak memory with realloc */

? Right?

> +    unsigned int *vdist, *vntop, *vcputov;
> +    uint64_t *memsz;
> +
> +    /* all memory goes to this one vnode */
> +    memsz = b_info->vnuma_memszs;
> +    b_info->vnuma_memszs = (uint64_t *)realloc(b_info->vnuma_memszs,
> +                                b_info->nr_vnodes *
> +                                sizeof(*b_info->vnuma_memszs));
> +    if (b_info->vnuma_memszs == NULL) {
> +        b_info->vnuma_memszs = memsz;
> +        goto bad_vnumazerocfg;
> +    }
> +    b_info->vnuma_memszs[0] = b_info->max_memkb >> 10;
> +
> +    /* all vcpus assigned to this vnode */
> +    vcputov = b_info->vcpu_to_vnode;

Perhaps call it 'old_vnode' ?

> +    b_info->vcpu_to_vnode = (unsigned int *)realloc(
> +                             b_info->vcpu_to_vnode,
> +                             b_info->max_vcpus *
> +                             sizeof(*b_info->vcpu_to_vnode));
> +    if (b_info->vcpu_to_vnode == NULL) {
> +        b_info->vcpu_to_vnode = vcputov;
> +        goto bad_vnumazerocfg;
> +    }
> +    vcputovnode_default(b_info->vcpu_to_vnode,
> +                        b_info->nr_vnodes,
> +                        b_info->max_vcpus);
> +
> +    /* default vdistance 10 */
> +    vdist = b_info->vdistance;

And 'old_dist' ?

> +    b_info->vdistance = (unsigned int *)realloc(b_info->vdistance,
> +                         b_info->nr_vnodes * b_info->nr_vnodes *
> +                         sizeof(*b_info->vdistance));
> +    if (b_info->vdistance == NULL) {
> +        b_info->vdistance = vdist;
> +        goto bad_vnumazerocfg;
> +    }
> +    vdistance_default(b_info->vdistance, b_info->nr_vnodes, 10, 10);
> +
> +    /* VNUMA_NO_NODE for vnode_to_pnode */
> +    vntop = b_info->vnode_to_pnode;

'vntop'? old_pnode?

> +    b_info->vnode_to_pnode = (unsigned int *)realloc(b_info->vnode_to_pnode,
> +                             b_info->nr_vnodes *
> +                             sizeof(*b_info->vnode_to_pnode));
> +    if (b_info->vnode_to_pnode == NULL) {
> +        b_info->vnode_to_pnode = vntop;

> +        goto bad_vnumazerocfg;
> +    }
> +    vnode_to_pnode_default(b_info->vnode_to_pnode, b_info->nr_vnodes);
> +
> +    /*
> +     * will be placed to some physical nodes defined by automatic
> +     * numa placement or VNUMA_NO_NODE will not request exact node
> +     */
> +    libxl_defbool_set(&b_info->vnuma_placement, true);
> +    return 0;
> +
> +bad_vnumazerocfg:

Actually your code could make this a bit simpler.

    p =  realloc(...)
    if (p)
	b_info->vcpu_to_vnode = p;
    else
	goto err_out;

    p = realloc(..)
    if (p)
	b_info->vcpu_to_vnode = p;


That way you don't have those 'old_vnode' or 'vntop'
and only assign the new value if it worked - instead of
doing the rewind.


bad_vnumazercfg:
> +    return -1;
> +}
> +
>  static void parse_config_data(const char *config_source,
>                                const char *config_data,
>                                int config_len,
> @@ -960,6 +1089,11 @@ static void parse_config_data(const char *config_source,
>          char *cmdline = NULL;
>          const char *root = NULL, *extra = "";
>  
> +        XLU_ConfigList *vnumamemcfg;
> +        int nr_vnuma_regions;
> +        unsigned long long vnuma_memparsed = 0;
> +        unsigned long ul;
> +
>          xlu_cfg_replace_string (config, "kernel", &b_info->u.pv.kernel, 0);
>  
>          xlu_cfg_get_string (config, "root", &root, 0);
> @@ -977,6 +1111,106 @@ static void parse_config_data(const char *config_source,
>              exit(1);
>          }
>  
> +        libxl_defbool_set(&b_info->vnuma_placement, false);
> +
> +        if (!xlu_cfg_get_long (config, "vnodes", &l, 0)) {
> +            if (l > MAX_VNUMA_NODES) {
> +                fprintf(stderr, "Too many vnuma nodes, max %d is allowed.\n", MAX_VNUMA_NODES);
> +                exit(1);
> +            }
> +
> +            b_info->nr_vnodes = l;
> +
> +            xlu_cfg_get_defbool(config, "vnuma_autoplacement", &b_info->vnuma_placement, 0);
> +
> +            if (b_info->nr_vnodes != 0 && b_info->max_vcpus >= b_info->nr_vnodes) {
> +                if (!xlu_cfg_get_list(config, "vnumamem",
> +                                      &vnumamemcfg, &nr_vnuma_regions, 0)) {
> +
> +                    if (nr_vnuma_regions != b_info->nr_vnodes) {
> +                        fprintf(stderr, "Number of numa regions is incorrect.\n");

You could make this be:
"Number of numa regions (vnumamem=%d) is incorrect (should be %d)"


> +                        exit(1);
> +                    }
> +
> +                    b_info->vnuma_memszs = calloc(b_info->nr_vnodes,
> +                                                  sizeof(*b_info->vnuma_memszs));
> +                    if (b_info->vnuma_memszs == NULL) {
> +                        fprintf(stderr, "unable to allocate memory for vnuma ranges.\n");

Unable

> +                        exit(1);
> +                    }
> +
> +                    char *ep;
> +                    /*
> +                     * Will parse only nr_vnodes times, even if we have more/less regions.
> +                     * Take care of it later if less or discard if too many regions.
> +                     */
> +                    for (i = 0; i < b_info->nr_vnodes; i++) {
> +                        buf = xlu_cfg_get_listitem(vnumamemcfg, i);
> +                        if (!buf) {
> +                            fprintf(stderr,
> +                                    "xl: Unable to get element %d in vnuma memroy list.\n", i);

s/memroy/memory/

> +                            break;
> +                        }
> +                        ul = strtoul(buf, &ep, 10);
> +                        if (ep == buf) {
> +                            fprintf(stderr,
> +                                    "xl: Invalid argument parsing vnumamem: %s.\n", buf);
> +                            break;
> +                        }
> +
> +                        /* 32Mb is a min size for a node, taken from Linux */
> +                        if (ul >= UINT32_MAX || ul < MIN_VNODE_SIZE) {
> +                            fprintf(stderr, "xl: vnuma memory %lu is not withing %u - %u range.\n",

s/withing/within/

> +                                    ul, MIN_VNODE_SIZE, UINT32_MAX);
> +                            break;
> +                        }
> +
> +                        /* memory in MBytes */
> +                        b_info->vnuma_memszs[i] = ul;
> +                    }
> +
> +                    /* Total memory for vNUMA parsed to verify */
> +                    for(i = 0; i < nr_vnuma_regions; i++)

Editor hungry :-)

> +                        vnuma_memparsed = vnuma_memparsed + (b_info->vnuma_memszs[i]);
> +
> +                    /* Amount of memory for vnodes same as total? */
> +                    if((vnuma_memparsed << 10) != (b_info->max_memkb)) {

Ditto.

> +                        fprintf(stderr, "xl: vnuma memory is not the same as initial domain memory size.\n");

s/initial domain/domain/

> +                        vnuma_info_release(b_info);
> +                        exit(1);
> +                    }
> +                } else {
> +                    b_info->vnuma_memszs = calloc(b_info->nr_vnodes,
> +                                                  sizeof(*b_info->vnuma_memszs));
> +                    if (b_info->vnuma_memszs == NULL) {
> +                        vnuma_info_release(b_info);
> +                        fprintf(stderr, "unable to allocate memory for vnuma ranges.\n");
> +                        exit(1);
> +                    }
> +
> +                    fprintf(stderr, "WARNING: vNUMA memory ranges were not specified.\n");
> +                    fprintf(stderr, "Using default equal vnode memory size %lu Kbytes to cover %lu Kbytes.\n",
> +                                    b_info->max_memkb / b_info->nr_vnodes, b_info->max_memkb);
> +
> +                    if (split_vnumamem(b_info) < 0) {
> +                        fprintf(stderr, "Could not split vnuma memory into equal chunks.\n");
> +                        vnuma_info_release(b_info);
> +                        exit(1);
> +                    }
> +                }
> +            }
> +            else
> +                if (vnuma_zero_config(b_info)) {
> +                    vnuma_info_release(b_info);
> +                    exit(1);
> +                }
> +        }
> +        else
> +            if (vnuma_zero_config(b_info)) {
> +                vnuma_info_release(b_info);
> +                exit(1);
> +            }
> +
>          xlu_cfg_replace_string (config, "bootloader", &b_info->u.pv.bootloader, 0);
>          switch (xlu_cfg_get_list_as_string_list(config, "bootloader_args",
>                                        &b_info->u.pv.bootloader_args, 1))
> -- 
> 1.7.10.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

next prev parent reply	other threads:[~2013-12-16 19:57 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-04  5:47 [PATCH v4 0/7] vNUMA introduction Elena Ufimtseva
2013-12-04  5:47 ` [PATCH v4 1/7] xen: vNUMA support for PV guests Elena Ufimtseva
2013-12-04 11:34   ` Jan Beulich
2013-12-04 18:02     ` Elena Ufimtseva
2013-12-04  5:47 ` [PATCH v4 2/7] libxc: Plumb Xen with vNUMA topology for domain Elena Ufimtseva
2013-12-16 19:16   ` Konrad Rzeszutek Wilk
2013-12-04  5:47 ` [PATCH v4 3/7] xl: vnuma memory parsing and supplement functions Elena Ufimtseva
2013-12-16 19:57   ` Konrad Rzeszutek Wilk [this message]
2013-12-04  5:47 ` [PATCH v4 4/7] xl: vnuma distance, vcpu and pnode masks parser Elena Ufimtseva
2013-12-04  5:47 ` [PATCH v4 5/7] libxc: vnuma memory domain allocation Elena Ufimtseva
2013-12-04  5:47 ` [PATCH v4 6/7] libxl: vNUMA supporting interface Elena Ufimtseva
2013-12-04  5:47 ` [PATCH v4 7/7] xen: adds vNUMA info debug-key u Elena Ufimtseva
2013-12-04 11:23   ` Jan Beulich
2014-02-13 12:49 ` [PATCH v4 0/7] vNUMA introduction Li Yechen
2014-02-13 16:26   ` Elena Ufimtseva

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131216195711.GA29733@phenom.dumpdata.com \
    --to=konrad.wilk@oracle.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=JBeulich@suse.com \
    --cc=dario.faggioli@citrix.com \
    --cc=george.dunlap@eu.citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=keir@xen.org \
    --cc=lccycc123@gmail.com \
    --cc=msw@linux.com \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=ufimtseva@gmail.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).