All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Elena Ufimtseva <ufimtseva@gmail.com>
Cc: keir@xen.org, stefano.stabellini@eu.citrix.com,
	george.dunlap@eu.citrix.com, msw@linux.com,
	dario.faggioli@citrix.com, lccycc123@gmail.com,
	xen-devel@lists.xen.org, JBeulich@suse.com
Subject: Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
Date: Fri, 13 Sep 2013 10:31:14 +0100	[thread overview]
Message-ID: <5232DB62.30809@citrix.com> (raw)
In-Reply-To: <1379062190-13720-1-git-send-email-ufimtseva@gmail.com>

On 13/09/2013 09:49, Elena Ufimtseva wrote:
> Defines XENMEM subop hypercall for PV vNUMA
> enabled guests and provides vNUMA topology
> information from per-domain vnuma topology build info.
>
> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
>
> ---
> Changes since v1:
> * changed type from int to uint and unsigned in vNUMA structures;
> * removed unecessary file vnuma.h as requested in review
> * added domain_vnuma_destroy;
> * fixed usage of rcu_lock_domain_by_any_id;
> * removed unecessary guest_handle_cast calls;
> * coding style fixes;
> ---
>  xen/common/domain.c         |   25 +++++++++++++++-
>  xen/common/domctl.c         |   68 ++++++++++++++++++++++++++++++++++++++++++-
>  xen/common/memory.c         |   56 +++++++++++++++++++++++++++++++++++
>  xen/include/public/domctl.h |   15 +++++++++-
>  xen/include/public/memory.h |    9 +++++-
>  xen/include/xen/domain.h    |   11 +++++++
>  xen/include/xen/sched.h     |    1 +
>  xen/include/xen/vnuma.h     |   27 +++++++++++++++++
>  8 files changed, 208 insertions(+), 4 deletions(-)
>  create mode 100644 xen/include/xen/vnuma.h
>
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 9390a22..bb414cf 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -227,6 +227,7 @@ struct domain *domain_create(
>      spin_lock_init(&d->node_affinity_lock);
>      d->node_affinity = NODE_MASK_ALL;
>      d->auto_node_affinity = 1;
> +    d->vnuma.nr_vnodes = 0;

alloc_domain_struct() clears d, so this assignment of 0 is not needed.

>  
>      spin_lock_init(&d->shutdown_lock);
>      d->shutdown_code = -1;
> @@ -530,8 +531,9 @@ int domain_kill(struct domain *d)
>          evtchn_destroy(d);
>          gnttab_release_mappings(d);
>          tmem_destroy(d->tmem);
> -        domain_set_outstanding_pages(d, 0);

Any particular reason for this function call to move?  I cant see any
reason.

>          d->tmem = NULL;
> +        domain_set_outstanding_pages(d, 0);
> +        domain_vnuma_destroy(&d->vnuma);
>          /* fallthrough */
>      case DOMDYING_dying:
>          rc = domain_relinquish_resources(d);
> @@ -1279,6 +1281,27 @@ int continue_hypercall_on_cpu(
>      return 0;
>  }
>  
> +void domain_vnuma_destroy(struct domain_vnuma_info *v)
> +{
> +    if (v->vnuma_memblks) {
> +        xfree(v->vnuma_memblks);
> +        v->vnuma_memblks = NULL;
> +    }

Coding style (see CODING_STYLE in the root of the tree) so this would
become:

if ( v->vnuma_memblks )
{
  xfree(v->vnuma_memblks);
  v->vnuma_memblks = NULL;
}

However, xfree() (like regular free()) is happy with NULL pointers, so
you can drop the if altogether.


> +    if (v->vcpu_to_vnode) {
> +        xfree(v->vcpu_to_vnode);
> +        v->vcpu_to_vnode = NULL;
> +    }
> +    if (v->vdistance) {
> +        xfree(v->vdistance);
> +        v->vdistance = NULL;
> +    }
> +    if (v->vnode_to_pnode) {
> +        xfree(v->vnode_to_pnode);
> +        v->vnode_to_pnode = NULL;
> +    }
> +    v->nr_vnodes = 0;
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
> index 9760d50..e5d05c7 100644
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -29,6 +29,7 @@
>  #include <asm/page.h>
>  #include <public/domctl.h>
>  #include <xsm/xsm.h>
> +#include <xen/vnuma.h>
>  
>  static DEFINE_SPINLOCK(domctl_lock);
>  DEFINE_SPINLOCK(vcpu_alloc_lock);
> @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>          ret = set_global_virq_handler(d, virq);
>      }
>      break;
> -

Please leave this blank line in here.

> +    case XEN_DOMCTL_setvnumainfo:
> +    {
> +        unsigned int i, j, nr_vnodes, dist_size;
> +        unsigned int dist, vmap, vntop;
> +        vnuma_memblk_t vmemblk;
> +        
> +        ret = -EINVAL;
> +        dist = i = j = 0;
> +        nr_vnodes = op->u.vnuma.nr_vnodes; 
> +        if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS)
> +            break;

nr_vnodes is unsigned.  It cannot possibly be less than 0.

> +        d->vnuma.nr_vnodes = nr_vnodes;
> +        dist_size = nr_vnodes * nr_vnodes;
> +        if (
> +        (d->vnuma.vdistance = xmalloc_bytes(
> +                 sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||

sizeof in an operator not a function.  As you are not passing a type,
the brackets are not required.

> +        (d->vnuma.vnuma_memblks = xmalloc_bytes(
> +                 sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL ||
> +        (d->vnuma.vcpu_to_vnode = xmalloc_bytes(
> +                 sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL ||
> +        (d->vnuma.vnode_to_pnode = xmalloc_bytes(
> +                 sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL )
> +            goto err_dom;
> +        for ( i = 0; i < nr_vnodes; i++ )
> +            for ( j = 0; j < nr_vnodes; j++ )
> +            {
> +                if ( unlikely(__copy_from_guest_offset(&dist,
> +                                        op->u.vnuma.vdistance,
> +                                        __vnode_distance_offset(d, i, j), 1)) )
> +                    goto err_dom;

This error logic is broken.  A failure in __copy_from_guest_offset()
should result in -EFAULT, but will result in -EINVAL after following
err_dom;

> +                __vnode_distance_set(d, i, j, dist);
> +            }
> +        for ( i = 0; i < nr_vnodes; i++ )
> +        {
> +            if ( unlikely(__copy_from_guest_offset(&vmemblk,
> +                                    op->u.vnuma.vnuma_memblks, i, 1)) )
> +                goto err_dom;
> +            d->vnuma.vnuma_memblks[i].start = vmemblk.start;
> +            d->vnuma.vnuma_memblks[i].end = vmemblk.end;
> +        }
> +        for ( i = 0; i < d->max_vcpus; i++ )
> +        {
> +            if ( unlikely(__copy_from_guest_offset(&vmap,
> +                           op->u.vnuma.vcpu_to_vnode, i, 1)) )
> +                goto err_dom;
> +            d->vnuma.vcpu_to_vnode[i] = vmap;
> +        }
> +        if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
> +        {
> +            for ( i = 0; i < nr_vnodes; i++ )
> +            {
> +                if ( unlikely(__copy_from_guest_offset(&vntop,
> +                                        op->u.vnuma.vnode_to_pnode, i, 1)) )
> +                    goto err_dom;
> +                d->vnuma.vnode_to_pnode[i] = vntop;
> +            }
> +        }
> +        else
> +            for(i = 0; i < nr_vnodes; i++)
> +                d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;
> +        ret = 0;
> +        break;
> +err_dom:
> +        ret = -EINVAL;
> +    }
> +    break;

And an extra line in here please.

>      default:
>          ret = arch_do_domctl(op, d, u_domctl);
>          break;
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 50b740f..6dc2452 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -28,6 +28,7 @@
>  #include <public/memory.h>
>  #include <xsm/xsm.h>
>  #include <xen/trace.h>
> +#include <xen/vnuma.h>
>  
>  struct memop_args {
>      /* INPUT */
> @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>          rcu_unlock_domain(d);
>  
>          break;
> +    case XENMEM_get_vnuma_info:
> +    {
> +        int i, j;
> +        struct vnuma_topology_info mtopology;
> +        struct vnuma_topology_info touser_topo;
> +        struct domain *d;
> +        unsigned int max_pages, max_vcpus, nr_vnodes;
> +        vnuma_memblk_t *vblks;
>  
> +        rc = -EINVAL;
> +        if ( guest_handle_is_null(arg) )
> +            return rc;
> +        if( copy_from_guest(&mtopology, arg, 1) )
> +            return -EINVAL;

-EFAULT;

> +        if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
> +            return -EINVAL;

-ESRCH ( I think? )

> +        touser_topo.nr_vnodes = d->vnuma.nr_vnodes;
> +        max_pages = d->max_pages;
> +        max_vcpus = d->max_vcpus;
> +        rcu_unlock_domain(d);
> +    
> +        nr_vnodes = touser_topo.nr_vnodes;
> +        rc = copy_to_guest(arg, &touser_topo, 1);
> +        if ( rc )
> +            return -EFAULT;
> +        if ( nr_vnodes == 0 || nr_vnodes > max_vcpus )
> +            return -EFAULT;

-EINVAL;

> +        vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes);
> +        if ( vblks == NULL )
> +            return -EFAULT;

-ENOMEM;


Now, if you have an unconditional rc = -EFAULT then all your goto out
logic will be correct

> +        for ( i = 0; i < nr_vnodes; i++ )
> +        {
> +                if ( copy_to_guest_offset(mtopology.vnuma_memblks, i,
> +                           &d->vnuma.vnuma_memblks[i], 1) < 0 )
> +                    goto out;
> +        }
> +        for ( i = 0; i < touser_topo.nr_vnodes; i++ )
> +            for ( j = 0; j < touser_topo.nr_vnodes; j++ )
> +            {
> +                if ( copy_to_guest_offset(mtopology.vdistance,
> +                                            __vnode_distance_offset(d, i, j),
> +                                            &__vnode_distance(d, i, j), 1) )
> +                    goto out;
> +            }
> +        for ( i = 0; i < d->max_vcpus ; i++ )
> +        {
> +            if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i,
> +                                        &d->vnuma.vcpu_to_vnode[i], 1) )
> +                    goto out;
> +        }
> +        return rc;

But this would need to turn into rc = 0; to avoid return -EFAULT in the
success case, and unconditionally leaking vblks

> +out:
> +        if ( vblks ) xfree(vblks);
> +        return rc;
> +        break;
> +    }
>      default:
>          rc = arch_memory_op(op, arg);
>          break;
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 4c5b2bb..3574d0a 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -35,6 +35,7 @@
>  #include "xen.h"
>  #include "grant_table.h"
>  #include "hvm/save.h"
> +#include "memory.h"
>  
>  #define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
>  
> @@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m {
>  typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
>  
> +struct xen_domctl_vnuma {
> +    uint16_t nr_vnodes;
> +    XEN_GUEST_HANDLE_64(uint) vdistance;
> +    XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
> +    XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
> +    XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
> +};
> +
> +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
> +
>  struct xen_domctl {
>      uint32_t cmd;
>  #define XEN_DOMCTL_createdomain                   1
> @@ -920,6 +932,7 @@ struct xen_domctl {
>  #define XEN_DOMCTL_set_broken_page_p2m           67
>  #define XEN_DOMCTL_setnodeaffinity               68
>  #define XEN_DOMCTL_getnodeaffinity               69
> +#define XEN_DOMCTL_setvnumainfo                  70
>  #define XEN_DOMCTL_gdbsx_guestmemio            1000
>  #define XEN_DOMCTL_gdbsx_pausevcpu             1001
>  #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
> @@ -979,6 +992,7 @@ struct xen_domctl {
>          struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
>          struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>          struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
> +        struct xen_domctl_vnuma             vnuma;
>          uint8_t                             pad[128];
>      } u;
>  };
> @@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
>  
>  #endif /* __XEN_PUBLIC_DOMCTL_H__ */
> -

Spurious whitespace change.  Please remove

>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 7a26dee..28f6aaf 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
>   * Caller must be privileged or the hypercall fails.
>   */
>  #define XENMEM_claim_pages                  24
> -

And here

>  /*
>   * XENMEM_claim_pages flags - the are no flags at this time.
>   * The zero value is appropiate.
>   */
>  
> +struct vnuma_memblk {
> +              uint64_t start, end;
> +};
> +typedef struct vnuma_memblk vnuma_memblk_t;
> +DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t);
> +
> +#define XENMEM_get_vnuma_info               25
> +
>  #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>  
>  #endif /* __XEN_PUBLIC_MEMORY_H__ */
> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
> index a057069..c9d53e3 100644
> --- a/xen/include/xen/domain.h
> +++ b/xen/include/xen/domain.h
> @@ -4,6 +4,7 @@
>  
>  #include <public/xen.h>
>  #include <asm/domain.h>
> +#include <public/memory.h>
>  
>  typedef union {
>      struct vcpu_guest_context *nat;
> @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits;
>  
>  extern bool_t opt_dom0_vcpus_pin;
>  
> +struct domain_vnuma_info {
> +    uint16_t nr_vnodes;
> +    uint *vdistance;
> +    uint *vcpu_to_vnode;
> +    uint *vnode_to_pnode;
> +    vnuma_memblk_t *vnuma_memblks;
> +};
> +
> +void domain_vnuma_destroy(struct domain_vnuma_info *v);
> +
>  #endif /* __XEN_DOMAIN_H__ */
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index ae6a3b8..cb023cf 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -377,6 +377,7 @@ struct domain
>      nodemask_t node_affinity;
>      unsigned int last_alloc_node;
>      spinlock_t node_affinity_lock;
> +    struct domain_vnuma_info vnuma;
>  };
>  
>  struct domain_setup_info
> diff --git a/xen/include/xen/vnuma.h b/xen/include/xen/vnuma.h
> new file mode 100644
> index 0000000..0b41da0
> --- /dev/null
> +++ b/xen/include/xen/vnuma.h
> @@ -0,0 +1,27 @@
> +#ifndef _VNUMA_H
> +#define _VNUMA_H
> +#include <public/memory.h>
> +
> +/* DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t); */
> +
> +struct vnuma_topology_info {
> +    domid_t domid;
> +    uint16_t nr_vnodes;
> +    uint32_t _pad;
> +    XEN_GUEST_HANDLE_64(uint) vdistance;
> +    XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
> +    XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
> +};
> +typedef struct vnuma_topology_info vnuma_topology_info_t;
> +DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
> +
> +#define __vnode_distance_offset(_dom, _i, _j) \
> +        ( ((_j) * ((_dom)->vnuma.nr_vnodes)) + (_i) )
> +
> +#define __vnode_distance(_dom, _i, _j) \
> +        ( (_dom)->vnuma.vdistance[__vnode_distance_offset((_dom), (_i), (_j))] )

You expand _dom here twice, which is bad practice for macros.

I suggest static inline functions as a better alternative.

~Andrew

> +
> +#define __vnode_distance_set(_dom, _i, _j, _v) \
> +        do { __vnode_distance((_dom), (_i), (_j)) = (_v); } while(0)
> +
> +#endif

  reply	other threads:[~2013-09-13  9:31 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-13  8:49 [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests Elena Ufimtseva
2013-09-13  9:31 ` Andrew Cooper [this message]
2013-09-13 11:53   ` Jan Beulich
2013-09-13 11:00 ` Jan Beulich
2013-09-16 15:46 ` George Dunlap
2013-09-17  6:44   ` Elena Ufimtseva
2013-09-17  6:59     ` Elena Ufimtseva
2013-09-17  7:05     ` Jan Beulich
2013-09-17  7:11       ` Dario Faggioli
2013-09-17  7:19         ` Elena Ufimtseva
2013-09-17  9:04           ` Dario Faggioli
2013-10-03 12:27             ` Li Yechen
2013-10-03 15:17               ` Dario Faggioli
2013-10-03 15:33                 ` Elena Ufimtseva

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5232DB62.30809@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=JBeulich@suse.com \
    --cc=dario.faggioli@citrix.com \
    --cc=george.dunlap@eu.citrix.com \
    --cc=keir@xen.org \
    --cc=lccycc123@gmail.com \
    --cc=msw@linux.com \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=ufimtseva@gmail.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.