Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest

All of lore.kernel.org
 help / color / mirror / Atom feed

From: David Vrabel <david.vrabel@citrix.com>
To: Elena Ufimtseva <ufimtseva@gmail.com>
Cc: xen-devel@lists.xen.org
Subject: Re: [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest
Date: Tue, 27 Aug 2013 14:52:36 +0100	[thread overview]
Message-ID: <521CAF24.1030201@citrix.com> (raw)
In-Reply-To: <1377593580-10040-2-git-send-email-ufimtseva@gmail.com>

On 27/08/13 09:52, Elena Ufimtseva wrote:
> Uses subop hypercall to request XEN about vnuma topology.
> Sets the memory blocks (aligned by XEN), cpus, distance table
> on boot. NUMA support should be compiled in kernel.
> 
> NUMA support should be compiled in kernel.
> 
> Memory blocks are aligned by xen. Xen is aware of guest initial
> RAM layout.
> If the memory blocks are incorrect, call for default linux numa
> dummy init.
> 
> Requires XEN with vNUMA support.
> 
> Applies to 3.10 git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git
> latest commit: 8bb495e3f02401ee6f76d1b1d77f3ac9f079e376
> 
> TODO:
> Change subop from XENMEM to SYSCTL.

XENMEM subop seems right here.

> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
> ---
>  arch/x86/include/asm/xen/vnuma-xen.h |   36 +++++++++++
>  arch/x86/mm/numa.c                   |    8 +++
>  arch/x86/xen/enlighten.c             |  115 ++++++++++++++++++++++++++++++++++
>  include/xen/interface/memory.h       |    1 +
>  4 files changed, 160 insertions(+)
>  create mode 100644 arch/x86/include/asm/xen/vnuma-xen.h
> 
> diff --git a/arch/x86/include/asm/xen/vnuma-xen.h b/arch/x86/include/asm/xen/vnuma-xen.h
> new file mode 100644
> index 0000000..73c1bde
> --- /dev/null
> +++ b/arch/x86/include/asm/xen/vnuma-xen.h
> @@ -0,0 +1,36 @@
> +#ifndef _ASM_X86_VNUMA_XEN_H
> +#define _ASM_X86_VNUMA_XEN_H
> +
> +#ifdef CONFIG_XEN
> +int __initdata xen_numa_init(void);
> +
> +struct vnuma_memblk {
> +	          uint64_t start, end;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(vnuma_memblk);
> +
> +struct vnuma_topology_info {
> +	domid_t domid;
> +	uint16_t nr_nodes;
> +	GUEST_HANDLE(vnuma_memblk) vmemblk;
> +	GUEST_HANDLE(int) vdistance;
> +	GUEST_HANDLE(int) cpu_to_node;
> +	GUEST_HANDLE(int) vnode_to_pnode;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(vnuma_topology_info);
> +
> +struct xen_domctl {
> +	uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
> +	domid_t  domain;
> +	union {
> +		struct vnuma_topology_info vnuma;
> +		} u;
> +};
> +typedef struct xen_domctl xen_domctl_t;
> +DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_t);

Most of this is the public interface provided by Xen, this should be in
include/xen/interface/memory.h. And only the xen_numa_init() prototype
in this header.

> +
> +#else
> +int __init xen_numa_init(void) {}
> +#endif
> +
> +#endif /* _ASM_X86_VNUMA_XEN_H */
> diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> index 8bf93ba..3ec7855 100644
> --- a/arch/x86/mm/numa.c
> +++ b/arch/x86/mm/numa.c
> @@ -20,6 +20,10 @@

#include <asm/xen/vnuma.h>

here.

>  #include "numa_internal.h"
>  
> +#ifdef CONFIG_XEN
> +#include "asm/xen/vnuma-xen.h"
> +#endif

Instead of this.

> +
>  int __initdata numa_off;
>  nodemask_t numa_nodes_parsed __initdata;
>  
> @@ -621,6 +625,10 @@ static int __init dummy_numa_init(void)
>  void __init x86_numa_init(void)
>  {
>  	if (!numa_off) {
> +#ifdef CONFIG_XEN
> +		if (xen_pv_domain() && !numa_init(xen_numa_init))
> +			return;
> +#endif

Move the test for xen_pv_domain() into xen_numa_init.

>  #ifdef CONFIG_X86_NUMAQ
>  		if (!numa_init(numaq_numa_init))
>  			return;
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index 193097e..4658e9d 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -33,6 +33,7 @@
>  #include <linux/memblock.h>
>  #include <linux/edd.h>
>  
> +#include <asm/numa.h>
>  #include <xen/xen.h>
>  #include <xen/events.h>
>  #include <xen/interface/xen.h>
> @@ -69,6 +70,7 @@
>  #include <asm/mwait.h>
>  #include <asm/pci_x86.h>
>  #include <asm/pat.h>
> +#include <asm/xen/vnuma-xen.h>
>  
>  #ifdef CONFIG_ACPI
>  #include <linux/acpi.h>
> @@ -1750,4 +1752,117 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
>  	.x2apic_available	= xen_x2apic_para_available,
>  };
>  EXPORT_SYMBOL(x86_hyper_xen_hvm);
> +
> +/* Xen PV NUMA topology initialization */
> +int __initdata xen_numa_init(void)

This function needs to more clearly separate the different parts.  This
could be by splitting it into several functions or by improving the
comments and spacing.

I'd also move it to a new file: arch/x86/xen/vnuma.c.

> +{
> +	struct vnuma_memblk *vblk;
> +	struct vnuma_topology_info numa_topo;
> +	uint64_t size, start, end;
> +	int i, j, cpu, rc;
> +	u64 phys, physd, physc;
> +	u64 new_end_pfn;
> +	size_t mem_size;
> +
> +	int *vdistance, *cpu_to_node;
> +	unsigned long dist_size, cpu_to_node_size;
> +	numa_topo.domid = DOMID_SELF;
> +	rc = -EINVAL;
> +	/* block mem reservation for memblks */
> +	mem_size = num_possible_cpus() * sizeof(struct vnuma_memblk);
> +	phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), mem_size, PAGE_SIZE);
> +	if (!phys) {
> +		pr_info("vNUMA: can't allocate memory for membloks size %zu\n", mem_size);
> +		rc = -ENOMEM;
> +		goto errout;
> +	}
> +	if (memblock_reserve(phys, mem_size) < 0) {
> +		pr_info("vNUMA: Cannot reserver mem for blocks of size %zu \n", mem_size);
> +		rc = -ENOMEM;
> +		goto errout;
> +	}

Does mem_block_alloc() work here instead of the find_in_range/reserve
combo?  Similarly for the other allocations.

> +	vblk = __va(phys);
> +	set_xen_guest_handle(numa_topo.vmemblk, vblk);

Please group all the memory allocations followed by grouping the
initialization of the hypercall arguments.

> +	if (HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo) < 0)
> +		goto errout;

ret = HYPERVISOR_memory_op(...);
if (ret < 0)
     goto out;

> +	if (numa_topo.nr_nodes == 0)
> +		/* Pass to DUMMY numa */
> +		goto errout;
> +	if (numa_topo.nr_nodes > num_possible_cpus()) {
> +		pr_info("vNUMA: Node without cpu is not supported, nodes = %d\n", numa_topo.nr_nodes);
> +		goto errout;
> +	}

Is this a constraint of the Linux NUMA support?

> +	new_end_pfn = 0;
> +	/* We have sizes in pages received from hypervisor, no holes and ordered */

How does this play with the relocate of frames done for dom0 during
initial setup?

> +	for (i = 0; i < numa_topo.nr_nodes; i++) {
> +		start = vblk[i].start;
> +		end = vblk[i].end;
> +		size = end - start;
> +		pr_info("VNMA blk[%d] size is %#010Lx start = %#010Lx end = %#010Lx\n",
> +			i, size, start, end);
> +		if (numa_add_memblk(i, start, end)) {
> +			pr_info("vNUMA: Could not add memblk[%d]\n", i);
> +			rc = -EAGAIN;
> +			goto errout;
> +		}
> +		node_set(i, numa_nodes_parsed);
> +	}
> +	setup_nr_node_ids();
> +	/* Setting the cpu, apicid to node */
> +	for_each_cpu(cpu, cpu_possible_mask) {
> +		pr_info("Setting up vcpu[%d] to node[%d]\n", cpu, cpu_to_node[cpu]);
> +		set_apicid_to_node(cpu, cpu_to_node[cpu]);
> +		numa_set_node(cpu, cpu_to_node[cpu]);
> +		__apicid_to_node[cpu] = cpu_to_node[cpu];
> +		cpumask_set_cpu(cpu, node_to_cpumask_map[cpu_to_node[cpu]]);
> +	}
> +	setup_nr_node_ids();
> +	rc = 0;
> +	for (i = 0; i < numa_topo.nr_nodes; i++)

Style: {} around this multi-line body.

> +		for (j = 0; j < numa_topo.nr_nodes; j++) {
> +			numa_set_distance(i, j, *(vdistance + j*numa_topo.nr_nodes + i));
> +			pr_info("distance %d <-> %d = %d\n", i, j, *(vdistance + j*numa_topo.nr_nodes + i));
> +		}
> +errout:

This isn't (always) an error path, name this label "out".

> +	if (phys)
> +		memblock_free(__pa(phys), mem_size);
> +	if (physd)
> +		memblock_free(__pa(physd), dist_size);
> +	if (physc)
> +		memblock_free(__pa(physc), cpu_to_node_size);
> +	if (rc)
> +		pr_debug("XEN: hypercall failed to get vNUMA topology.\n");

Remove this debug print, you already have prints for specific failures.

> +	return rc;
> +}
> +
>  #endif

David

next prev parent reply	other threads:[~2013-08-27 13:52 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-27  8:52 [PATCH RFC 0/2] linux/vnuma: vnuma topology PV guest Elena Ufimtseva
2013-08-27  8:52 ` [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest Elena Ufimtseva
2013-08-27 13:52   ` David Vrabel [this message]
2013-08-27 14:33   ` Konrad Rzeszutek Wilk
2013-08-28 13:41     ` Elena Ufimtseva
2013-08-28 23:12     ` Dario Faggioli
2013-08-29 14:07       ` Konrad Rzeszutek Wilk
2013-08-28  1:27   ` Matt Wilson
2013-08-28  1:37     ` Matt Wilson
2013-08-28 13:08       ` Dario Faggioli
2013-08-28 13:39         ` George Dunlap
2013-08-28 16:01       ` Konrad Rzeszutek Wilk
2013-08-28 16:38         ` Matt Wilson
2013-08-28 20:10           ` Elena Ufimtseva
2013-08-28 23:25             ` Matt Wilson
2013-08-29  9:16               ` George Dunlap
2013-08-28 22:21           ` Dario Faggioli
2013-08-29  0:11             ` Matt Wilson
2013-08-29 13:41               ` David Vrabel
2013-08-29 14:23                 ` Konrad Rzeszutek Wilk
2013-08-29 14:32                   ` George Dunlap
2013-08-29 14:51                     ` Konrad Rzeszutek Wilk
2013-08-29 22:29                       ` Dario Faggioli
2013-08-30 12:57                         ` Konrad Rzeszutek Wilk
2013-08-27  8:53 ` [PATCH RFC 2/2] linux/vnuma: Enables NUMA support for PV guest Elena Ufimtseva
2013-08-27 13:35   ` David Vrabel
2013-08-27 15:36     ` Konrad Rzeszutek Wilk
2013-08-27 14:17   ` Konrad Rzeszutek Wilk
2013-08-27 13:48 ` [PATCH RFC 0/2] linux/vnuma: vnuma topology " David Vrabel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=521CAF24.1030201@citrix.com \
    --to=david.vrabel@citrix.com \
    --cc=ufimtseva@gmail.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.