xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: Elena Ufimtseva <ufimtseva@gmail.com>
Cc: george.dunlap@eu.citrix.com, dario.faggioli@citrix.com,
	lccycc123@gmail.com, xen-devel@lists.xen.org, msw@amazon.com,
	anddavid.vrabel@citrix.com
Subject: Re: [PATCH RFC v2 1/2] linux/vnuma: vNUMA for PV domu guest
Date: Tue, 17 Sep 2013 10:21:51 -0400	[thread overview]
Message-ID: <5238657F.3040209@oracle.com> (raw)
In-Reply-To: <1379406841-7441-2-git-send-email-ufimtseva@gmail.com>

On 09/17/2013 04:34 AM, Elena Ufimtseva wrote:
> Requests NUMA topology info from Xen by issuing subop
> hypercall. Initializes NUMA nodes, sets number of CPUs,
> distance table and NUMA nodes memory ranges during boot.
> vNUMA topology defined by user in VM config file. Memory
> ranges are represented by structure vnuma_topology_info
> where start and end of memory area are defined in guests
> pfns numbers, constructed and aligned accordingly to
> e820 domain map.
> In case the received structure has errors, will fail to
> dummy numa init.
> Requires XEN with applied patches from vnuma patchset;
>
> Changes since v1:
> - moved the test for xen_pv_domain() into xen_numa_init;
> - replaced memory block search/allocation by single memblock_alloc;
> - moved xen_numa_init to vnuma.c from enlighten.c;
> - moved memblock structure to public interface memory.h;
> - specified signedness of vnuma topology structure members;
> - removed excessive debug output;
>
> TODO:
> - consider common interface for Dom0, HVM and PV guests to provide
> vNUMA topology;
> - dynamic numa balancing at the time of this patch (kernel 3.11
> 6e4664525b1db28f8c4e1130957f70a94c19213e with boot parameter
> numa_balancing=true that is such by default) crashes numa-enabled
> guest. Investigate further.
>
> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
> ---
>   arch/x86/include/asm/xen/vnuma.h |   12 +++++
>   arch/x86/mm/numa.c               |    5 +++
>   arch/x86/xen/Makefile            |    2 +-
>   arch/x86/xen/vnuma.c             |   92 ++++++++++++++++++++++++++++++++++++++
>   include/xen/interface/memory.h   |   27 +++++++++++
>   5 files changed, 137 insertions(+), 1 deletion(-)
>   create mode 100644 arch/x86/include/asm/xen/vnuma.h
>   create mode 100644 arch/x86/xen/vnuma.c
>
> diff --git a/arch/x86/include/asm/xen/vnuma.h b/arch/x86/include/asm/xen/vnuma.h
> new file mode 100644
> index 0000000..1bf4cae
> --- /dev/null
> +++ b/arch/x86/include/asm/xen/vnuma.h
> @@ -0,0 +1,12 @@
> +#ifndef _ASM_X86_VNUMA_H
> +#define _ASM_X86_VNUMA_H
> +
> +#ifdef CONFIG_XEN
> +int xen_vnuma_support(void);
> +int xen_numa_init(void);
> +#else
> +int xen_vnuma_support(void) { return 0; };
> +int xen_numa_init(void) {};

This should return -EINVAL. Or perhaps you can add

#ifdef CONFIG_XEN
#include "asm/xen/vnuma.h"
#endif

in numa.c and not bother with the ifdef here.

> +#endif
> +
> +#endif /* _ASM_X86_VNUMA_H */
> diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> index 8bf93ba..a95fadf 100644
> --- a/arch/x86/mm/numa.c
> +++ b/arch/x86/mm/numa.c
> @@ -19,6 +19,7 @@
>   #include <asm/amd_nb.h>
>   
>   #include "numa_internal.h"
> +#include "asm/xen/vnuma.h"
>   
>   int __initdata numa_off;
>   nodemask_t numa_nodes_parsed __initdata;
> @@ -621,6 +622,10 @@ static int __init dummy_numa_init(void)
>   void __init x86_numa_init(void)
>   {
>   	if (!numa_off) {
> +#ifdef CONFIG_XEN
> +		if (!numa_init(xen_numa_init))
> +			return;
> +#endif
>   #ifdef CONFIG_X86_NUMAQ
>   		if (!numa_init(numaq_numa_init))
>   			return;
> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
> index 96ab2c0..de9deab 100644
> --- a/arch/x86/xen/Makefile
> +++ b/arch/x86/xen/Makefile
> @@ -13,7 +13,7 @@ CFLAGS_mmu.o			:= $(nostackp)
>   obj-y		:= enlighten.o setup.o multicalls.o mmu.o irq.o \
>   			time.o xen-asm.o xen-asm_$(BITS).o \
>   			grant-table.o suspend.o platform-pci-unplug.o \
> -			p2m.o
> +			p2m.o vnuma.o
>   
>   obj-$(CONFIG_EVENT_TRACING) += trace.o
>   
> diff --git a/arch/x86/xen/vnuma.c b/arch/x86/xen/vnuma.c
> new file mode 100644
> index 0000000..3c6c73f
> --- /dev/null
> +++ b/arch/x86/xen/vnuma.c
> @@ -0,0 +1,92 @@
> +#include <linux/err.h>
> +#include <linux/memblock.h>
> +#include <xen/interface/xen.h>
> +#include <xen/interface/memory.h>
> +#include <asm/xen/interface.h>
> +#include <asm/xen/hypercall.h>
> +#include <asm/xen/vnuma.h>
> +#ifdef CONFIG_NUMA
> +/* Xen PV NUMA topology initialization */
> +static unsigned int xen_vnuma_init = 0;
> +int xen_vnuma_support()
> +{
> +	return xen_vnuma_init;
> +}
> +int __init xen_numa_init(void)
> +{
> +	int rc;
> +	unsigned int i, j, cpu, idx, pcpus;
> +	u64 phys, physd, physc;
> +	unsigned int *vdistance, *cpu_to_node;

cpu_to_node may not be a particularly good name, as there is a macro with
the same name in topology.h.

> +	unsigned long mem_size, dist_size, cpu_to_node_size;
> +	struct vnuma_memarea *varea;
> +
> +	struct vnuma_topology_info numa_topo = {
> +		.domid = DOMID_SELF
> +	};
> +	rc = -EINVAL;
> +	if (!xen_pv_domain())
> +		return rc;

No need to set rc here; just return -EINVAL.

And please add spaces between lines to separate logical blocks a little.

> +	pcpus = num_possible_cpus();
> +	mem_size =  pcpus * sizeof(struct vnuma_memarea);
> +	dist_size = pcpus * pcpus * sizeof(*numa_topo.vdistance);
> +	cpu_to_node_size = pcpus * sizeof(*numa_topo.cpu_to_node);
> +	phys = memblock_alloc(mem_size, PAGE_SIZE);
> +	physd = memblock_alloc(dist_size, PAGE_SIZE);
> +	physc = memblock_alloc(cpu_to_node_size, PAGE_SIZE);
> +	if (!phys || !physc || !physd)
> +		goto vnumaout;
> +	varea = __va(phys);
> +	vdistance  = __va(physd);
> +	cpu_to_node  = __va(physc);
> +	set_xen_guest_handle(numa_topo.vmemarea, varea);
> +	set_xen_guest_handle(numa_topo.vdistance, vdistance);
> +	set_xen_guest_handle(numa_topo.cpu_to_node, cpu_to_node);
> +	rc = HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo);
> +	if (rc < 0)
> +		goto vnumaout;
> +	rc = -EINVAL;
> +	if (numa_topo.nr_nodes == 0) {
> +		/* will pass to dummy_numa_init */
> +		goto vnumaout;
> +	}
> +	if (numa_topo.nr_nodes > num_possible_cpus()) {
> +		pr_debug("vNUMA: Node without cpu is not supported in this version.\n");
> +		goto vnumaout;
> +	}
> +	/*
> +	 * NUMA nodes memory ranges are in pfns, constructed and
> +	 * aligned based on e820 ram domain map
> +	*/
> +	for (i = 0; i < numa_topo.nr_nodes; i++) {
> +		if (numa_add_memblk(i, varea[i].start, varea[i].end))
> +			/* pass to numa_dummy_init */
> +			goto vnumaout;
> +		node_set(i, numa_nodes_parsed);
> +	}
> +	setup_nr_node_ids();
> +	/* Setting the cpu, apicid to node */
> +	for_each_cpu(cpu, cpu_possible_mask) {
> +		set_apicid_to_node(cpu, cpu_to_node[cpu]);
> +		numa_set_node(cpu, cpu_to_node[cpu]);
> +		__apicid_to_node[cpu] = cpu_to_node[cpu];

Isn't this what set_apicid_to_node() above will do?

-boris

> +		cpumask_set_cpu(cpu, node_to_cpumask_map[cpu_to_node[cpu]]);
> +	}
> +	for (i = 0; i < numa_topo.nr_nodes; i++) {
> +		for (j = 0; j < numa_topo.nr_nodes; j++) {
> +			idx = (j * numa_topo.nr_nodes) + i;
> +			numa_set_distance(i, j, *(vdistance + idx));
> +		}
> +	}
> +	rc = 0;
> +	xen_vnuma_init = 1;
> +vnumaout:
> +	if (phys)
> +		memblock_free(__pa(phys), mem_size);
> +	if (physd)
> +		memblock_free(__pa(physd), dist_size);
> +	if (physc)
> +		memblock_free(__pa(physc), cpu_to_node_size);
> +	return rc;
> +}
> +#endif
> diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
> index 2ecfe4f..4237f51 100644
> --- a/include/xen/interface/memory.h
> +++ b/include/xen/interface/memory.h
> @@ -263,4 +263,31 @@ struct xen_remove_from_physmap {
>   };
>   DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
>   
> +/* vNUMA structures */
> +struct vnuma_memarea {
> +	uint64_t start, end;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(vnuma_memarea);
> +
> +struct vnuma_topology_info {
> +	/* OUT */
> +	domid_t domid;
> +	/* IN */
> +	uint16_t nr_nodes; /* number of virtual numa nodes */
> +	uint32_t _pad;
> +	/* distance table */
> +	GUEST_HANDLE(uint) vdistance;
> +	/* cpu mapping to vnodes */
> +	GUEST_HANDLE(uint) cpu_to_node;
> +	/*
> +	* array of numa memory areas constructed by Xen
> +	* where start and end are pfn numbers of the area
> +	* Xen takes into account domains e820 map
> +	*/
> +	GUEST_HANDLE(vnuma_memarea) vmemarea;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(vnuma_topology_info);
> +
> +#define XENMEM_get_vnuma_info	25
> +
>   #endif /* __XEN_PUBLIC_MEMORY_H__ */

  parent reply	other threads:[~2013-09-17 14:21 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-17  8:33 [PATCH RFC v2 0/2] linux/vnuma: vNUMA PV guest support introduction Elena Ufimtseva
2013-09-17  8:34 ` [PATCH RFC v2 1/2] linux/vnuma: vNUMA for PV domu guest Elena Ufimtseva
2013-09-17 14:10   ` David Vrabel
2013-09-18  6:16     ` Elena Ufimtseva
2013-09-18  7:17       ` Dario Faggioli
2013-09-18  7:41         ` Elena Ufimtseva
2013-09-18 12:23       ` David Vrabel
2013-09-17 14:21   ` Boris Ostrovsky [this message]
2013-09-18  6:30     ` Elena Ufimtseva
2013-09-18  7:33       ` Dario Faggioli
2013-09-18  7:39         ` Elena Ufimtseva
2013-09-18 16:04   ` Dario Faggioli
2013-09-17  8:34 ` [PATCH RFC v2 2/2] linux/vnuma: Enables NUMA for domu PV guest Elena Ufimtseva
2013-09-17 14:17   ` David Vrabel
2013-09-17 14:37     ` Dario Faggioli
2013-09-18  6:32       ` Elena Ufimtseva
2013-09-18 15:14   ` Dario Faggioli
2013-09-27 17:03     ` Konrad Rzeszutek Wilk
2013-09-18 16:16 ` [PATCH RFC v2 0/2] linux/vnuma: vNUMA PV guest support introduction Dario Faggioli
2013-09-18 16:20   ` Elena Ufimtseva

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5238657F.3040209@oracle.com \
    --to=boris.ostrovsky@oracle.com \
    --cc=anddavid.vrabel@citrix.com \
    --cc=dario.faggioli@citrix.com \
    --cc=george.dunlap@eu.citrix.com \
    --cc=lccycc123@gmail.com \
    --cc=msw@amazon.com \
    --cc=ufimtseva@gmail.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).