All of lore.kernel.org
 help / color / mirror / Atom feed
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: Chao Peng <chao.p.peng@linux.intel.com>, xen-devel@lists.xen.org
Cc: keir@xen.org, Ian.Campbell@citrix.com,
	stefano.stabellini@eu.citrix.com, andrew.cooper3@citrix.com,
	dario.faggioli@citrix.com, Ian.Jackson@eu.citrix.com,
	will.auld@intel.com, JBeulich@suse.com, wei.liu2@citrix.com,
	dgdegra@tycho.nsa.gov
Subject: Re: [PATCH v10 01/13] x86: add socket_cpumask
Date: Tue, 07 Jul 2015 18:32:55 -0400	[thread overview]
Message-ID: <559C5397.4090606@oracle.com> (raw)
In-Reply-To: <1435308227-30586-2-git-send-email-chao.p.peng@linux.intel.com>

On 06/26/2015 04:43 AM, Chao Peng wrote:
> Maintain socket_cpumask which contains all the HT and core siblings
> in the same socket.
>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> Acked-by: Jan Beulich <jbeulich@suse.com>
> ---
> Changes in v9:
> * Add comments for set_nr_sockets.
> * Move set_nr_sockets() invocation from __start_xen() to smp_prepare_cpus().
> Changes in v8:
> * Remove total_cpus and retrofit the algorithm for calculating nr_sockets.
> * Change per-socket cpumask allocation as on demand.
> * socket_to_cpumask => socket_cpumask.
> Changes in v7:
> * Introduce total_cpus to calculate nr_sockets.
> * Minor code sequence improvement in set_cpu_sibling_map.
> * Improve comments for nr_sockets.
> ---
>   xen/arch/x86/mpparse.c    | 17 +++++++++++++++++
>   xen/arch/x86/smpboot.c    | 26 +++++++++++++++++++++++++-
>   xen/include/asm-x86/smp.h | 11 +++++++++++
>   3 files changed, 53 insertions(+), 1 deletion(-)
>
> diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c
> index 003c56e..8609f4a 100644
> --- a/xen/arch/x86/mpparse.c
> +++ b/xen/arch/x86/mpparse.c
> @@ -87,6 +87,23 @@ void __init set_nr_cpu_ids(unsigned int max_cpus)
>   #endif
>   }
>   
> +void __init set_nr_sockets(void)
> +{
> +    /*
> +     * Count the actual cpus in the socket 0 and use it to calculate nr_sockets
> +     * so that the latter will be always >= the actual socket number in the
> +     * system even when APIC IDs from MP table are too sparse.
> +     */
> +    unsigned int cpus = bitmap_weight(phys_cpu_present_map.mask,
> +                                      boot_cpu_data.x86_max_cores *
> +                                      boot_cpu_data.x86_num_siblings);
> +
> +    if ( cpus == 0 )
> +        cpus = 1;
> +
> +    nr_sockets = DIV_ROUND_UP(num_processors + disabled_cpus, cpus);
> +}
> +
>   /*
>    * Intel MP BIOS table parsing routines:
>    */
> diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
> index 2289284..e75bbd3 100644
> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -60,6 +60,9 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_mask);
>   cpumask_t cpu_online_map __read_mostly;
>   EXPORT_SYMBOL(cpu_online_map);
>   
> +unsigned int __read_mostly nr_sockets;
> +cpumask_var_t *__read_mostly socket_cpumask;
> +
>   struct cpuinfo_x86 cpu_data[NR_CPUS];
>   
>   u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
> @@ -245,6 +248,8 @@ static void set_cpu_sibling_map(int cpu)
>   
>       cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
>   
> +    cpumask_set_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);

This patch crashes Xen on my 32-cpu Intel box here for cpu 16, which is 
the first CPU on the second socket (i.e. on socket 1).

The reason appears to be that cpu_to_socket(16) is (correctly) 1 here, 
but ...

> +
>       if ( c[cpu].x86_num_siblings > 1 )
>       {
>           for_each_cpu ( i, &cpu_sibling_setup_map )
> @@ -649,7 +654,13 @@ void cpu_exit_clear(unsigned int cpu)
>   
>   static void cpu_smpboot_free(unsigned int cpu)
>   {
> -    unsigned int order;
> +    unsigned int order, socket = cpu_to_socket(cpu);
> +
> +    if ( cpumask_empty(socket_cpumask[socket]) )
> +    {
> +        free_cpumask_var(socket_cpumask[socket]);
> +        socket_cpumask[socket] = NULL;
> +    }
>   
>       free_cpumask_var(per_cpu(cpu_sibling_mask, cpu));
>       free_cpumask_var(per_cpu(cpu_core_mask, cpu));
> @@ -694,6 +705,7 @@ static int cpu_smpboot_alloc(unsigned int cpu)
>       nodeid_t node = cpu_to_node(cpu);
>       struct desc_struct *gdt;
>       unsigned long stub_page;
> +    unsigned int socket = cpu_to_socket(cpu);

... is zero here, meaning that socket_cpumask[1] is NULL. I suspect that 
phys_proc_id is probably not set at this point but is by the time we get 
to set_cpu_sibling_map(). I haven't looked any further yet. I might do 
this tomorrow unless Chao does it before me.

Here is what I have

[root@ovs104 ~]# numactl --hardware
available: 2 nodes (0-1)
node 0 cpus: 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23
node 0 size: 15748 MB
node 0 free: 15466 MB
node 1 cpus: 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31
node 1 size: 16118 MB
node 1 free: 15896 MB
node distances:
node   0   1
   0:  10  21
   1:  21  10
[root@ovs104 ~]# grep processor /proc/cpuinfo |wc -l
32
[root@ovs104 ~]# head -20 /proc/cpuinfo
processor    : 0
vendor_id    : GenuineIntel
cpu family    : 6
model        : 45
model name    : Genuine Intel(R) CPU  @ 2.30GHz
stepping    : 2
microcode    : 0x8000020c
cpu MHz        : 1231.937
cache size    : 20480 KB
physical id    : 0
siblings    : 16
core id        : 0
cpu cores    : 8
apicid        : 0
initial apicid    : 0
fpu        : yes
fpu_exception    : yes
cpuid level    : 13
wp        : yes
flags        : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca 
cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall 
nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl 
xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor 
ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 
x2apic popcnt tsc_deadline_timer aes xsave avx lahf_lm ida arat epb pln 
pts dtherm tpr_shadow vnmi flexpriority ept vpid xsaveopt

-boris


>   
>       if ( node != NUMA_NO_NODE )
>           memflags = MEMF_node(node);
> @@ -736,6 +748,10 @@ static int cpu_smpboot_alloc(unsigned int cpu)
>           goto oom;
>       per_cpu(stubs.addr, cpu) = stub_page + STUB_BUF_CPU_OFFS(cpu);
>   
> +    if ( !socket_cpumask[socket] &&
> +         !zalloc_cpumask_var(socket_cpumask + socket) )
> +        goto oom;
> +
>       if ( zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, cpu)) &&
>            zalloc_cpumask_var(&per_cpu(cpu_core_mask, cpu)) )
>           return 0;
> @@ -786,6 +802,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
>   
>       stack_base[0] = stack_start;
>   
> +    set_nr_sockets();
> +
> +    socket_cpumask = xzalloc_array(cpumask_var_t, nr_sockets);
> +    if ( !socket_cpumask || !zalloc_cpumask_var(socket_cpumask) )
> +        panic("No memory for socket CPU siblings map");
> +
>       if ( !zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, 0)) ||
>            !zalloc_cpumask_var(&per_cpu(cpu_core_mask, 0)) )
>           panic("No memory for boot CPU sibling/core maps");
> @@ -851,6 +873,8 @@ remove_siblinginfo(int cpu)
>       int sibling;
>       struct cpuinfo_x86 *c = cpu_data;
>   
> +    cpumask_clear_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);
> +
>       for_each_cpu ( sibling, per_cpu(cpu_core_mask, cpu) )
>       {
>           cpumask_clear_cpu(cpu, per_cpu(cpu_core_mask, sibling));
> diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
> index 67518cf..e594062 100644
> --- a/xen/include/asm-x86/smp.h
> +++ b/xen/include/asm-x86/smp.h
> @@ -58,6 +58,17 @@ int hard_smp_processor_id(void);
>   
>   void __stop_this_cpu(void);
>   
> +/*
> + * The value may be greater than the actual socket number in the system and
> + * is required not to change from the initial startup.
> + */
> +extern unsigned int nr_sockets;
> +
> +void set_nr_sockets(void);
> +
> +/* Representing HT and core siblings in each socket. */
> +extern cpumask_var_t *socket_cpumask;
> +
>   #endif /* !__ASSEMBLY__ */
>   
>   #endif

  reply	other threads:[~2015-07-07 22:32 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-26  8:43 [PATCH v10 00/13] enable Cache Allocation Technology (CAT) for VMs Chao Peng
2015-06-26  8:43 ` [PATCH v10 01/13] x86: add socket_cpumask Chao Peng
2015-07-07 22:32   ` Boris Ostrovsky [this message]
2015-07-08  2:43     ` Chao Peng
2015-07-08  7:42       ` Jan Beulich
2015-06-26  8:43 ` [PATCH v10 02/13] x86: detect and initialize Intel CAT feature Chao Peng
2015-07-07 10:25   ` Jan Beulich
2015-07-08  2:24     ` Chao Peng
2015-06-26  8:43 ` [PATCH v10 03/13] x86: maintain COS to CBM mapping for each socket Chao Peng
2015-06-26  8:43 ` [PATCH v10 04/13] x86: add COS information for each domain Chao Peng
2015-06-26  8:43 ` [PATCH v10 05/13] x86: expose CBM length and COS number information Chao Peng
2015-06-26  8:43 ` [PATCH v10 06/13] x86: dynamically get/set CBM for a domain Chao Peng
2015-06-26  8:43 ` [PATCH v10 07/13] x86: add scheduling support for Intel CAT Chao Peng
2015-06-26  8:43 ` [PATCH v10 08/13] xsm: add CAT related xsm policies Chao Peng
2015-06-26  8:43 ` [PATCH v10 09/13] tools/libxl: minor name changes for CMT commands Chao Peng
2015-06-26  8:43 ` [PATCH v10 10/13] tools/libxl: add command to show PSR hardware info Chao Peng
2015-06-26  8:43 ` [PATCH v10 11/13] tools/libxl: introduce some socket helpers Chao Peng
2015-06-26  8:43 ` [PATCH v10 12/13] tools: add tools support for Intel CAT Chao Peng
2015-06-26  8:43 ` [PATCH v10 13/13] docs: add xl-psr.markdown Chao Peng
2015-07-07 14:46 ` [PATCH v10 00/13] enable Cache Allocation Technology (CAT) for VMs Ian Campbell
2015-07-08  9:40   ` Chao Peng
2015-07-08 10:02     ` Wei Liu
2015-07-09  1:29       ` Chao Peng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=559C5397.4090606@oracle.com \
    --to=boris.ostrovsky@oracle.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=JBeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=chao.p.peng@linux.intel.com \
    --cc=dario.faggioli@citrix.com \
    --cc=dgdegra@tycho.nsa.gov \
    --cc=keir@xen.org \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=wei.liu2@citrix.com \
    --cc=will.auld@intel.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.