linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC] x86: Reduce MAX_LOCAL_APIC and MAX_IO_APICS
@ 2015-09-25 19:48 Denys Vlasenko
  2015-09-30 15:11 ` Thomas Gleixner
  2015-09-30 17:18 ` Jiang Liu
  0 siblings, 2 replies; 7+ messages in thread
From: Denys Vlasenko @ 2015-09-25 19:48 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Denys Vlasenko, Jiang Liu, Thomas Gleixner, Len Brown, x86,
	linux-kernel

Before this change MAX_LOCAL_APIC had the fixed value of 32*1024.
Such a big value causes several data arrays to be quite oversized:

phys_cpu_present_map is 4 kbytes (one bit per apic id),
__apicid_to_node[] is 64 kbytes,
apic_version[] is 128 kbytes.

On "usual" systems, APIC ids simply go from zero
to maximum logical CPU number, mirroring CPU ids.

On broken and unusual multi-socket systems
APIC ids can be non-contiguous.

This patch changes MAX_LOCAL_APIC definition as follows:

 = It is guaranteed to be at least 16.
 = If NR_CPUS > 16, then it's equal to NR_CPUS.
 = A new CONFIG_MAX_LAPIC_ID can be used to increase it
   (but not decrease).

MAX_IO_APICS was 128. This is a bit large too, making,
for example, ioapics[] array 9216 bytes big.

After this patch, MAX_IO_APICS is at least 8, at most 128.
If NR_CPUS is in this range, then MAX_IO_APICS = NR_CPUS.

apic_version[] array is changed from int to u8 -
APIC version values as of year 2015 are no larger than 0x1f
on all known CPUs.

A bit of code is added to ensure that the statement
	apic_version[apicid] = version;
in generic_processor_info() is safe with respect to bad values in both
the 'apicid' and 'version' variables.

This change reduces NR_CPUS=64 kernel's data size by 204661 bytes:

    text     data      bss       dec     hex filename
91353669 13825744 19021824 124201237 7672915 vmlinux.before
91353680 13760336 18882560 123996576 76409a0 vmlinux

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Ingo Molnar <mingo@kernel.org>
CC: Jiang Liu <jiang.liu@linux.intel.com>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Len Brown <len.brown@intel.com>
CC: x86@kernel.org
CC: linux-kernel@vger.kernel.org
---
 arch/x86/Kconfig               | 11 +++++++++++
 arch/x86/include/asm/apicdef.h | 23 +++++++++++++++++------
 arch/x86/include/asm/mpspec.h  |  2 +-
 arch/x86/kernel/apic/apic.c    | 19 ++++++++++++++++++-
 4 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 328c835..9e7c4c1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -872,6 +872,17 @@ config NR_CPUS
 	  This is purely to save memory - each supported CPU adds
 	  approximately eight kilobytes to the kernel image.
 
+config MAX_LAPIC_ID
+	int "Maximum APIC ID"
+	range 8 32768
+	default "8"
+	---help---
+	  Use this option to set maximum allowed Local APIC ID higher than
+	  maximum number of CPUs. This may be necessary for machines
+	  with large number of processor sockets and non-contiguous
+	  LAPIC numbering.
+	  This setting will be automatically rounded up, if necessary.
+
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on SMP
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index c46bb99..64e2476 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -147,15 +147,26 @@
 #define XAPIC_ENABLE	(1UL << 11)
 #define X2APIC_ENABLE	(1UL << 10)
 
-#ifdef CONFIG_X86_32
-# define MAX_IO_APICS 64
-# define MAX_LOCAL_APIC 256
-#else
-# define MAX_IO_APICS 128
-# define MAX_LOCAL_APIC 32768
+/*
+ * Allow non-contiguous APIC IDs for small machines:
+ * APIC ids 0..15 are valid in any config.
+ * Typical SMP machines have contiguous APIC IDs: 0..NR_CPUS-1.
+ * CONFIG_MAX_LAPIC_ID can override.
+ */
+#define MAX_LOCAL_APIC (NR_CPUS < 16 ? 16 : NR_CPUS)
+#if MAX_LOCAL_APIC < CONFIG_MAX_LAPIC_ID
+# undef  MAX_LOCAL_APIC
+# define MAX_LOCAL_APIC CONFIG_MAX_LAPIC_ID
 #endif
 
 /*
+ * Minimum is 8.
+ * For largish NR_CPUS, we expect to have no more IOAPICs than CPUs.
+ * No matter how large NR_CPUS is, max is 128.
+ */
+#define MAX_IO_APICS (NR_CPUS < 8 ? 8 : NR_CPUS < 128 ? NR_CPUS : 128)
+
+/*
  * All x86-64 systems are xAPIC compatible.
  * In the following, "apicid" is a physical APIC ID.
  */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index b07233b..8d0c2e6 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -6,7 +6,7 @@
 #include <asm/x86_init.h>
 #include <asm/apicdef.h>
 
-extern int apic_version[];
+extern u8 apic_version[];
 extern int pic_mode;
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 24e94ce..f49a956 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1798,7 +1798,7 @@ void __init register_lapic_address(unsigned long address)
 	}
 }
 
-int apic_version[MAX_LOCAL_APIC];
+u8 apic_version[MAX_LOCAL_APIC];
 
 /*
  * Local APIC interrupts
@@ -2054,6 +2054,23 @@ int generic_processor_info(int apicid, int version)
 		return -EINVAL;
 	}
 
+	if ((unsigned)apicid >= ARRAY_SIZE(apic_version)) {
+		int thiscpu = max + disabled_cpus;
+		pr_warning("APIC: APIC id 0x%x is too large."
+			   " Processor %d ignored.\n",
+			   apicid, thiscpu);
+		disabled_cpus++;
+		return -EINVAL;
+	}
+	if ((unsigned)version > 255) {
+		int thiscpu = max + disabled_cpus;
+		pr_warning("APIC: APIC version 0x%x is too large."
+			   " Processor %d ignored.\n",
+			   version, thiscpu);
+		disabled_cpus++;
+		return -EINVAL;
+	}
+
 	num_processors++;
 	if (apicid == boot_cpu_physical_apicid) {
 		/*
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC] x86: Reduce MAX_LOCAL_APIC and MAX_IO_APICS
  2015-09-25 19:48 [PATCH RFC] x86: Reduce MAX_LOCAL_APIC and MAX_IO_APICS Denys Vlasenko
@ 2015-09-30 15:11 ` Thomas Gleixner
  2015-09-30 15:49   ` Denys Vlasenko
  2015-09-30 17:18 ` Jiang Liu
  1 sibling, 1 reply; 7+ messages in thread
From: Thomas Gleixner @ 2015-09-30 15:11 UTC (permalink / raw)
  To: Denys Vlasenko; +Cc: Ingo Molnar, Jiang Liu, Len Brown, x86, linux-kernel

On Fri, 25 Sep 2015, Denys Vlasenko wrote:
>  
> +config MAX_LAPIC_ID
> +	int "Maximum APIC ID"
> +	range 8 32768
> +	default "8"
> +	---help---
> +	  Use this option to set maximum allowed Local APIC ID higher than
> +	  maximum number of CPUs. This may be necessary for machines
> +	  with large number of processor sockets and non-contiguous
> +	  LAPIC numbering.
> +	  This setting will be automatically rounded up, if necessary.

This is wrong. If you would limit the APIC IDs then you really break
stuff. You can only limit the number of APICs.

ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
ACPI: LAPIC (acpi_id[0x01] lapic_id[0x02] enabled)

And that's not a really large machine..

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC] x86: Reduce MAX_LOCAL_APIC and MAX_IO_APICS
  2015-09-30 15:11 ` Thomas Gleixner
@ 2015-09-30 15:49   ` Denys Vlasenko
  2015-09-30 17:03     ` Jiang Liu
  2015-09-30 17:43     ` Thomas Gleixner
  0 siblings, 2 replies; 7+ messages in thread
From: Denys Vlasenko @ 2015-09-30 15:49 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Ingo Molnar, Jiang Liu, Len Brown, x86, linux-kernel

On 09/30/2015 05:11 PM, Thomas Gleixner wrote:
> On Fri, 25 Sep 2015, Denys Vlasenko wrote:
>>  
>> +config MAX_LAPIC_ID
>> +	int "Maximum APIC ID"
>> +	range 8 32768
>> +	default "8"
>> +	---help---
>> +	  Use this option to set maximum allowed Local APIC ID higher than
>> +	  maximum number of CPUs. This may be necessary for machines
>> +	  with large number of processor sockets and non-contiguous
>> +	  LAPIC numbering.
>> +	  This setting will be automatically rounded up, if necessary.
> 
> This is wrong. If you would limit the APIC IDs then you really break
> stuff. You can only limit the number of APICs.

This CONFIG setting allows you to _increase_ the max APIC ID.

Check out this part of the patch:

+/*
+ * Allow non-contiguous APIC IDs for small machines:
+ * APIC ids 0..15 are valid in any config.
+ * Typical SMP machines have contiguous APIC IDs: 0..NR_CPUS-1.
+ * CONFIG_MAX_LAPIC_ID can override.
+ */
+#define MAX_LOCAL_APIC (NR_CPUS < 16 ? 16 : NR_CPUS)
+#if MAX_LOCAL_APIC < CONFIG_MAX_LAPIC_ID
+# undef  MAX_LOCAL_APIC
+# define MAX_LOCAL_APIC CONFIG_MAX_LAPIC_ID
 #endif


For example, if you build with NR_CPUS=128
(as Fedora kernels do),
the max accepted APIC id will be NR_CPUS-1 = 127
even if CONFIG_MAX_LAPIC_ID is 8.

If Fedora would want to support APIC ids up to
255, it will need to set CONFIG_MAX_LAPIC_ID=256.

Otherwise, if it's happy with "only" supporting up to 128,
it does not need to change CONFIG_MAX_LAPIC_ID from default.

With current kernels, max APIC id for any kernel is 32768,
which is in most cases way bigger than necessary.


Perhaps I need to update the text.
Something like:

- This setting will be automatically rounded up, if necessary
+ This setting will be increased to NR_CPUS, if necessary


> ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
> ACPI: LAPIC (acpi_id[0x01] lapic_id[0x02] enabled)

Does it mean that on a 2-CPU machine, CPU #1 has APIC_ID=2?

My patch will work fine for this machine,
with any CONFIG_MAX_LAPIC_ID.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC] x86: Reduce MAX_LOCAL_APIC and MAX_IO_APICS
  2015-09-30 15:49   ` Denys Vlasenko
@ 2015-09-30 17:03     ` Jiang Liu
  2015-09-30 17:43     ` Thomas Gleixner
  1 sibling, 0 replies; 7+ messages in thread
From: Jiang Liu @ 2015-09-30 17:03 UTC (permalink / raw)
  To: Denys Vlasenko, Thomas Gleixner; +Cc: Ingo Molnar, Len Brown, x86, linux-kernel

On 2015/9/30 23:49, Denys Vlasenko wrote:
> On 09/30/2015 05:11 PM, Thomas Gleixner wrote:
>> On Fri, 25 Sep 2015, Denys Vlasenko wrote:
>>>  
>>> +config MAX_LAPIC_ID
>>> +	int "Maximum APIC ID"
>>> +	range 8 32768
>>> +	default "8"
>>> +	---help---
>>> +	  Use this option to set maximum allowed Local APIC ID higher than
>>> +	  maximum number of CPUs. This may be necessary for machines
>>> +	  with large number of processor sockets and non-contiguous
>>> +	  LAPIC numbering.
>>> +	  This setting will be automatically rounded up, if necessary.
>>
>> This is wrong. If you would limit the APIC IDs then you really break
>> stuff. You can only limit the number of APICs.
> 
> This CONFIG setting allows to _increase_ max APIC ID.
> 
> Check out this part of the patch:
> 
> +/*
> + * Allow non-contiguous APIC IDs for small machines:
> + * APIC ids 0..15 are valid in any config.
> + * Typical SMP machines have contiguous APIC IDs: 0..NR_CPUS-1.
> + * CONFIG_MAX_LAPIC_ID can override.
> + */
> +#define MAX_LOCAL_APIC (NR_CPUS < 16 ? 16 : NR_CPUS)
> +#if MAX_LOCAL_APIC < CONFIG_MAX_LAPIC_ID
> +# undef  MAX_LOCAL_APIC
> +# define MAX_LOCAL_APIC CONFIG_MAX_LAPIC_ID
>  #endif
> 
> 
> For example, if you'd build with NR_CPUS=128
> (for example, Fedora kernels do that),
> max accepted APIC id will be NR_CPUS-1 = 127
> even if CONFIG_MAX_LAPIC_ID is 8.
> 
> If Fedora would want to support APIC ids up to
> 255, it will need to set CONFIG_MAX_LAPIC_ID=256.
> 
> Otherwise, if it's happy with "only" supporting up to 128,
> it does not need to change CONFIG_MAX_LAPIC_ID from default.
> 
> With current kernels, max APIC id for any kernel is 32768,
> which is in most cases way bigger than necessary.
> 
> 
> Perhaps I need to update the text.
> Something like:
> 
> - This setting will be automatically rounded up, if necessary
> + This setting will be increased to NR_CPUS, if necessary
> 
> 
>> ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
>> ACPI: LAPIC (acpi_id[0x01] lapic_id[0x02] enabled)
> 
> Does it mean that on a 2-CPU machine, CPU #1 has APIC_ID=2?
Yes, APIC IDs are assigned by the BIOS and may not be contiguous.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC] x86: Reduce MAX_LOCAL_APIC and MAX_IO_APICS
  2015-09-25 19:48 [PATCH RFC] x86: Reduce MAX_LOCAL_APIC and MAX_IO_APICS Denys Vlasenko
  2015-09-30 15:11 ` Thomas Gleixner
@ 2015-09-30 17:18 ` Jiang Liu
  1 sibling, 0 replies; 7+ messages in thread
From: Jiang Liu @ 2015-09-30 17:18 UTC (permalink / raw)
  To: Denys Vlasenko, Ingo Molnar; +Cc: Thomas Gleixner, Len Brown, x86, linux-kernel

On 2015/9/26 3:48, Denys Vlasenko wrote:
> Before this change MAX_LOCAL_APIC had the fixed value of 32*1024.
> Such a big value causes several data arrays to be quite oversized:
> 
> phys_cpu_present_map is 4 kbytes (one bit per apic id),
> __apicid_to_node[] is 64 kbytes,
> apic_version[] is 128 kbytes.
> 
> On "usual" systems, APIC ids simply go from zero
> to maximum logical CPU number, mirroring CPU ids.
> 
> On broken and unusual multi-socket systems
> APIC ids can be non-contiguous.
> 
> This patch changes MAX_LOCAL_APIC definition as follows:
> 
>  = It is guaranteed to be at least 16.
>  = If NR_CPUS > 16, then it's equal to NR_CPUS.
>  = A new CONFIG_MAX_LAPIC_ID can be used to increase it
>    (but not decrease).
> 
> MAX_IO_APICS was 128. This is a bit large too, making,
> for example, ioapics[] array 9216 bytes big.
> 
> After this patch, MAX_IO_APICS is at least 8, at most 128.
> If NR_CPUS is in this range, then MAX_IO_APICS = NR_CPUS.
> 
> apic_version[] array is changed from int to u8 -
> APIC version values as of year 2015 are no larger than 0x1f
> on all known CPUs.
> 
> A bit of code added to ensure that the statement
> 	apic_version[apicid] = version;
> in generic_processor_info() is safe wrt bad values in both
> 'apicid' and 'version' variables.
> 
> This change reduces NR_CPUS=64 kernel's data size by 204661 bytes:
> 
>     text     data      bss       dec     hex filename
> 91353669 13825744 19021824 124201237 7672915 vmlinux.before
> 91353680 13760336 18882560 123996576 76409a0 vmlinux
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Ingo Molnar <mingo@kernel.org>
> CC: Jiang Liu <jiang.liu@linux.intel.com>
> CC: Thomas Gleixner <tglx@linutronix.de>
> CC: Len Brown <len.brown@intel.com>
> CC: x86@kernel.org
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/Kconfig               | 11 +++++++++++
>  arch/x86/include/asm/apicdef.h | 23 +++++++++++++++++------
>  arch/x86/include/asm/mpspec.h  |  2 +-
>  arch/x86/kernel/apic/apic.c    | 19 ++++++++++++++++++-
>  4 files changed, 47 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 328c835..9e7c4c1 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -872,6 +872,17 @@ config NR_CPUS
>  	  This is purely to save memory - each supported CPU adds
>  	  approximately eight kilobytes to the kernel image.
>  
> +config MAX_LAPIC_ID
> +	int "Maximum APIC ID"
> +	range 8 32768
> +	default "8"
> +	---help---
> +	  Use this option to set maximum allowed Local APIC ID higher than
> +	  maximum number of CPUs. This may be necessary for machines
> +	  with large number of processor sockets and non-contiguous
> +	  LAPIC numbering.
> +	  This setting will be automatically rounded up, if necessary.
> +
>  config SCHED_SMT
>  	bool "SMT (Hyperthreading) scheduler support"
>  	depends on SMP
> diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
> index c46bb99..64e2476 100644
> --- a/arch/x86/include/asm/apicdef.h
> +++ b/arch/x86/include/asm/apicdef.h
> @@ -147,15 +147,26 @@
>  #define XAPIC_ENABLE	(1UL << 11)
>  #define X2APIC_ENABLE	(1UL << 10)
>  
> -#ifdef CONFIG_X86_32
> -# define MAX_IO_APICS 64
> -# define MAX_LOCAL_APIC 256
> -#else
> -# define MAX_IO_APICS 128
> -# define MAX_LOCAL_APIC 32768
> +/*
> + * Allow non-contiguous APIC IDs for small machines:
> + * APIC ids 0..15 are valid in any config.
> + * Typical SMP machines have contiguous APIC IDs: 0..NR_CPUS-1.
> + * CONFIG_MAX_LAPIC_ID can override.
> + */
> +#define MAX_LOCAL_APIC (NR_CPUS < 16 ? 16 : NR_CPUS)
> +#if MAX_LOCAL_APIC < CONFIG_MAX_LAPIC_ID
> +# undef  MAX_LOCAL_APIC
> +# define MAX_LOCAL_APIC CONFIG_MAX_LAPIC_ID
>  #endif
>  
>  /*
> + * Minimum is 8.
> + * For largish NR_CPUS, we expect to have no more IOAPICs than CPUs.
> + * No matter how large NR_CPUS is, max is 128.
> + */
> +#define MAX_IO_APICS (NR_CPUS < 8 ? 8 : NR_CPUS < 128 ? NR_CPUS : 128)
This is a little risky. For example, a typical eight-socket Intel
platform will have nine IOAPICs. IO devices may become inaccessible
if some IOAPICs are ignored due to the MAX_IO_APICS limit. It would
be surprising for IO devices to get lost just because the user runs
a kernel built with a low NR_CPUS.
Thanks!
Gerry

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC] x86: Reduce MAX_LOCAL_APIC and MAX_IO_APICS
  2015-09-30 15:49   ` Denys Vlasenko
  2015-09-30 17:03     ` Jiang Liu
@ 2015-09-30 17:43     ` Thomas Gleixner
  1 sibling, 0 replies; 7+ messages in thread
From: Thomas Gleixner @ 2015-09-30 17:43 UTC (permalink / raw)
  To: Denys Vlasenko; +Cc: Ingo Molnar, Jiang Liu, Len Brown, x86, linux-kernel

On Wed, 30 Sep 2015, Denys Vlasenko wrote:
> On 09/30/2015 05:11 PM, Thomas Gleixner wrote:
> > On Fri, 25 Sep 2015, Denys Vlasenko wrote:
> >>  
> >> +config MAX_LAPIC_ID
> >> +	int "Maximum APIC ID"
> >> +	range 8 32768
> >> +	default "8"
> >> +	---help---
> >> +	  Use this option to set maximum allowed Local APIC ID higher than
> >> +	  maximum number of CPUs. This may be necessary for machines
> >> +	  with large number of processor sockets and non-contiguous
> >> +	  LAPIC numbering.
> >> +	  This setting will be automatically rounded up, if necessary.
> > 
> > This is wrong. If you would limit the APIC IDs then you really break
> > stuff. You can only limit the number of APICs.
> 
> This CONFIG setting allows to _increase_ max APIC ID.

NO. This is crap. I don't want to tweak a gazillion of knobs just to
build a kernel with CONFIG_NR_CPUS=8. Really not.

If you really want to make that space saving, then make it a runtime
allocation.
 
Thanks,

	tglx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC] x86: Reduce MAX_LOCAL_APIC and MAX_IO_APICS
@ 2015-10-02  7:31 Daniel J Blueman
  0 siblings, 0 replies; 7+ messages in thread
From: Daniel J Blueman @ 2015-10-02  7:31 UTC (permalink / raw)
  To: Denys Vlasenko, Thomas Gleixner, Jiang Liu, Len Brown
  Cc: the arch/x86 maintainers, Linux Kernel Mailing List,
	Steffen Persvold

On Saturday, September 26, 2015 at 4:40:07 AM UTC+8, Denys Vlasenko 
wrote:
 > Before this change MAX_LOCAL_APIC had the fixed value of 32*1024.
 > Such a big value causes several data arrays to be quite oversized:
 >
 > phys_cpu_present_map is 4 kbytes (one bit per apic id),
 > __apicid_to_node[] is 64 kbytes,
 > apic_version[] is 128 kbytes.
 >
 > On "usual" systems, APIC ids simply go from zero
 > to maximum logical CPU number, mirroring CPU ids.
 >
 > On broken and unusual multi-socket systems
 > APIC ids can be non-contiguous.

The Intel x2APIC spec states that the upper 16 bits of the APIC ID are
the cluster ID [1, p2-12], intended for future distributed systems. Beyond
the legacy 8-bit APIC ID, Numascale NumaConnect uses 4 bits for the
position of a server on each axis of a multi-dimensional torus; SGI
NUMAlink also structures the APIC ID space.

Instead, define an array based on NR_CPUS to achieve a 1:1 mapping and
perform a linear search; this addresses the binary bloat and the present
artificial APIC ID limits. With CONFIG_NR_CPUS=256:

$ size vmlinux vmlinux-patched
  text    data     bss     dec     hex filename
18232877        1849656 2281472 22364005        1553f65 vmlinux
18233034        1786168 2281472 22300674        1544802 vmlinux-patched

Works peachy on a 256-core system with a 20-bit APIC ID space, and on a
48-core legacy 8-bit APIC ID system. If we care, I can make
numa_cpu_node an O(1) lookup for typical cases.

Signed-off-by: Daniel J Blueman <daniel@numascale.com>

Daniel

[1] 
http://www.intel.com/content/dam/doc/specification-update/64-architecture-x2apic-specification.pdf

---
arch/x86/include/asm/numa.h | 13 +++++++------
arch/x86/kernel/cpu/amd.c   |  8 ++++----
arch/x86/mm/numa.c          | 31 +++++++++++++++++++++++--------
3 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 01b493e..33becb8 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -17,6 +17,11 @@
 */
#define NODE_MIN_SIZE (4*1024*1024)

+struct apicid_to_node {
+       int apicid;
+       s16 node;
+};
+
extern int numa_off;

/*
@@ -27,17 +32,13 @@ extern int numa_off;
 * should be accessed by the accessors - set_apicid_to_node() and
 * numa_cpu_node().
 */
-extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+extern struct apicid_to_node __apicid_to_node[NR_CPUS];
extern nodemask_t numa_nodes_parsed __initdata;

extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
extern void __init numa_set_distance(int from, int to, int distance);

-static inline void set_apicid_to_node(int apicid, s16 node)
-{
-       __apicid_to_node[apicid] = node;
-}
-
+extern void set_apicid_to_node(int apicid, s16 node);
extern int numa_cpu_node(int cpu);

#else  /* CONFIG_NUMA */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 4a70fc6..e65c01c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -277,12 +277,12 @@ static int nearby_node(int apicid)
       int i, node;

       for (i = apicid - 1; i >= 0; i--) {
-               node = __apicid_to_node[i];
+               node = __apicid_to_node[i].node;
               if (node != NUMA_NO_NODE && node_online(node))
                       return node;
       }
       for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
-               node = __apicid_to_node[i];
+               node = __apicid_to_node[i].node;
               if (node != NUMA_NO_NODE && node_online(node))
                       return node;
       }
@@ -422,8 +422,8 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
               int ht_nodeid = c->initial_apicid;

               if (ht_nodeid >= 0 &&
-                   __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-                       node = __apicid_to_node[ht_nodeid];
+                   __apicid_to_node[ht_nodeid].node != NUMA_NO_NODE)
+                       node = __apicid_to_node[ht_nodeid].node;
               /* Pick a nearby node */
               if (!node_online(node))
                       node = nearby_node(apicid);
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index c3b3f65..70f03a0 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -56,16 +56,34 @@ early_param("numa", numa_setup);
/*
 * apicid, cpu, node mappings
 */
-s16 __apicid_to_node[MAX_LOCAL_APIC] = {
-       [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+
+struct apicid_to_node __apicid_to_node[NR_CPUS] = {
+       [0 ... NR_CPUS-1] = {-1, NUMA_NO_NODE}
};

+void set_apicid_to_node(int apicid, s16 node)
+{
+       static int ent;
+
+       /* Protect against small kernel on large system */
+       if (ent >= NR_CPUS)
+               return;
+
+       __apicid_to_node[ent].apicid = apicid;
+       __apicid_to_node[ent].node = node;
+       ent++;
+}
+
int numa_cpu_node(int cpu)
{
-       int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+       int ent, apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+       if (apicid == BAD_APICID)
+               return NUMA_NO_NODE;
+
+       for (ent = 0; ent < NR_CPUS; ent++)
+               if (__apicid_to_node[ent].apicid == apicid)
+                       return __apicid_to_node[ent].node;

-       if (apicid != BAD_APICID)
-               return __apicid_to_node[apicid];
       return NUMA_NO_NODE;
}

@@ -607,9 +625,6 @@ static int __init numa_init(int (*init_func)(void))
       int i;
       int ret;

-       for (i = 0; i < MAX_LOCAL_APIC; i++)
-               set_apicid_to_node(i, NUMA_NO_NODE);
-
       nodes_clear(numa_nodes_parsed);
       nodes_clear(node_possible_map);
       nodes_clear(node_online_map);



^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-10-02  7:46 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-25 19:48 [PATCH RFC] x86: Reduce MAX_LOCAL_APIC and MAX_IO_APICS Denys Vlasenko
2015-09-30 15:11 ` Thomas Gleixner
2015-09-30 15:49   ` Denys Vlasenko
2015-09-30 17:03     ` Jiang Liu
2015-09-30 17:43     ` Thomas Gleixner
2015-09-30 17:18 ` Jiang Liu
  -- strict thread matches above, loose matches on Subject: below --
2015-10-02  7:31 Daniel J Blueman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).