From mboxrd@z Thu Jan  1 00:00:00 1970
From: Waiman Long <longman@redhat.com>
Subject: [PATCH v2 1/5] x86/smp: Add saturated +1/+2 1-byte cpu numbers
Date: Thu, 16 Jul 2020 15:29:23 -0400
Message-ID: <20200716192927.12944-2-longman@redhat.com>
In-Reply-To: <20200716192927.12944-1-longman@redhat.com>
References: <20200716192927.12944-1-longman@redhat.com>
Sender: linux-kernel-owner@vger.kernel.org
List-Id: linux-arch.vger.kernel.org
To: Peter Zijlstra, Ingo Molnar, Will Deacon, Thomas Gleixner,
    Borislav Petkov, Arnd Bergmann
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
    linux-arch@vger.kernel.org, Nicholas Piggin, Davidlohr Bueso,
    Waiman Long

Both qspinlock and qrwlock use one whole byte to store the binary
lock/unlock state. We can actually store more information in the lock
byte, such as an encoded lock holder cpu number, to aid debugging and
crash dump analysis. To make that possible, saturated +1 and +2 1-byte
per-cpu cpu numbers are added. The qrwlock can use the +1 number for
the lock-holding writer and the qspinlock can use the +2 number for the
lock holder.

The new per-cpu numbers are placed right after the commonly used
cpu_number (smp_processor_id()), which has more than 1700 references in
the kernel. These new cpu numbers are therefore very likely to be
located in the same hot cacheline as cpu_number. As these numbers come
before the unsigned long this_cpu_off, they fit into existing alignment
padding and no additional percpu space is consumed on x86-64.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 arch/x86/include/asm/spinlock.h |  5 +++++
 arch/x86/kernel/setup_percpu.c  | 11 +++++++++++
 2 files changed, 16 insertions(+)

diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 5b6bc7016c22..319fa58caa9b 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -10,6 +10,11 @@
 #include
 #include
 
+DECLARE_PER_CPU_READ_MOSTLY(u8, cpu_number_sadd1);
+DECLARE_PER_CPU_READ_MOSTLY(u8, cpu_number_sadd2);
+#define __cpu_number_sadd1	this_cpu_read(cpu_number_sadd1)
+#define __cpu_number_sadd2	this_cpu_read(cpu_number_sadd2)
+
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
  *
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index fd945ce78554..859c5b950d08 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -26,6 +26,14 @@
 DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
 EXPORT_PER_CPU_SYMBOL(cpu_number);
 
+/*
+ * Saturated +1 and +2 1-byte cpu numbers
+ */
+DEFINE_PER_CPU_READ_MOSTLY(u8, cpu_number_sadd1);	/* +1 saturated cpu# */
+DEFINE_PER_CPU_READ_MOSTLY(u8, cpu_number_sadd2);	/* +2 saturated cpu# */
+EXPORT_PER_CPU_SYMBOL(cpu_number_sadd1);
+EXPORT_PER_CPU_SYMBOL(cpu_number_sadd2);
+
 #ifdef CONFIG_X86_64
 #define BOOT_PERCPU_OFFSET	((unsigned long)__per_cpu_load)
 #else
@@ -223,6 +231,9 @@ void __init setup_per_cpu_areas(void)
 		per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
 		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 		per_cpu(cpu_number, cpu) = cpu;
+		per_cpu(cpu_number_sadd1, cpu) = (cpu + 1 < 0x100) ? cpu + 1 : 0xff;
+		per_cpu(cpu_number_sadd2, cpu) = (cpu + 2 < 0x100) ? cpu + 2 : 0xff;
+
 		setup_percpu_segment(cpu);
 		setup_stack_canary_segment(cpu);
 		/*
-- 
2.18.1
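To illustrate the encoding (this sketch is not part of the patch): a
debugger or crash-dump tool could decode a lock byte as below. The
helper name decode_lock_holder is hypothetical; the bias is 1 for a
qrwlock writer byte (cpu_number_sadd1) and 2 for a qspinlock holder
byte (cpu_number_sadd2). Note that 0xff is ambiguous, since it can be
either a legitimately stored value or the saturation clamp for a large
cpu number.

#include <stdio.h>

/*
 * Hypothetical decoder, assuming the encoding above: a qrwlock writer
 * stores cpu + 1 (cpu_number_sadd1) and a qspinlock holder stores
 * cpu + 2 (cpu_number_sadd2), both clamped to 0xff.
 *
 * bias: 1 for qrwlock, 2 for qspinlock.
 */
static int decode_lock_holder(unsigned char byte, int bias)
{
	if (byte == 0)
		return -1;	/* no holder encoded (lock not held) */
	if (byte == 0xff)
		return -2;	/* saturated: holder cpu is 0xff - bias or higher */
	return byte - bias;	/* exact holder cpu number */
}

int main(void)
{
	unsigned char qspinlock_byte = 0x07;	/* e.g. read from a crash dump */

	/* prints 5: cpu 5 would have stored 5 + 2 = 0x07 in the lock byte */
	printf("holder cpu: %d\n", decode_lock_holder(qspinlock_byte, 2));
	return 0;
}

A zero byte still reads as "unlocked", while any other value identifies
the holder exactly for all but the highest cpu numbers.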