* [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
@ 2020-09-03 7:01 Alex Shi
2020-09-03 7:01 ` [PATCH v4 2/4] mm/pageblock: remove false sharing in pageblock_flags Alex Shi
` (3 more replies)
0 siblings, 4 replies; 21+ messages in thread
From: Alex Shi @ 2020-09-03 7:01 UTC (permalink / raw)
To: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
Vlastimil Babka, Alexander Duyck
Cc: Andrew Morton, Mel Gorman, linux-mm, linux-kernel
pageblock_flags is used as long, since every pageblock_flags is just 4
bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
flags. It would cause long waiting for sync.
If we could change the pageblock_flags variable as char, we could use
char size cmpxchg, which just sync up with 2 pageblock flags. it could
relief the false sharing in cmpxchg.
Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
include/linux/mmzone.h | 6 +++---
include/linux/pageblock-flags.h | 2 +-
mm/page_alloc.c | 38 +++++++++++++++++++-------------------
3 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8379432f4f2f..be676e659fb7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -437,7 +437,7 @@ struct zone {
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
* In SPARSEMEM, this map is stored in struct mem_section
*/
- unsigned long *pageblock_flags;
+ unsigned char *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1159,7 @@ struct mem_section_usage {
DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
#endif
/* See declaration of similar field in struct zone */
- unsigned long pageblock_flags[0];
+ unsigned char pageblock_flags[0];
};
void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1212,7 @@ struct mem_section {
extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
#endif
-static inline unsigned long *section_to_usemap(struct mem_section *ms)
+static inline unsigned char *section_to_usemap(struct mem_section *ms)
{
return ms->usage->pageblock_flags;
}
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fff52ad370c1..d189441568eb 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -54,7 +54,7 @@ enum pageblock_bits {
/* Forward declaration */
struct page;
-unsigned long get_pfnblock_flags_mask(struct page *page,
+unsigned char get_pfnblock_flags_mask(struct page *page,
unsigned long pfn,
unsigned long mask);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fab5e97dc9ca..81e96d4d9c42 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
#endif
/* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned long *get_pageblock_bitmap(struct page *page,
+static inline unsigned char *get_pageblock_bitmap(struct page *page,
unsigned long pfn)
{
#ifdef CONFIG_SPARSEMEM
@@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
* Return: pageblock_bits flags
*/
static __always_inline
-unsigned long __get_pfnblock_flags_mask(struct page *page,
+unsigned char __get_pfnblock_flags_mask(struct page *page,
unsigned long pfn,
unsigned long mask)
{
- unsigned long *bitmap;
- unsigned long bitidx, word_bitidx;
- unsigned long word;
+ unsigned char *bitmap;
+ unsigned long bitidx, byte_bitidx;
+ unsigned char byte;
bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- word_bitidx = bitidx / BITS_PER_LONG;
- bitidx &= (BITS_PER_LONG-1);
+ byte_bitidx = bitidx / BITS_PER_BYTE;
+ bitidx &= (BITS_PER_BYTE-1);
- word = bitmap[word_bitidx];
- return (word >> bitidx) & mask;
+ byte = bitmap[byte_bitidx];
+ return (byte >> bitidx) & mask;
}
-unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
unsigned long mask)
{
return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -513,29 +513,29 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long pfn,
unsigned long mask)
{
- unsigned long *bitmap;
- unsigned long bitidx, word_bitidx;
- unsigned long old_word, word;
+ unsigned char *bitmap;
+ unsigned long bitidx, byte_bitidx;
+ unsigned char old_byte, byte;
BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
- word_bitidx = bitidx / BITS_PER_LONG;
- bitidx &= (BITS_PER_LONG-1);
+ byte_bitidx = bitidx / BITS_PER_BYTE;
+ bitidx &= (BITS_PER_BYTE-1);
VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
mask <<= bitidx;
flags <<= bitidx;
- word = READ_ONCE(bitmap[word_bitidx]);
+ byte = READ_ONCE(bitmap[byte_bitidx]);
for (;;) {
- old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
- if (word == old_word)
+ old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
+ if (byte == old_byte)
break;
- word = old_word;
+ byte = old_byte;
}
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 21+ messages in thread* [PATCH v4 2/4] mm/pageblock: remove false sharing in pageblock_flags 2020-09-03 7:01 [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi @ 2020-09-03 7:01 ` Alex Shi 2020-09-03 7:01 ` Alex Shi ` (2 subsequent siblings) 3 siblings, 0 replies; 21+ messages in thread From: Alex Shi @ 2020-09-03 7:01 UTC (permalink / raw) To: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Vlastimil Babka, Alexander Duyck Cc: Andrew Morton, Mel Gorman, linux-kernel, linux-mm Current pageblock_flags is only 4 bits, so it has to share a char size in cmpxchg when get set, the false sharing cause perf drop. If we incrase the bits up to 8, false sharing would gone in cmpxchg. and the only cost is half char per pageblock, which is half char per 128MB on x86, 4 chars in 1 GB. Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org --- include/linux/page-isolation.h | 2 +- include/linux/pageblock-flags.h | 2 +- mm/page_alloc.c | 10 +++------- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 572458016331..baa2e1ecc134 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -35,7 +35,7 @@ static inline bool is_migrate_isolate(int migratetype) struct page *has_unmovable_pages(struct zone *zone, struct page *page, int migratetype, int flags); -void set_pageblock_migratetype(struct page *page, int migratetype); +void set_pageblock_migratetype(struct page *page, unsigned char migratetype); int move_freepages_block(struct zone *zone, struct page *page, int migratetype, int *num_movable); diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index d189441568eb..f785c9d6d68c 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -25,7 +25,7 @@ enum pageblock_bits { * Assume the bits will always align on a word. If this assumption * changes then get/set pageblock needs updating. */ - NR_PAGEBLOCK_BITS + NR_PAGEBLOCK_BITS = BITS_PER_BYTE }; #ifdef CONFIG_HUGETLB_PAGE diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 81e96d4d9c42..3688e6b83318 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -517,19 +517,15 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags, unsigned long bitidx, byte_bitidx; unsigned char old_byte, byte; - BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4); + BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE); BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits)); bitmap = get_pageblock_bitmap(page, pfn); bitidx = pfn_to_bitidx(page, pfn); byte_bitidx = bitidx / BITS_PER_BYTE; - bitidx &= (BITS_PER_BYTE-1); VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); - mask <<= bitidx; - flags <<= bitidx; - byte = READ_ONCE(bitmap[byte_bitidx]); for (;;) { old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags); @@ -539,13 +535,13 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags, } } -void set_pageblock_migratetype(struct page *page, int migratetype) +void set_pageblock_migratetype(struct page *page, unsigned char migratetype) { if (unlikely(page_group_by_mobility_disabled && migratetype < MIGRATE_PCPTYPES)) migratetype = MIGRATE_UNMOVABLE; - set_pfnblock_flags_mask(page, (unsigned long)migratetype, + set_pfnblock_flags_mask(page, migratetype, page_to_pfn(page), MIGRATETYPE_MASK); } -- 1.8.3.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue 2020-09-03 7:01 [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi 2020-09-03 7:01 ` [PATCH v4 2/4] mm/pageblock: remove false sharing in pageblock_flags Alex Shi @ 2020-09-03 7:01 ` Alex Shi 2020-09-03 7:24 ` [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Mel Gorman 2020-09-03 8:19 ` David Hildenbrand 3 siblings, 0 replies; 21+ messages in thread From: Alex Shi @ 2020-09-03 7:01 UTC (permalink / raw) To: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Vlastimil Babka, Alexander Duyck Cc: Chris Zankel, Rich Felker, Yoshinori Sato, linux-sh, linux-xtensa, Russell King, linux-kernel, linux-mm, Max Filippov, Baolin Wang, sparclinux, Andrew Morton, David S. Miller, linux-arm-kernel Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use cmpxchg4 on it. Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we found. This is the first usages of cmpxchg flase sharing change. We'd better check more cmpxchg usages in current kernel... Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Cc: Russell King <linux@armlinux.org.uk> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: Rich Felker <dalias@libc.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Chris Zankel <chris@zankel.net> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-mm@kvack.org --- arch/Kconfig | 3 +++ arch/arm/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/sparc/Kconfig | 1 + arch/xtensa/Kconfig | 1 + include/linux/mmzone.h | 15 ++++++++++++--- mm/page_alloc.c | 22 +++++++++++----------- 7 files changed, 30 insertions(+), 14 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index af14a567b493..3514570c0f5f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -431,6 +431,9 @@ config HAVE_CMPXCHG_LOCAL config HAVE_CMPXCHG_DOUBLE bool +config NO_CMPXCHG_BYTE + bool + config ARCH_WEAK_RELEASE_ACQUIRE bool diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e00d94b16658..03a6c7fd999d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -48,6 +48,7 @@ config ARM select GENERIC_ALLOCATOR select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI + select NO_CMPXCHG_BYTE if CPU_V6 select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index d20927128fce..4c7f0ad5b93f 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -155,6 +155,7 @@ menu "System type" config CPU_SH2 bool select SH_INTC + select NO_CMPXCHG_BYTE config CPU_SH2A bool diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index efeff2c896a5..51ae5c8ede87 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -58,6 +58,7 @@ config SPARC32 select CLZ_TAB select HAVE_UID16 select OLD_SIGACTION + select NO_CMPXCHG_BYTE config SPARC64 def_bool 64BIT diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index e997e0119c02..862b008ab09e 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -42,6 +42,7 @@ config XTENSA select MODULES_USE_ELF_RELA select PERF_USE_VMALLOC select VIRT_TO_BUS + select NO_CMPXCHG_BYTE help Xtensa processors are 32-bit RISC machines designed by Tensilica primarily for embedded systems. These processors are both diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index be676e659fb7..0bc5ac0f8cd7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -406,6 +406,15 @@ enum zone_type { #ifndef __GENERATING_BOUNDS_H +/* cmpxchg only support 32-bits operands on ARMv6, SPARC32, sh2, XTENSA.*/ +#ifdef CONFIG_NO_CMPXCHG_BYTE +#define BITS_PER_FLAGS BITS_PER_LONG +typedef unsigned long pageblockflags_t; +#else +#define BITS_PER_FLAGS BITS_PER_BYTE +typedef unsigned char pageblockflags_t; +#endif + struct zone { /* Read-mostly fields */ @@ -437,7 +446,7 @@ struct zone { * Flags for a pageblock_nr_pages block. See pageblock-flags.h. * In SPARSEMEM, this map is stored in struct mem_section */ - unsigned char *pageblock_flags; + pageblockflags_t *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ /* zone_start_pfn = zone_start_paddr >> PAGE_SHIFT */ @@ -1159,7 +1168,7 @@ struct mem_section_usage { DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); #endif /* See declaration of similar field in struct zone */ - unsigned char pageblock_flags[0]; + pageblockflags_t pageblock_flags[0]; }; void subsection_map_init(unsigned long pfn, unsigned long nr_pages); @@ -1212,7 +1221,7 @@ struct mem_section { extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif -static inline unsigned char *section_to_usemap(struct mem_section *ms) +static inline pageblockflags_t *section_to_usemap(struct mem_section *ms) { return ms->usage->pageblock_flags; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3688e6b83318..8b65d83d8be6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn) #endif /* Return a pointer to the bitmap storing bits affecting a block of pages */ -static inline unsigned char *get_pageblock_bitmap(struct page *page, +static inline pageblockflags_t *get_pageblock_bitmap(struct page *page, unsigned long pfn) { #ifdef CONFIG_SPARSEMEM @@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn) * Return: pageblock_bits flags */ static __always_inline -unsigned char __get_pfnblock_flags_mask(struct page *page, +pageblockflags_t __get_pfnblock_flags_mask(struct page *page, unsigned long pfn, unsigned long mask) { - unsigned char *bitmap; + pageblockflags_t *bitmap; unsigned long bitidx, byte_bitidx; - unsigned char byte; + pageblockflags_t byte; bitmap = get_pageblock_bitmap(page, pfn); bitidx = pfn_to_bitidx(page, pfn); - byte_bitidx = bitidx / BITS_PER_BYTE; - bitidx &= (BITS_PER_BYTE-1); + byte_bitidx = bitidx / BITS_PER_FLAGS; + bitidx &= (BITS_PER_FLAGS - 1); byte = bitmap[byte_bitidx]; return (byte >> bitidx) & mask; } -unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn, +pageblockflags_t get_pfnblock_flags_mask(struct page *page, unsigned long pfn, unsigned long mask) { return __get_pfnblock_flags_mask(page, pfn, mask); @@ -513,16 +513,16 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags, unsigned long pfn, unsigned long mask) { - unsigned char *bitmap; + pageblockflags_t *bitmap; unsigned long bitidx, byte_bitidx; - unsigned char old_byte, byte; + pageblockflags_t old_byte, byte; - BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE); + BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_FLAGS); BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits)); bitmap = get_pageblock_bitmap(page, pfn); bitidx = pfn_to_bitidx(page, pfn); - byte_bitidx = bitidx / BITS_PER_BYTE; + byte_bitidx = bitidx / BITS_PER_FLAGS; VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); -- 1.8.3.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue @ 2020-09-03 7:01 ` Alex Shi 0 siblings, 0 replies; 21+ messages in thread From: Alex Shi @ 2020-09-03 7:01 UTC (permalink / raw) To: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Vlastimil Babka, Alexander Duyck Cc: Andrew Morton, Baolin Wang, Russell King, Yoshinori Sato, Rich Felker, David S. Miller, Chris Zankel, Max Filippov, linux-kernel, linux-arm-kernel, linux-sh, sparclinux, linux-xtensa, linux-mm Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use cmpxchg4 on it. Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we found. This is the first usages of cmpxchg flase sharing change. We'd better check more cmpxchg usages in current kernel... Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Cc: Russell King <linux@armlinux.org.uk> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: Rich Felker <dalias@libc.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Chris Zankel <chris@zankel.net> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-mm@kvack.org --- arch/Kconfig | 3 +++ arch/arm/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/sparc/Kconfig | 1 + arch/xtensa/Kconfig | 1 + include/linux/mmzone.h | 15 ++++++++++++--- mm/page_alloc.c | 22 +++++++++++----------- 7 files changed, 30 insertions(+), 14 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index af14a567b493..3514570c0f5f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -431,6 +431,9 @@ config HAVE_CMPXCHG_LOCAL config HAVE_CMPXCHG_DOUBLE bool +config NO_CMPXCHG_BYTE + bool + config ARCH_WEAK_RELEASE_ACQUIRE bool diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e00d94b16658..03a6c7fd999d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -48,6 +48,7 @@ config ARM select GENERIC_ALLOCATOR select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI + select NO_CMPXCHG_BYTE if CPU_V6 select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index d20927128fce..4c7f0ad5b93f 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -155,6 +155,7 @@ menu "System type" config CPU_SH2 bool select SH_INTC + select NO_CMPXCHG_BYTE config CPU_SH2A bool diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index efeff2c896a5..51ae5c8ede87 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -58,6 +58,7 @@ config SPARC32 select CLZ_TAB select HAVE_UID16 select OLD_SIGACTION + select NO_CMPXCHG_BYTE config SPARC64 def_bool 64BIT diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index e997e0119c02..862b008ab09e 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -42,6 +42,7 @@ config XTENSA select MODULES_USE_ELF_RELA select PERF_USE_VMALLOC select VIRT_TO_BUS + select NO_CMPXCHG_BYTE help Xtensa processors are 32-bit RISC machines designed by Tensilica primarily for embedded systems. These processors are both diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index be676e659fb7..0bc5ac0f8cd7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -406,6 +406,15 @@ enum zone_type { #ifndef __GENERATING_BOUNDS_H +/* cmpxchg only support 32-bits operands on ARMv6, SPARC32, sh2, XTENSA.*/ +#ifdef CONFIG_NO_CMPXCHG_BYTE +#define BITS_PER_FLAGS BITS_PER_LONG +typedef unsigned long pageblockflags_t; +#else +#define BITS_PER_FLAGS BITS_PER_BYTE +typedef unsigned char pageblockflags_t; +#endif + struct zone { /* Read-mostly fields */ @@ -437,7 +446,7 @@ struct zone { * Flags for a pageblock_nr_pages block. See pageblock-flags.h. * In SPARSEMEM, this map is stored in struct mem_section */ - unsigned char *pageblock_flags; + pageblockflags_t *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ @@ -1159,7 +1168,7 @@ struct mem_section_usage { DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); #endif /* See declaration of similar field in struct zone */ - unsigned char pageblock_flags[0]; + pageblockflags_t pageblock_flags[0]; }; void subsection_map_init(unsigned long pfn, unsigned long nr_pages); @@ -1212,7 +1221,7 @@ struct mem_section { extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif -static inline unsigned char *section_to_usemap(struct mem_section *ms) +static inline pageblockflags_t *section_to_usemap(struct mem_section *ms) { return ms->usage->pageblock_flags; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3688e6b83318..8b65d83d8be6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn) #endif /* Return a pointer to the bitmap storing bits affecting a block of pages */ -static inline unsigned char *get_pageblock_bitmap(struct page *page, +static inline pageblockflags_t *get_pageblock_bitmap(struct page *page, unsigned long pfn) { #ifdef CONFIG_SPARSEMEM @@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn) * Return: pageblock_bits flags */ static __always_inline -unsigned char __get_pfnblock_flags_mask(struct page *page, +pageblockflags_t __get_pfnblock_flags_mask(struct page *page, unsigned long pfn, unsigned long mask) { - unsigned char *bitmap; + pageblockflags_t *bitmap; unsigned long bitidx, byte_bitidx; - unsigned char byte; + pageblockflags_t byte; bitmap = get_pageblock_bitmap(page, pfn); bitidx = pfn_to_bitidx(page, pfn); - byte_bitidx = bitidx / BITS_PER_BYTE; - bitidx &= (BITS_PER_BYTE-1); + byte_bitidx = bitidx / BITS_PER_FLAGS; + bitidx &= (BITS_PER_FLAGS - 1); byte = bitmap[byte_bitidx]; return (byte >> bitidx) & mask; } -unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn, +pageblockflags_t get_pfnblock_flags_mask(struct page *page, unsigned long pfn, unsigned long mask) { return __get_pfnblock_flags_mask(page, pfn, mask); @@ -513,16 +513,16 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags, unsigned long pfn, unsigned long mask) { - unsigned char *bitmap; + pageblockflags_t *bitmap; unsigned long bitidx, byte_bitidx; - unsigned char old_byte, byte; + pageblockflags_t old_byte, byte; - BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE); + BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_FLAGS); BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits)); bitmap = get_pageblock_bitmap(page, pfn); bitidx = pfn_to_bitidx(page, pfn); - byte_bitidx = bitidx / BITS_PER_BYTE; + byte_bitidx = bitidx / BITS_PER_FLAGS; VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); -- 1.8.3.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue @ 2020-09-03 7:01 ` Alex Shi 0 siblings, 0 replies; 21+ messages in thread From: Alex Shi @ 2020-09-03 7:01 UTC (permalink / raw) To: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Vlastimil Babka, Alexander Duyck Cc: Chris Zankel, Rich Felker, Yoshinori Sato, linux-sh, linux-xtensa, Russell King, linux-kernel, linux-mm, Max Filippov, Baolin Wang, sparclinux, Andrew Morton, David S. Miller, linux-arm-kernel Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use cmpxchg4 on it. Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we found. This is the first usages of cmpxchg flase sharing change. We'd better check more cmpxchg usages in current kernel... Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Cc: Russell King <linux@armlinux.org.uk> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: Rich Felker <dalias@libc.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Chris Zankel <chris@zankel.net> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: linux-mm@kvack.org --- arch/Kconfig | 3 +++ arch/arm/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/sparc/Kconfig | 1 + arch/xtensa/Kconfig | 1 + include/linux/mmzone.h | 15 ++++++++++++--- mm/page_alloc.c | 22 +++++++++++----------- 7 files changed, 30 insertions(+), 14 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index af14a567b493..3514570c0f5f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -431,6 +431,9 @@ config HAVE_CMPXCHG_LOCAL config HAVE_CMPXCHG_DOUBLE bool +config NO_CMPXCHG_BYTE + bool + config ARCH_WEAK_RELEASE_ACQUIRE bool diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e00d94b16658..03a6c7fd999d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -48,6 +48,7 @@ config ARM select GENERIC_ALLOCATOR select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI + select NO_CMPXCHG_BYTE if CPU_V6 select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index d20927128fce..4c7f0ad5b93f 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -155,6 +155,7 @@ menu "System type" config CPU_SH2 bool select SH_INTC + select NO_CMPXCHG_BYTE config CPU_SH2A bool diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index efeff2c896a5..51ae5c8ede87 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -58,6 +58,7 @@ config SPARC32 select CLZ_TAB select HAVE_UID16 select OLD_SIGACTION + select NO_CMPXCHG_BYTE config SPARC64 def_bool 64BIT diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index e997e0119c02..862b008ab09e 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -42,6 +42,7 @@ config XTENSA select MODULES_USE_ELF_RELA select PERF_USE_VMALLOC select VIRT_TO_BUS + select NO_CMPXCHG_BYTE help Xtensa processors are 32-bit RISC machines designed by Tensilica primarily for embedded systems. These processors are both diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index be676e659fb7..0bc5ac0f8cd7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -406,6 +406,15 @@ enum zone_type { #ifndef __GENERATING_BOUNDS_H +/* cmpxchg only support 32-bits operands on ARMv6, SPARC32, sh2, XTENSA.*/ +#ifdef CONFIG_NO_CMPXCHG_BYTE +#define BITS_PER_FLAGS BITS_PER_LONG +typedef unsigned long pageblockflags_t; +#else +#define BITS_PER_FLAGS BITS_PER_BYTE +typedef unsigned char pageblockflags_t; +#endif + struct zone { /* Read-mostly fields */ @@ -437,7 +446,7 @@ struct zone { * Flags for a pageblock_nr_pages block. See pageblock-flags.h. * In SPARSEMEM, this map is stored in struct mem_section */ - unsigned char *pageblock_flags; + pageblockflags_t *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ @@ -1159,7 +1168,7 @@ struct mem_section_usage { DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); #endif /* See declaration of similar field in struct zone */ - unsigned char pageblock_flags[0]; + pageblockflags_t pageblock_flags[0]; }; void subsection_map_init(unsigned long pfn, unsigned long nr_pages); @@ -1212,7 +1221,7 @@ struct mem_section { extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif -static inline unsigned char *section_to_usemap(struct mem_section *ms) +static inline pageblockflags_t *section_to_usemap(struct mem_section *ms) { return ms->usage->pageblock_flags; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3688e6b83318..8b65d83d8be6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn) #endif /* Return a pointer to the bitmap storing bits affecting a block of pages */ -static inline unsigned char *get_pageblock_bitmap(struct page *page, +static inline pageblockflags_t *get_pageblock_bitmap(struct page *page, unsigned long pfn) { #ifdef CONFIG_SPARSEMEM @@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn) * Return: pageblock_bits flags */ static __always_inline -unsigned char __get_pfnblock_flags_mask(struct page *page, +pageblockflags_t __get_pfnblock_flags_mask(struct page *page, unsigned long pfn, unsigned long mask) { - unsigned char *bitmap; + pageblockflags_t *bitmap; unsigned long bitidx, byte_bitidx; - unsigned char byte; + pageblockflags_t byte; bitmap = get_pageblock_bitmap(page, pfn); bitidx = pfn_to_bitidx(page, pfn); - byte_bitidx = bitidx / BITS_PER_BYTE; - bitidx &= (BITS_PER_BYTE-1); + byte_bitidx = bitidx / BITS_PER_FLAGS; + bitidx &= (BITS_PER_FLAGS - 1); byte = bitmap[byte_bitidx]; return (byte >> bitidx) & mask; } -unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn, +pageblockflags_t get_pfnblock_flags_mask(struct page *page, unsigned long pfn, unsigned long mask) { return __get_pfnblock_flags_mask(page, pfn, mask); @@ -513,16 +513,16 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags, unsigned long pfn, unsigned long mask) { - unsigned char *bitmap; + pageblockflags_t *bitmap; unsigned long bitidx, byte_bitidx; - unsigned char old_byte, byte; + pageblockflags_t old_byte, byte; - BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE); + BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_FLAGS); BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits)); bitmap = get_pageblock_bitmap(page, pfn); bitidx = pfn_to_bitidx(page, pfn); - byte_bitidx = bitidx / BITS_PER_BYTE; + byte_bitidx = bitidx / BITS_PER_FLAGS; VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); -- 1.8.3.1 _______________________________________________ linux-arm-kernel mailing list linux-arm-kernel@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-arm-kernel ^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue 2020-09-03 7:01 ` Alex Shi (?) @ 2020-09-03 7:29 ` Max Filippov -1 siblings, 0 replies; 21+ messages in thread From: Max Filippov @ 2020-09-03 7:29 UTC (permalink / raw) To: Alex Shi Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato, David Hildenbrand, open list:SUPERH, open list:TENSILICA XTENSA PORT (xtensa), Anshuman Khandual, Russell King, Alexander Duyck, LKML, Linux Memory Management List, Matthew Wilcox, Baolin Wang, open list:SPARC + UltraSPAR..., Andrew Morton, David S. Miller, Vlastimil Babka On Thu, Sep 3, 2020 at 12:01 AM Alex Shi <alex.shi@linux.alibaba.com> wrote: > > Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use > cmpxchg4 on it. [...] > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig> index e00d94b16658..03a6c7fd999d 100644 > --- a/arch/arm/Kconfig > +++ b/arch/arm/Kconfig > @@ -48,6 +48,7 @@ config ARM > select GENERIC_ALLOCATOR > select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY > select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI > + select NO_CMPXCHG_BYTE if CPU_V6 > select GENERIC_CLOCKEVENTS_BROADCAST if SMP > select GENERIC_CPU_AUTOPROBE > select GENERIC_EARLY_IOREMAP > diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig > index d20927128fce..4c7f0ad5b93f 100644 > --- a/arch/sh/Kconfig > +++ b/arch/sh/Kconfig > @@ -155,6 +155,7 @@ menu "System type" > config CPU_SH2 > bool > select SH_INTC > + select NO_CMPXCHG_BYTE > > config CPU_SH2A > bool > diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig > index efeff2c896a5..51ae5c8ede87 100644 > --- a/arch/sparc/Kconfig > +++ b/arch/sparc/Kconfig > @@ -58,6 +58,7 @@ config SPARC32 > select CLZ_TAB > select HAVE_UID16 > select OLD_SIGACTION > + select NO_CMPXCHG_BYTE > > config SPARC64 > def_bool 64BIT > diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig > index e997e0119c02..862b008ab09e 100644 > --- a/arch/xtensa/Kconfig > +++ b/arch/xtensa/Kconfig > @@ -42,6 +42,7 @@ config XTENSA > select MODULES_USE_ELF_RELA > select PERF_USE_VMALLOC > select VIRT_TO_BUS > + select NO_CMPXCHG_BYTE Please keep the lists of select statements in Kconfig files above alphabetically sorted. -- Thanks. -- Max ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue @ 2020-09-03 7:29 ` Max Filippov 0 siblings, 0 replies; 21+ messages in thread From: Max Filippov @ 2020-09-03 7:29 UTC (permalink / raw) To: Alex Shi Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Vlastimil Babka, Alexander Duyck, Andrew Morton, Baolin Wang, Russell King, Yoshinori Sato, Rich Felker, David S. Miller, Chris Zankel, LKML, linux-arm-kernel, open list:SUPERH, open list:SPARC + UltraSPAR..., open list:TENSILICA XTENSA PORT (xtensa), Linux Memory Management List On Thu, Sep 3, 2020 at 12:01 AM Alex Shi <alex.shi@linux.alibaba.com> wrote: > > Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use > cmpxchg4 on it. [...] > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig> index e00d94b16658..03a6c7fd999d 100644 > --- a/arch/arm/Kconfig > +++ b/arch/arm/Kconfig > @@ -48,6 +48,7 @@ config ARM > select GENERIC_ALLOCATOR > select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY > select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI > + select NO_CMPXCHG_BYTE if CPU_V6 > select GENERIC_CLOCKEVENTS_BROADCAST if SMP > select GENERIC_CPU_AUTOPROBE > select GENERIC_EARLY_IOREMAP > diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig > index d20927128fce..4c7f0ad5b93f 100644 > --- a/arch/sh/Kconfig > +++ b/arch/sh/Kconfig > @@ -155,6 +155,7 @@ menu "System type" > config CPU_SH2 > bool > select SH_INTC > + select NO_CMPXCHG_BYTE > > config CPU_SH2A > bool > diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig > index efeff2c896a5..51ae5c8ede87 100644 > --- a/arch/sparc/Kconfig > +++ b/arch/sparc/Kconfig > @@ -58,6 +58,7 @@ config SPARC32 > select CLZ_TAB > select HAVE_UID16 > select OLD_SIGACTION > + select NO_CMPXCHG_BYTE > > config SPARC64 > def_bool 64BIT > diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig > index e997e0119c02..862b008ab09e 100644 > --- a/arch/xtensa/Kconfig > +++ b/arch/xtensa/Kconfig > @@ -42,6 +42,7 @@ config XTENSA > select MODULES_USE_ELF_RELA > select PERF_USE_VMALLOC > select VIRT_TO_BUS > + select NO_CMPXCHG_BYTE Please keep the lists of select statements in Kconfig files above alphabetically sorted. -- Thanks. -- Max ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue @ 2020-09-03 7:29 ` Max Filippov 0 siblings, 0 replies; 21+ messages in thread From: Max Filippov @ 2020-09-03 7:29 UTC (permalink / raw) To: Alex Shi Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato, David Hildenbrand, open list:SUPERH, open list:TENSILICA XTENSA PORT (xtensa), Anshuman Khandual, Russell King, Alexander Duyck, LKML, Linux Memory Management List, Matthew Wilcox, Baolin Wang, open list:SPARC + UltraSPAR..., Andrew Morton, David S. Miller, Vlastimil Babka On Thu, Sep 3, 2020 at 12:01 AM Alex Shi <alex.shi@linux.alibaba.com> wrote: > > Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use > cmpxchg4 on it. [...] > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig> index e00d94b16658..03a6c7fd999d 100644 > --- a/arch/arm/Kconfig > +++ b/arch/arm/Kconfig > @@ -48,6 +48,7 @@ config ARM > select GENERIC_ALLOCATOR > select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY > select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI > + select NO_CMPXCHG_BYTE if CPU_V6 > select GENERIC_CLOCKEVENTS_BROADCAST if SMP > select GENERIC_CPU_AUTOPROBE > select GENERIC_EARLY_IOREMAP > diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig > index d20927128fce..4c7f0ad5b93f 100644 > --- a/arch/sh/Kconfig > +++ b/arch/sh/Kconfig > @@ -155,6 +155,7 @@ menu "System type" > config CPU_SH2 > bool > select SH_INTC > + select NO_CMPXCHG_BYTE > > config CPU_SH2A > bool > diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig > index efeff2c896a5..51ae5c8ede87 100644 > --- a/arch/sparc/Kconfig > +++ b/arch/sparc/Kconfig > @@ -58,6 +58,7 @@ config SPARC32 > select CLZ_TAB > select HAVE_UID16 > select OLD_SIGACTION > + select NO_CMPXCHG_BYTE > > config SPARC64 > def_bool 64BIT > diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig > index e997e0119c02..862b008ab09e 100644 > --- a/arch/xtensa/Kconfig > +++ b/arch/xtensa/Kconfig > @@ -42,6 +42,7 @@ config XTENSA > select MODULES_USE_ELF_RELA > select PERF_USE_VMALLOC > select VIRT_TO_BUS > + select NO_CMPXCHG_BYTE Please keep the lists of select statements in Kconfig files above alphabetically sorted. -- Thanks. -- Max _______________________________________________ linux-arm-kernel mailing list linux-arm-kernel@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-arm-kernel ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue 2020-09-03 7:29 ` Max Filippov (?) @ 2020-09-03 8:50 ` Alex Shi -1 siblings, 0 replies; 21+ messages in thread From: Alex Shi @ 2020-09-03 8:50 UTC (permalink / raw) To: Max Filippov Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato, David Hildenbrand, open list:SUPERH, open list:TENSILICA XTENSA PORT (xtensa), Anshuman Khandual, Russell King, Alexander Duyck, LKML, Linux Memory Management List, Matthew Wilcox, Baolin Wang, open list:SPARC + UltraSPAR..., Andrew Morton, David S. Miller, Vlastimil Babka 在 2020/9/3 下午3:29, Max Filippov 写道: >> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig >> index e997e0119c02..862b008ab09e 100644 >> --- a/arch/xtensa/Kconfig >> +++ b/arch/xtensa/Kconfig >> @@ -42,6 +42,7 @@ config XTENSA >> select MODULES_USE_ELF_RELA >> select PERF_USE_VMALLOC >> select VIRT_TO_BUS >> + select NO_CMPXCHG_BYTE > Please keep the lists of select statements in Kconfig files above > alphabetically sorted. Hi Max, Thanks for the reminder, Let's comments from Mel Gorman. Thanks! ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue @ 2020-09-03 8:50 ` Alex Shi 0 siblings, 0 replies; 21+ messages in thread From: Alex Shi @ 2020-09-03 8:50 UTC (permalink / raw) To: Max Filippov Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Vlastimil Babka, Alexander Duyck, Andrew Morton, Baolin Wang, Russell King, Yoshinori Sato, Rich Felker, David S. Miller, Chris Zankel, LKML, linux-arm-kernel, open list:SUPERH, open list:SPARC + UltraSPAR..., open list:TENSILICA XTENSA PORT (xtensa), Linux Memory Management List 在 2020/9/3 下午3:29, Max Filippov 写道: >> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig >> index e997e0119c02..862b008ab09e 100644 >> --- a/arch/xtensa/Kconfig >> +++ b/arch/xtensa/Kconfig >> @@ -42,6 +42,7 @@ config XTENSA >> select MODULES_USE_ELF_RELA >> select PERF_USE_VMALLOC >> select VIRT_TO_BUS >> + select NO_CMPXCHG_BYTE > Please keep the lists of select statements in Kconfig files above > alphabetically sorted. Hi Max, Thanks for the reminder, Let's comments from Mel Gorman. Thanks! ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue @ 2020-09-03 8:50 ` Alex Shi 0 siblings, 0 replies; 21+ messages in thread From: Alex Shi @ 2020-09-03 8:50 UTC (permalink / raw) To: Max Filippov Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato, David Hildenbrand, open list:SUPERH, open list:TENSILICA XTENSA PORT (xtensa), Anshuman Khandual, Russell King, Alexander Duyck, LKML, Linux Memory Management List, Matthew Wilcox, Baolin Wang, open list:SPARC + UltraSPAR..., Andrew Morton, David S. Miller, Vlastimil Babka 在 2020/9/3 下午3:29, Max Filippov 写道: >> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig >> index e997e0119c02..862b008ab09e 100644 >> --- a/arch/xtensa/Kconfig >> +++ b/arch/xtensa/Kconfig >> @@ -42,6 +42,7 @@ config XTENSA >> select MODULES_USE_ELF_RELA >> select PERF_USE_VMALLOC >> select VIRT_TO_BUS >> + select NO_CMPXCHG_BYTE > Please keep the lists of select statements in Kconfig files above > alphabetically sorted. Hi Max, Thanks for the reminder, Let's comments from Mel Gorman. Thanks! _______________________________________________ linux-arm-kernel mailing list linux-arm-kernel@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-arm-kernel ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue 2020-09-03 7:01 ` Alex Shi (?) @ 2020-09-10 5:51 ` Christoph Hellwig -1 siblings, 0 replies; 21+ messages in thread From: Christoph Hellwig @ 2020-09-10 5:51 UTC (permalink / raw) To: Alex Shi Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato, David Hildenbrand, linux-sh, linux-xtensa, Anshuman Khandual, Russell King, Alexander Duyck, linux-kernel, linux-mm, Max Filippov, Matthew Wilcox, Baolin Wang, sparclinux, Andrew Morton, David S. Miller, Vlastimil Babka On Thu, Sep 03, 2020 at 03:01:22PM +0800, Alex Shi wrote: > Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use > cmpxchg4 on it. > > Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we > found. > > This is the first usages of cmpxchg flase sharing change. We'd better > check more cmpxchg usages in current kernel... I think a positive symbol, e.g. HAVE_CMPXCHG_BYTE is a lot easier to understand, and also fool-proof. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue @ 2020-09-10 5:51 ` Christoph Hellwig 0 siblings, 0 replies; 21+ messages in thread From: Christoph Hellwig @ 2020-09-10 5:51 UTC (permalink / raw) To: Alex Shi Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Vlastimil Babka, Alexander Duyck, Andrew Morton, Baolin Wang, Russell King, Yoshinori Sato, Rich Felker, David S. Miller, Chris Zankel, Max Filippov, linux-kernel, linux-arm-kernel, linux-sh, sparclinux, linux-xtensa, linux-mm On Thu, Sep 03, 2020 at 03:01:22PM +0800, Alex Shi wrote: > Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use > cmpxchg4 on it. > > Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we > found. > > This is the first usages of cmpxchg flase sharing change. We'd better > check more cmpxchg usages in current kernel... I think a positive symbol, e.g. HAVE_CMPXCHG_BYTE is a lot easier to understand, and also fool-proof. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue @ 2020-09-10 5:51 ` Christoph Hellwig 0 siblings, 0 replies; 21+ messages in thread From: Christoph Hellwig @ 2020-09-10 5:51 UTC (permalink / raw) To: Alex Shi Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato, David Hildenbrand, linux-sh, linux-xtensa, Anshuman Khandual, Russell King, Alexander Duyck, linux-kernel, linux-mm, Max Filippov, Matthew Wilcox, Baolin Wang, sparclinux, Andrew Morton, David S. Miller, Vlastimil Babka On Thu, Sep 03, 2020 at 03:01:22PM +0800, Alex Shi wrote: > Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use > cmpxchg4 on it. > > Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we > found. > > This is the first usages of cmpxchg flase sharing change. We'd better > check more cmpxchg usages in current kernel... I think a positive symbol, e.g. HAVE_CMPXCHG_BYTE is a lot easier to understand, and also fool-proof. _______________________________________________ linux-arm-kernel mailing list linux-arm-kernel@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-arm-kernel ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags 2020-09-03 7:01 [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi 2020-09-03 7:01 ` [PATCH v4 2/4] mm/pageblock: remove false sharing in pageblock_flags Alex Shi 2020-09-03 7:01 ` Alex Shi @ 2020-09-03 7:24 ` Mel Gorman 2020-09-03 8:32 ` Alex Shi 2020-09-03 8:19 ` David Hildenbrand 3 siblings, 1 reply; 21+ messages in thread From: Mel Gorman @ 2020-09-03 7:24 UTC (permalink / raw) To: Alex Shi Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Vlastimil Babka, Alexander Duyck, Andrew Morton, linux-mm, linux-kernel On Thu, Sep 03, 2020 at 03:01:20PM +0800, Alex Shi wrote: > pageblock_flags is used as long, since every pageblock_flags is just 4 > bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags, > that flag setting has to sync in cmpxchg with 7 or 15 other pageblock > flags. It would cause long waiting for sync. > > If we could change the pageblock_flags variable as char, we could use > char size cmpxchg, which just sync up with 2 pageblock flags. it could > relief the false sharing in cmpxchg. > > Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> Page block types were not known to change at high frequency that would cause a measurable performance drop. If anything, the performance hit from pageblocks is the lookup paths which is a lot more frequent. What was the workload you were running that altered pageblocks at a high enough frequency for collisions to occur when updating adjacent pageblocks? -- Mel Gorman SUSE Labs ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags 2020-09-03 7:24 ` [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Mel Gorman @ 2020-09-03 8:32 ` Alex Shi 2020-09-03 8:40 ` Alex Shi 2020-09-03 9:31 ` Mel Gorman 0 siblings, 2 replies; 21+ messages in thread From: Alex Shi @ 2020-09-03 8:32 UTC (permalink / raw) To: Mel Gorman Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Vlastimil Babka, Alexander Duyck, Andrew Morton, linux-mm, linux-kernel 在 2020/9/3 下午3:24, Mel Gorman 写道: > On Thu, Sep 03, 2020 at 03:01:20PM +0800, Alex Shi wrote: >> pageblock_flags is used as long, since every pageblock_flags is just 4 >> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags, >> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock >> flags. It would cause long waiting for sync. >> >> If we could change the pageblock_flags variable as char, we could use >> char size cmpxchg, which just sync up with 2 pageblock flags. it could >> relief the false sharing in cmpxchg. >> >> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> > > Page block types were not known to change at high frequency that would > cause a measurable performance drop. If anything, the performance hit > from pageblocks is the lookup paths which is a lot more frequent. Yes, it is not hot path. But it's still a meaningful points to reduce cmpxchg level false sharing which isn't right on logical. > > What was the workload you were running that altered pageblocks at a high > enough frequency for collisions to occur when updating adjacent > pageblocks? > I have run thpscale with 'always' defrag setting of THP. The Amean stddev is much larger than a very little average run time reducing. But the left patch 4 could show the cmpxchg retry reduce from thousands to hundreds or less. Subject: [PATCH v4 4/4] add cmpxchg tracing Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> --- include/trace/events/pageblock.h | 30 ++++++++++++++++++++++++++++++ mm/page_alloc.c | 4 ++++ 2 files changed, 34 insertions(+) create mode 100644 include/trace/events/pageblock.h diff --git a/include/trace/events/pageblock.h b/include/trace/events/pageblock.h new file mode 100644 index 000000000000..003c2d716f82 --- /dev/null +++ b/include/trace/events/pageblock.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM pageblock + +#if !defined(_TRACE_PAGEBLOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGEBLOCK_H + +#include <linux/tracepoint.h> + +TRACE_EVENT(hit_cmpxchg, + + TP_PROTO(char byte), + + TP_ARGS(byte), + + TP_STRUCT__entry( + __field(char, byte) + ), + + TP_fast_assign( + __entry->byte = byte; + ), + + TP_printk("%d", __entry->byte) +); + +#endif /* _TRACE_PAGE_ISOLATION_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8b65d83d8be6..a6d7159295bc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -509,6 +509,9 @@ static __always_inline int get_pfnblock_migratetype(struct page *page, unsigned * @pfn: The target page frame number * @mask: mask of bits that the caller is interested in */ +#define CREATE_TRACE_POINTS +#include <trace/events/pageblock.h> + void set_pfnblock_flags_mask(struct page *page, unsigned long flags, unsigned long pfn, unsigned long mask) @@ -532,6 +535,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags, if (byte == old_byte) break; byte = old_byte; + trace_hit_cmpxchg(byte); } } -- 1.8.3.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags 2020-09-03 8:32 ` Alex Shi @ 2020-09-03 8:40 ` Alex Shi 2020-09-03 9:00 ` Vlastimil Babka 2020-09-03 9:31 ` Mel Gorman 1 sibling, 1 reply; 21+ messages in thread From: Alex Shi @ 2020-09-03 8:40 UTC (permalink / raw) To: Mel Gorman Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Vlastimil Babka, Alexander Duyck, Andrew Morton, linux-mm, linux-kernel 在 2020/9/3 下午4:32, Alex Shi 写道: >> > I have run thpscale with 'always' defrag setting of THP. The Amean stddev is much > larger than a very little average run time reducing. > > But the left patch 4 could show the cmpxchg retry reduce from thousands to hundreds > or less. > > Subject: [PATCH v4 4/4] add cmpxchg tracing It's a typical result with the patchset: Performance counter stats for './run-mmtests.sh -c configs/config-workload-thpscale pageblock-c': 9,564 compaction:mm_compaction_isolate_migratepages 6,430 compaction:mm_compaction_isolate_freepages 5,287 compaction:mm_compaction_migratepages 45,299 compaction:mm_compaction_begin 45,299 compaction:mm_compaction_end 30,557 compaction:mm_compaction_try_to_compact_pages 95,540 compaction:mm_compaction_finished 149,379 compaction:mm_compaction_suitable 0 compaction:mm_compaction_deferred 0 compaction:mm_compaction_defer_compaction 3,949 compaction:mm_compaction_defer_reset 0 compaction:mm_compaction_kcompactd_sleep 0 compaction:mm_compaction_wakeup_kcompactd 0 compaction:mm_compaction_kcompactd_wake 68 pageblock:hit_cmpxchg 113.570974583 seconds time elapsed 14.664451000 seconds user 96.847116000 seconds sys It's 5.9-rc2 base kernel result: Performance counter stats for './run-mmtests.sh -c configs/config-workload-thpscale rc2-e': 15,920 compaction:mm_compaction_isolate_migratepages 20,523 compaction:mm_compaction_isolate_freepages 9,752 compaction:mm_compaction_migratepages 27,773 compaction:mm_compaction_begin 27,773 compaction:mm_compaction_end 16,391 compaction:mm_compaction_try_to_compact_pages 62,809 compaction:mm_compaction_finished 69,821 compaction:mm_compaction_suitable 0 compaction:mm_compaction_deferred 0 compaction:mm_compaction_defer_compaction 7,875 compaction:mm_compaction_defer_reset 0 compaction:mm_compaction_kcompactd_sleep 0 compaction:mm_compaction_wakeup_kcompactd 0 compaction:mm_compaction_kcompactd_wake 1,208 pageblock:hit_cmpxchg 116.440414591 seconds time elapsed 15.326913000 seconds user 103.752758000 seconds sys ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags 2020-09-03 8:40 ` Alex Shi @ 2020-09-03 9:00 ` Vlastimil Babka 0 siblings, 0 replies; 21+ messages in thread From: Vlastimil Babka @ 2020-09-03 9:00 UTC (permalink / raw) To: Alex Shi, Mel Gorman Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Alexander Duyck, Andrew Morton, linux-mm, linux-kernel On 9/3/20 10:40 AM, Alex Shi wrote: > > > 在 2020/9/3 下午4:32, Alex Shi 写道: >>> >> I have run thpscale with 'always' defrag setting of THP. The Amean stddev is much >> larger than a very little average run time reducing. >> >> But the left patch 4 could show the cmpxchg retry reduce from thousands to hundreds >> or less. >> >> Subject: [PATCH v4 4/4] add cmpxchg tracing > > > It's a typical result with the patchset: > > Performance counter stats for './run-mmtests.sh -c configs/config-workload-thpscale pageblock-c': > > 9,564 compaction:mm_compaction_isolate_migratepages > 6,430 compaction:mm_compaction_isolate_freepages > 5,287 compaction:mm_compaction_migratepages > 45,299 compaction:mm_compaction_begin > 45,299 compaction:mm_compaction_end > 30,557 compaction:mm_compaction_try_to_compact_pages > 95,540 compaction:mm_compaction_finished > 149,379 compaction:mm_compaction_suitable > 0 compaction:mm_compaction_deferred > 0 compaction:mm_compaction_defer_compaction > 3,949 compaction:mm_compaction_defer_reset > 0 compaction:mm_compaction_kcompactd_sleep > 0 compaction:mm_compaction_wakeup_kcompactd > 0 compaction:mm_compaction_kcompactd_wake > 68 pageblock:hit_cmpxchg > > 113.570974583 seconds time elapsed > > 14.664451000 seconds user > 96.847116000 seconds sys > > It's 5.9-rc2 base kernel result: > > Performance counter stats for './run-mmtests.sh -c configs/config-workload-thpscale rc2-e': > > 15,920 compaction:mm_compaction_isolate_migratepages > 20,523 compaction:mm_compaction_isolate_freepages > 9,752 compaction:mm_compaction_migratepages > 27,773 compaction:mm_compaction_begin > 27,773 compaction:mm_compaction_end > 16,391 compaction:mm_compaction_try_to_compact_pages > 62,809 compaction:mm_compaction_finished > 69,821 compaction:mm_compaction_suitable > 0 compaction:mm_compaction_deferred > 0 compaction:mm_compaction_defer_compaction > 7,875 compaction:mm_compaction_defer_reset > 0 compaction:mm_compaction_kcompactd_sleep > 0 compaction:mm_compaction_wakeup_kcompactd > 0 compaction:mm_compaction_kcompactd_wake > 1,208 pageblock:hit_cmpxchg > > 116.440414591 seconds time elapsed > > 15.326913000 seconds user > 103.752758000 seconds sys The runs wildly differ in many of other stats, so I'm not sure they are really comparable. I guess you could show the fraction of hit_cmpxchg to all cmpxchg. But there's also danger of tracepoints widening the race window. In the end what matters is how these 1208 retries contribute to runtime. I doubt they could be really visible in a 100+ seconds run though. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags 2020-09-03 8:32 ` Alex Shi 2020-09-03 8:40 ` Alex Shi @ 2020-09-03 9:31 ` Mel Gorman 1 sibling, 0 replies; 21+ messages in thread From: Mel Gorman @ 2020-09-03 9:31 UTC (permalink / raw) To: Alex Shi Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox, Vlastimil Babka, Alexander Duyck, Andrew Morton, linux-mm, linux-kernel On Thu, Sep 03, 2020 at 04:32:54PM +0800, Alex Shi wrote: > > > ??? 2020/9/3 ??????3:24, Mel Gorman ??????: > > On Thu, Sep 03, 2020 at 03:01:20PM +0800, Alex Shi wrote: > >> pageblock_flags is used as long, since every pageblock_flags is just 4 > >> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags, > >> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock > >> flags. It would cause long waiting for sync. > >> > >> If we could change the pageblock_flags variable as char, we could use > >> char size cmpxchg, which just sync up with 2 pageblock flags. it could > >> relief the false sharing in cmpxchg. > >> > >> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> > > > > Page block types were not known to change at high frequency that would > > cause a measurable performance drop. If anything, the performance hit > > from pageblocks is the lookup paths which is a lot more frequent. > > Yes, it is not hot path. But it's still a meaningful points to reduce cmpxchg > level false sharing which isn't right on logical. > Except there is no guarantee that false sharing was reduced. cmpxchg is still used except using the byte as the comparison for the old value and in some cases, that width will still be 32-bit for the exchange. It would be up to each architecture to see if that can be translated to a better instruction but it may not even matter. As the instruction will be prefixed with the lock instruction, the bus will be locked and bus locking is probably on the cache line boundary so there is a collision anyway while the atomic update takes place. End result -- reducing false sharing in this case is not guaranteed to help performance and may not be detectable when it's a low frequency operation but the code will behave differently depending on the architecture and CPU family. Your justification path measured the number of times a cmpxchg was retried but it did not track how many pageblock updates there were or how many potentially collided. As the workload is uncontrolled with respect to pageblock updates, you do not know if the difference in retries is due to a real reduction in collisions or a difference in the number of pageblock updates that potentially collide. Either way, because the frequency of the operation was so low relative too your target load, any difference in performance would be indistinguishable from noise. I don't think it's worth the churn in this case for an impact that will be very difficult to detect and variable across architectures and CPU families. -- Mel Gorman SUSE Labs ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags 2020-09-03 7:01 [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi ` (2 preceding siblings ...) 2020-09-03 7:24 ` [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Mel Gorman @ 2020-09-03 8:19 ` David Hildenbrand 2020-09-03 9:14 ` Alex Shi 3 siblings, 1 reply; 21+ messages in thread From: David Hildenbrand @ 2020-09-03 8:19 UTC (permalink / raw) To: Alex Shi, Anshuman Khandual, Matthew Wilcox, Vlastimil Babka, Alexander Duyck Cc: Andrew Morton, Mel Gorman, linux-mm, linux-kernel On 03.09.20 09:01, Alex Shi wrote: > pageblock_flags is used as long, since every pageblock_flags is just 4 > bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags, > that flag setting has to sync in cmpxchg with 7 or 15 other pageblock > flags. It would cause long waiting for sync. > > If we could change the pageblock_flags variable as char, we could use > char size cmpxchg, which just sync up with 2 pageblock flags. it could > relief the false sharing in cmpxchg. > > Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> > Cc: Andrew Morton <akpm@linux-foundation.org> > Cc: Mel Gorman <mgorman@techsingularity.net> > Cc: linux-mm@kvack.org > Cc: linux-kernel@vger.kernel.org Could you please 1. Send a cover letter and indicate the changees between versions. I cannot find any in my mailbox or on -mm - is there any? (also, is there a patch 4 ?) 2. Report proper performance numbers as requested, especially, over multiple runs. This should go into patch 1/2. Are they buried somewhere? 3. Clarify what patch 3 does: Do we actually waste 8*sizeof(long) where we only need 4 bits? Also, breaking stuff in patch 1 and fixing it in patch 3 is not acceptable. This breaks git bisect. Skimming over the patches I think this is the case. I am not convinced yet that we need and want this. As Alex mentioned, we touch pageblock flags while holding the zone lock already in most cases - and as Mel mentiones, updates should be rare. -- Thanks, David / dhildenb ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags 2020-09-03 8:19 ` David Hildenbrand @ 2020-09-03 9:14 ` Alex Shi 0 siblings, 0 replies; 21+ messages in thread From: Alex Shi @ 2020-09-03 9:14 UTC (permalink / raw) To: David Hildenbrand, Anshuman Khandual, Matthew Wilcox, Vlastimil Babka, Alexander Duyck Cc: Andrew Morton, Mel Gorman, linux-mm, linux-kernel 在 2020/9/3 下午4:19, David Hildenbrand 写道: > On 03.09.20 09:01, Alex Shi wrote: >> pageblock_flags is used as long, since every pageblock_flags is just 4 >> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags, >> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock >> flags. It would cause long waiting for sync. >> >> If we could change the pageblock_flags variable as char, we could use >> char size cmpxchg, which just sync up with 2 pageblock flags. it could >> relief the false sharing in cmpxchg. >> >> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com> >> Cc: Andrew Morton <akpm@linux-foundation.org> >> Cc: Mel Gorman <mgorman@techsingularity.net> >> Cc: linux-mm@kvack.org >> Cc: linux-kernel@vger.kernel.org > > Could you please > > 1. Send a cover letter and indicate the changees between versions. I > cannot find any in my mailbox or on -mm - is there any? (also, is there > a patch 4 ?) Hi David, Thanks for comments! I thought a short patchset don't need a cover. Here it's: cmpxchg is a lockless way to update data by check and compare the target data after updated and retry if target data is changed during that action. So we should just compare the exact size of target data. If the data is only byte, but cmpxchg compare a long word, that leads to a cmpxchg level flase sharing, cause more try which lock memory more times. That's a clearly logical error and should be fixed. v1, the initial version v2, fix the armv6 cmpxchgb missing issue. v3, fix more arch cmpxchg missing issue on armv6, sh2, sparc32, xtensa v4, redo cmpxchgb fix by NO_CMPXCHG_BYTE into arch/Kconfig > > 2. Report proper performance numbers as requested, especially, over > multiple runs. This should go into patch 1/2. Are they buried somewhere? There are some result sent on https://lkml.org/lkml/2020/8/30/95 > > 3. Clarify what patch 3 does: Do we actually waste 8*sizeof(long) where > we only need 4 bits? No, no waste, current kernel pack the 4 bits into long, that cause cmpxchg compare 8 pageblock flags which 7 of them are not needed. > > Also, breaking stuff in patch 1 and fixing it in patch 3 is not > acceptable. This breaks git bisect. Skimming over the patches I think > this is the case. Got it, thanks! > > I am not convinced yet that we need and want this. As Alex mentioned, we > touch pageblock flags while holding the zone lock already in most cases > - and as Mel mentiones, updates should be rare. > yes, not too much, but there are still a few. and cmpxchg retry will lock memory which I believe the less the better. Thanks Alex ^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2020-09-10 5:52 UTC | newest] Thread overview: 21+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2020-09-03 7:01 [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi 2020-09-03 7:01 ` [PATCH v4 2/4] mm/pageblock: remove false sharing in pageblock_flags Alex Shi 2020-09-03 7:01 ` [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue Alex Shi 2020-09-03 7:01 ` Alex Shi 2020-09-03 7:01 ` Alex Shi 2020-09-03 7:29 ` Max Filippov 2020-09-03 7:29 ` Max Filippov 2020-09-03 7:29 ` Max Filippov 2020-09-03 8:50 ` Alex Shi 2020-09-03 8:50 ` Alex Shi 2020-09-03 8:50 ` Alex Shi 2020-09-10 5:51 ` Christoph Hellwig 2020-09-10 5:51 ` Christoph Hellwig 2020-09-10 5:51 ` Christoph Hellwig 2020-09-03 7:24 ` [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Mel Gorman 2020-09-03 8:32 ` Alex Shi 2020-09-03 8:40 ` Alex Shi 2020-09-03 9:00 ` Vlastimil Babka 2020-09-03 9:31 ` Mel Gorman 2020-09-03 8:19 ` David Hildenbrand 2020-09-03 9:14 ` Alex Shi
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.