From: Mel Gorman <mgorman@techsingularity.net>
To: Linux-MM <linux-mm@kvack.org>
Cc: Linux-RT-Users <linux-rt-users@vger.kernel.org>,
LKML <linux-kernel@vger.kernel.org>,
Chuck Lever <chuck.lever@oracle.com>,
Jesper Dangaard Brouer <brouer@redhat.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@techsingularity.net>
Subject: [PATCH 2/6] mm/page_alloc: Convert per-cpu list protection to local_lock
Date: Mon, 29 Mar 2021 13:06:44 +0100 [thread overview]
Message-ID: <20210329120648.19040-3-mgorman@techsingularity.net> (raw)
In-Reply-To: <20210329120648.19040-1-mgorman@techsingularity.net>
It is unclear what exactly local_irq_save/local_irq_restore protects in
page_alloc.c. The existing usage conflates the protection of per-cpu page
allocation structures with per-cpu vmstat deltas.
This patch protects the PCP structure using local_lock, which
for most configurations is identical to IRQ enabling/disabling.
The scope of the lock is still wider than it should be, but this is
reduced in later patches. The per-cpu vmstat deltas are protected by
preempt_disable/preempt_enable where necessary instead of relying on
IRQ disable/enable.
[lkp@intel.com: Make pagesets static]
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
include/linux/mmzone.h | 2 ++
mm/page_alloc.c | 43 ++++++++++++++++++++++++------------------
mm/vmstat.c | 4 ++++
3 files changed, 31 insertions(+), 18 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a4393ac27336..106da8fbc72a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -20,6 +20,7 @@
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
+#include <linux/local_lock.h>
#include <asm/page.h>
/* Free memory management - zoned buddy allocator. */
@@ -337,6 +338,7 @@ enum zone_watermarks {
#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
+/* Fields and list protected by pagesets local_lock in page_alloc.c */
struct per_cpu_pages {
int count; /* number of pages in the list */
int high; /* high watermark, emptying needed */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 32006e66564a..7f8c73020688 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -112,6 +112,13 @@ typedef int __bitwise fpi_t;
static DEFINE_MUTEX(pcp_batch_high_lock);
#define MIN_PERCPU_PAGELIST_FRACTION (8)
+struct pagesets {
+ local_lock_t lock;
+};
+static DEFINE_PER_CPU(struct pagesets, pagesets) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
+
#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DEFINE_PER_CPU(int, numa_node);
EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -2962,12 +2969,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
unsigned long flags;
int to_drain, batch;
- local_irq_save(flags);
+ local_lock_irqsave(&pagesets.lock, flags);
batch = READ_ONCE(pcp->batch);
to_drain = min(pcp->count, batch);
if (to_drain > 0)
free_pcppages_bulk(zone, to_drain, pcp);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&pagesets.lock, flags);
}
#endif
@@ -2983,13 +2990,13 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
unsigned long flags;
struct per_cpu_pages *pcp;
- local_irq_save(flags);
+ local_lock_irqsave(&pagesets.lock, flags);
pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
if (pcp->count)
free_pcppages_bulk(zone, pcp->count, pcp);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&pagesets.lock, flags);
}
/*
@@ -3252,9 +3259,9 @@ void free_unref_page(struct page *page)
if (!free_unref_page_prepare(page, pfn))
return;
- local_irq_save(flags);
+ local_lock_irqsave(&pagesets.lock, flags);
free_unref_page_commit(page, pfn);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&pagesets.lock, flags);
}
/*
@@ -3274,7 +3281,7 @@ void free_unref_page_list(struct list_head *list)
set_page_private(page, pfn);
}
- local_irq_save(flags);
+ local_lock_irqsave(&pagesets.lock, flags);
list_for_each_entry_safe(page, next, list, lru) {
unsigned long pfn = page_private(page);
@@ -3287,12 +3294,12 @@ void free_unref_page_list(struct list_head *list)
* a large list of pages to free.
*/
if (++batch_count == SWAP_CLUSTER_MAX) {
- local_irq_restore(flags);
+ local_unlock_irqrestore(&pagesets.lock, flags);
batch_count = 0;
- local_irq_save(flags);
+ local_lock_irqsave(&pagesets.lock, flags);
}
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(&pagesets.lock, flags);
}
/*
@@ -3449,7 +3456,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct page *page;
unsigned long flags;
- local_irq_save(flags);
+ local_lock_irqsave(&pagesets.lock, flags);
pcp = this_cpu_ptr(zone->per_cpu_pageset);
list = &pcp->lists[migratetype];
page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list);
@@ -3457,7 +3464,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
zone_statistics(preferred_zone, zone);
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(&pagesets.lock, flags);
return page;
}
@@ -5052,7 +5059,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
goto failed;
/* Attempt the batch allocation */
- local_irq_save(flags);
+ local_lock_irqsave(&pagesets.lock, flags);
pcp = this_cpu_ptr(zone->per_cpu_pageset);
pcp_list = &pcp->lists[ac.migratetype];
@@ -5090,7 +5097,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
nr_populated++;
}
- local_irq_restore(flags);
+ local_unlock_irqrestore(&pagesets.lock, flags);
return nr_populated;
@@ -8958,12 +8965,13 @@ void zone_pcp_enable(struct zone *zone)
void zone_pcp_reset(struct zone *zone)
{
- unsigned long flags;
int cpu;
struct per_cpu_zonestat *pzstats;
- /* avoid races with drain_pages() */
- local_irq_save(flags);
+ /*
+ * No race with drain_pages. drain_zonestat disables preemption
+ * and drain_pages relies on the pcp local_lock.
+ */
if (zone->per_cpu_pageset != &boot_pageset) {
for_each_online_cpu(cpu) {
pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
@@ -8974,7 +8982,6 @@ void zone_pcp_reset(struct zone *zone)
zone->per_cpu_pageset = &boot_pageset;
zone->per_cpu_zonestats = &boot_zonestats;
}
- local_irq_restore(flags);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8a8f1a26b231..01b74ff73549 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -887,6 +887,7 @@ void cpu_vm_stats_fold(int cpu)
pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
+ preempt_disable();
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
if (pzstats->vm_stat_diff[i]) {
int v;
@@ -908,6 +909,7 @@ void cpu_vm_stats_fold(int cpu)
global_numa_diff[i] += v;
}
#endif
+ preempt_enable();
}
for_each_online_pgdat(pgdat) {
@@ -941,6 +943,7 @@ void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
{
int i;
+ preempt_disable();
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
if (pzstats->vm_stat_diff[i]) {
int v = pzstats->vm_stat_diff[i];
@@ -959,6 +962,7 @@ void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
atomic_long_add(v, &vm_numa_stat[i]);
}
#endif
+ preempt_enable();
}
#endif
--
2.26.2
next prev parent reply other threads:[~2021-03-29 12:17 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-29 12:06 [RFC PATCH 0/6] Use local_lock for pcp protection and reduce stat overhead Mel Gorman
2021-03-29 12:06 ` [PATCH 1/6] mm/page_alloc: Split per cpu page lists and zone stats Mel Gorman
2021-03-29 12:06 ` Mel Gorman [this message]
2021-03-31 9:55 ` [PATCH 2/6] mm/page_alloc: Convert per-cpu list protection to local_lock Thomas Gleixner
2021-03-31 9:55 ` Thomas Gleixner
2021-03-31 11:01 ` Mel Gorman
2021-03-31 17:42 ` Thomas Gleixner
2021-03-31 17:46 ` Thomas Gleixner
2021-03-31 20:42 ` Mel Gorman
2021-03-29 12:06 ` [PATCH 3/6] mm/vmstat: Convert NUMA statistics to basic NUMA counters Mel Gorman
2021-03-29 12:06 ` [PATCH 4/6] mm/vmstat: Inline NUMA event counter updates Mel Gorman
2021-03-29 12:06 ` [PATCH 5/6] mm/page_alloc: Batch the accounting updates in the bulk allocator Mel Gorman
2021-03-29 12:06 ` [PATCH 6/6] mm/page_alloc: Reduce duration that IRQs are disabled for VM counters Mel Gorman
2021-03-30 18:51 ` [RFC PATCH 0/6] Use local_lock for pcp protection and reduce stat overhead Jesper Dangaard Brouer
2021-03-31 7:38 ` Mel Gorman
2021-03-31 8:17 ` Jesper Dangaard Brouer
2021-03-31 8:52 ` Mel Gorman
2021-03-31 9:51 ` Thomas Gleixner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210329120648.19040-3-mgorman@techsingularity.net \
--to=mgorman@techsingularity.net \
--cc=brouer@redhat.com \
--cc=chuck.lever@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-rt-users@vger.kernel.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.