From: Byungchul Park <byungchul@sk.com>
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: kernel_team@skhynix.com, akpm@linux-foundation.org,
ying.huang@intel.com, namit@vmware.com, xhao@linux.alibaba.com,
mgorman@techsingularity.net, hughd@google.com,
willy@infradead.org, david@redhat.com, peterz@infradead.org,
luto@kernel.org, dave.hansen@linux.intel.com
Subject: [RFC v2 4/6] mm, migrc: Adjust __zone_watermark_ok() with the amount of pending folios
Date: Thu, 17 Aug 2023 17:05:57 +0900 [thread overview]
Message-ID: <20230817080559.43200-5-byungchul@sk.com> (raw)
In-Reply-To: <20230817080559.43200-1-byungchul@sk.com>
CONFIG_MIGRC duplicates folios that participate in migration to avoid TLB
flushes and to provide a consistent view to CPUs that are still caching
their old mappings in the TLB. However, the duplicated folios can be freed
and made available right away through appropriate TLB flushes if needed.
Adjust the watermark check routine, __zone_watermark_ok(), by the number
of duplicated folios, and make the page allocation routine perform TLB
flushes and free the duplicated folios when it is in trouble due to memory
pressure, even more aggressively for high-order allocations.
Signed-off-by: Byungchul Park <byungchul@sk.com>
---
include/linux/mm.h | 2 ++
include/linux/mmzone.h | 3 +++
mm/migrate.c | 12 ++++++++++++
mm/page_alloc.c | 16 ++++++++++++++++
4 files changed, 33 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ceec7f3591e..9df393074e6a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3827,6 +3827,7 @@ bool migrc_try_flush_free_folios(void);
void migrc_try_flush_free_folios_dirty(void);
struct migrc_req *fold_ubc_nowr_to_migrc(void);
void free_migrc_req(struct migrc_req *req);
+int migrc_pending_nr_in_zone(struct zone *z);
extern atomic_t migrc_gen;
extern struct llist_head migrc_reqs;
@@ -3842,6 +3843,7 @@ static inline bool migrc_try_flush_free_folios(void) { return false; }
static inline void migrc_try_flush_free_folios_dirty(void) {}
static inline struct migrc_req *fold_ubc_nowr_to_migrc(void) { return NULL; }
static inline void free_migrc_req(struct migrc_req *req) {}
+static inline int migrc_pending_nr_in_zone(struct zone *z) { return 0; }
#endif
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6d645beaf7a6..1ec79bb63ba7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -958,6 +958,9 @@ struct zone {
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
+#ifdef CONFIG_MIGRC
+ atomic_t migrc_pending_nr;
+#endif
} ____cacheline_internodealigned_in_smp;
enum pgdat_flags {
diff --git a/mm/migrate.c b/mm/migrate.c
index c7b72d275b2a..badef3d89c6c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -117,9 +117,12 @@ void migrc_shrink(struct llist_head *h)
llist_for_each_entry_safe(p, p2, n, migrc_node) {
if (p->migrc_state == MIGRC_SRC_PENDING) {
struct pglist_data *node;
+ struct zone *zone;
node = NODE_DATA(page_to_nid(p));
+ zone = page_zone(p);
atomic_dec(&node->migrc_pending_nr);
+ atomic_dec(&zone->migrc_pending_nr);
}
if (WARN_ON(!migrc_pending(page_folio(p))))
@@ -172,6 +175,7 @@ static void migrc_expand_req(struct folio *fsrc, struct folio *fdst)
{
struct migrc_req *req;
struct pglist_data *node;
+ struct zone *zone;
req = fold_ubc_nowr_to_migrc();
if (!req)
@@ -190,7 +194,9 @@ static void migrc_expand_req(struct folio *fsrc, struct folio *fdst)
req->last = &fsrc->page.migrc_node;
node = NODE_DATA(folio_nid(fsrc));
+ zone = page_zone(&fsrc->page);
atomic_inc(&node->migrc_pending_nr);
+ atomic_inc(&zone->migrc_pending_nr);
if (migrc_is_full(folio_nid(fsrc)))
migrc_try_flush_free_folios();
@@ -275,6 +281,12 @@ bool migrc_req_processing(void)
{
return current->mreq && current->mreq_dirty;
}
+
+int migrc_pending_nr_in_zone(struct zone *z)
+{
+ return atomic_read(&z->migrc_pending_nr);
+
+}
#else
static inline bool migrc_src_pending(struct folio *f) { return false; }
static inline bool migrc_dst_pending(struct folio *f) { return false; }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c51cbdb45d86..9f791c0fa15d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3179,6 +3179,11 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
long min = mark;
int o;
+ /*
+ * There are pages that can be freed by migrc_try_flush_free_folios().
+ */
+ free_pages += migrc_pending_nr_in_zone(z);
+
/* free_pages may go negative - that's OK */
free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
@@ -4257,6 +4262,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
unsigned int zonelist_iter_cookie;
int reserve_flags;
+ migrc_try_flush_free_folios();
restart:
compaction_retries = 0;
no_progress_loops = 0;
@@ -4772,6 +4778,16 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
if (likely(page))
goto out;
+ if (order && migrc_try_flush_free_folios()) {
+ /*
+ * Try again after freeing migrc's pending pages in case
+ * of high order allocation.
+ */
+ page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
+ if (likely(page))
+ goto out;
+ }
+
alloc_gfp = gfp;
ac.spread_dirty_pages = false;
--
2.17.1
next prev parent reply other threads:[~2023-08-17 8:09 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-17 8:05 [RFC v2 0/6] Reduce TLB flushes under some specific conditions Byungchul Park
2023-08-17 8:05 ` [RFC v2 1/6] mm/rmap: Recognize non-writable TLB entries during TLB batch flush Byungchul Park
2023-08-17 8:05 ` [RFC v2 2/6] mm: Defer TLB flush by keeping both src and dst folios at migration Byungchul Park
2023-08-17 8:05 ` [RFC v2 3/6] mm, migrc: Skip TLB flushes at the CPUs that already have been done Byungchul Park
2023-08-17 8:05 ` Byungchul Park [this message]
2023-08-17 8:05 ` [RFC v2 5/6] mm, migrc: Add a sysctl knob to enable/disable MIGRC mechanism Byungchul Park
2023-08-17 8:05 ` [RFC v2 6/6] mm, migrc: Implement internal allocator to minimize impact onto vm Byungchul Park
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230817080559.43200-5-byungchul@sk.com \
--to=byungchul@sk.com \
--cc=akpm@linux-foundation.org \
--cc=dave.hansen@linux.intel.com \
--cc=david@redhat.com \
--cc=hughd@google.com \
--cc=kernel_team@skhynix.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=luto@kernel.org \
--cc=mgorman@techsingularity.net \
--cc=namit@vmware.com \
--cc=peterz@infradead.org \
--cc=willy@infradead.org \
--cc=xhao@linux.alibaba.com \
--cc=ying.huang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).