All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Zach O'Keefe" <zokeefe@google.com>
To: Alex Shi <alex.shi@linux.alibaba.com>,
	David Hildenbrand <david@redhat.com>,
	 David Rientjes <rientjes@google.com>,
	Matthew Wilcox <willy@infradead.org>,
	 Michal Hocko <mhocko@suse.com>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	 Peter Xu <peterx@redhat.com>,
	Rongwei Wang <rongwei.wang@linux.alibaba.com>,
	 SeongJae Park <sj@kernel.org>, Song Liu <songliubraving@fb.com>,
	Vlastimil Babka <vbabka@suse.cz>,  Yang Shi <shy828301@gmail.com>,
	Zi Yan <ziy@nvidia.com>,
	linux-mm@kvack.org
Cc: Andrea Arcangeli <aarcange@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	 Arnd Bergmann <arnd@arndb.de>,
	Axel Rasmussen <axelrasmussen@google.com>,
	 Chris Kennelly <ckennelly@google.com>,
	Chris Zankel <chris@zankel.net>, Helge Deller <deller@gmx.de>,
	 Hugh Dickins <hughd@google.com>,
	Ivan Kokshaysky <ink@jurassic.park.msu.ru>,
	 "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>,
	Jens Axboe <axboe@kernel.dk>,
	 "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Matt Turner <mattst88@gmail.com>,
	 Max Filippov <jcmvbkbc@gmail.com>,
	Miaohe Lin <linmiaohe@huawei.com>,
	 Minchan Kim <minchan@kernel.org>,
	Patrick Xia <patrickx@google.com>,
	 Pavel Begunkov <asml.silence@gmail.com>,
	Thomas Bogendoerfer <tsbogend@alpha.franken.de>,
	 "Zach O'Keefe" <zokeefe@google.com>
Subject: [PATCH v6 03/15] mm/khugepaged: add struct collapse_control
Date: Fri,  3 Jun 2022 17:39:52 -0700	[thread overview]
Message-ID: <20220604004004.954674-4-zokeefe@google.com> (raw)
In-Reply-To: <20220604004004.954674-1-zokeefe@google.com>

Modularize hugepage collapse by introducing struct collapse_control.
This structure serves to describe the properties of the requested
collapse, as well as serve as a local scratch pad to use during the
collapse itself.

Start by moving global per-node khugepaged statistics into this
new structure, and stack allocate one for khugepaged collapse
context.

Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
 mm/khugepaged.c | 87 ++++++++++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 38 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 7a914ca19e96..907d0b2bd4bd 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -86,6 +86,14 @@ static struct kmem_cache *mm_slot_cache __read_mostly;
 
 #define MAX_PTE_MAPPED_THP 8
 
+struct collapse_control {
+	/* Num pages scanned per node */
+	int node_load[MAX_NUMNODES];
+
+	/* Last target selected in khugepaged_find_target_node() */
+	int last_target_node;
+};
+
 /**
  * struct mm_slot - hash lookup from mm to mm_slot
  * @hash: hash collision list
@@ -777,9 +785,7 @@ static void khugepaged_alloc_sleep(void)
 	remove_wait_queue(&khugepaged_wait, &wait);
 }
 
-static int khugepaged_node_load[MAX_NUMNODES];
-
-static bool khugepaged_scan_abort(int nid)
+static bool khugepaged_scan_abort(int nid, struct collapse_control *cc)
 {
 	int i;
 
@@ -791,11 +797,11 @@ static bool khugepaged_scan_abort(int nid)
 		return false;
 
 	/* If there is a count for this node already, it must be acceptable */
-	if (khugepaged_node_load[nid])
+	if (cc->node_load[nid])
 		return false;
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
-		if (!khugepaged_node_load[i])
+		if (!cc->node_load[i])
 			continue;
 		if (node_distance(nid, i) > node_reclaim_distance)
 			return true;
@@ -810,32 +816,31 @@ static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void)
 }
 
 #ifdef CONFIG_NUMA
-static int khugepaged_find_target_node(void)
+static int khugepaged_find_target_node(struct collapse_control *cc)
 {
-	static int last_khugepaged_target_node = NUMA_NO_NODE;
 	int nid, target_node = 0, max_value = 0;
 
 	/* find first node with max normal pages hit */
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
-		if (khugepaged_node_load[nid] > max_value) {
-			max_value = khugepaged_node_load[nid];
+		if (cc->node_load[nid] > max_value) {
+			max_value = cc->node_load[nid];
 			target_node = nid;
 		}
 
 	/* do some balance if several nodes have the same hit record */
-	if (target_node <= last_khugepaged_target_node)
-		for (nid = last_khugepaged_target_node + 1; nid < MAX_NUMNODES;
-				nid++)
-			if (max_value == khugepaged_node_load[nid]) {
+	if (target_node <= cc->last_target_node)
+		for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES;
+		     nid++)
+			if (max_value == cc->node_load[nid]) {
 				target_node = nid;
 				break;
 			}
 
-	last_khugepaged_target_node = target_node;
+	cc->last_target_node = target_node;
 	return target_node;
 }
 #else
-static int khugepaged_find_target_node(void)
+static int khugepaged_find_target_node(struct collapse_control *cc)
 {
 	return 0;
 }
@@ -1155,10 +1160,9 @@ static void collapse_huge_page(struct mm_struct *mm,
 	return;
 }
 
-static int khugepaged_scan_pmd(struct mm_struct *mm,
-			       struct vm_area_struct *vma,
-			       unsigned long address,
-			       struct page **hpage)
+static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
+			       unsigned long address, struct page **hpage,
+			       struct collapse_control *cc)
 {
 	pmd_t *pmd;
 	pte_t *pte, *_pte;
@@ -1176,7 +1180,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 	if (result != SCAN_SUCCEED)
 		goto out;
 
-	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
+	memset(cc->node_load, 0, sizeof(cc->node_load));
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
 	for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR;
 	     _pte++, _address += PAGE_SIZE) {
@@ -1242,16 +1246,16 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 
 		/*
 		 * Record which node the original page is from and save this
-		 * information to khugepaged_node_load[].
+		 * information to cc->node_load[].
 		 * Khugepaged will allocate hugepage from the node has the max
 		 * hit record.
 		 */
 		node = page_to_nid(page);
-		if (khugepaged_scan_abort(node)) {
+		if (khugepaged_scan_abort(node, cc)) {
 			result = SCAN_SCAN_ABORT;
 			goto out_unmap;
 		}
-		khugepaged_node_load[node]++;
+		cc->node_load[node]++;
 		if (!PageLRU(page)) {
 			result = SCAN_PAGE_LRU;
 			goto out_unmap;
@@ -1302,7 +1306,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
 	if (ret) {
-		node = khugepaged_find_target_node();
+		node = khugepaged_find_target_node(cc);
 		/* collapse_huge_page will return with the mmap_lock released */
 		collapse_huge_page(mm, address, hpage, node,
 				referenced, unmapped);
@@ -1958,8 +1962,9 @@ static void collapse_file(struct mm_struct *mm,
 	/* TODO: tracepoints */
 }
 
-static void khugepaged_scan_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
+				 pgoff_t start, struct page **hpage,
+				 struct collapse_control *cc)
 {
 	struct page *page = NULL;
 	struct address_space *mapping = file->f_mapping;
@@ -1970,7 +1975,7 @@ static void khugepaged_scan_file(struct mm_struct *mm,
 
 	present = 0;
 	swap = 0;
-	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
+	memset(cc->node_load, 0, sizeof(cc->node_load));
 	rcu_read_lock();
 	xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) {
 		if (xas_retry(&xas, page))
@@ -1995,11 +2000,11 @@ static void khugepaged_scan_file(struct mm_struct *mm,
 		}
 
 		node = page_to_nid(page);
-		if (khugepaged_scan_abort(node)) {
+		if (khugepaged_scan_abort(node, cc)) {
 			result = SCAN_SCAN_ABORT;
 			break;
 		}
-		khugepaged_node_load[node]++;
+		cc->node_load[node]++;
 
 		if (!PageLRU(page)) {
 			result = SCAN_PAGE_LRU;
@@ -2032,7 +2037,7 @@ static void khugepaged_scan_file(struct mm_struct *mm,
 			result = SCAN_EXCEED_NONE_PTE;
 			count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 		} else {
-			node = khugepaged_find_target_node();
+			node = khugepaged_find_target_node(cc);
 			collapse_file(mm, file, start, hpage, node);
 		}
 	}
@@ -2040,8 +2045,9 @@ static void khugepaged_scan_file(struct mm_struct *mm,
 	/* TODO: tracepoints */
 }
 #else
-static void khugepaged_scan_file(struct mm_struct *mm,
-		struct file *file, pgoff_t start, struct page **hpage)
+static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
+				 pgoff_t start, struct page **hpage,
+				 struct collapse_control *cc)
 {
 	BUILD_BUG();
 }
@@ -2052,7 +2058,8 @@ static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
 #endif
 
 static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
-					    struct page **hpage)
+					    struct page **hpage,
+					    struct collapse_control *cc)
 	__releases(&khugepaged_mm_lock)
 	__acquires(&khugepaged_mm_lock)
 {
@@ -2133,12 +2140,13 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 
 				mmap_read_unlock(mm);
 				ret = 1;
-				khugepaged_scan_file(mm, file, pgoff, hpage);
+				khugepaged_scan_file(mm, file, pgoff, hpage,
+						     cc);
 				fput(file);
 			} else {
 				ret = khugepaged_scan_pmd(mm, vma,
 						khugepaged_scan.address,
-						hpage);
+						hpage, cc);
 			}
 			/* move to next address */
 			khugepaged_scan.address += HPAGE_PMD_SIZE;
@@ -2194,7 +2202,7 @@ static int khugepaged_wait_event(void)
 		kthread_should_stop();
 }
 
-static void khugepaged_do_scan(void)
+static void khugepaged_do_scan(struct collapse_control *cc)
 {
 	struct page *hpage = NULL;
 	unsigned int progress = 0, pass_through_head = 0;
@@ -2218,7 +2226,7 @@ static void khugepaged_do_scan(void)
 		if (khugepaged_has_work() &&
 		    pass_through_head < 2)
 			progress += khugepaged_scan_mm_slot(pages - progress,
-							    &hpage);
+							    &hpage, cc);
 		else
 			progress = pages;
 		spin_unlock(&khugepaged_mm_lock);
@@ -2254,12 +2262,15 @@ static void khugepaged_wait_work(void)
 static int khugepaged(void *none)
 {
 	struct mm_slot *mm_slot;
+	struct collapse_control cc = {
+		.last_target_node = NUMA_NO_NODE,
+	};
 
 	set_freezable();
 	set_user_nice(current, MAX_NICE);
 
 	while (!kthread_should_stop()) {
-		khugepaged_do_scan();
+		khugepaged_do_scan(&cc);
 		khugepaged_wait_work();
 	}
 
-- 
2.36.1.255.ge46751e96f-goog



  parent reply	other threads:[~2022-06-04  0:40 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-04  0:39 [PATCH v6 00/15] mm: userspace hugepage collapse Zach O'Keefe
2022-06-04  0:39 ` [PATCH v6 01/15] mm: khugepaged: don't carry huge page to the next loop for !CONFIG_NUMA Zach O'Keefe
2022-06-06 18:25   ` Yang Shi
2022-06-29 20:49   ` Peter Xu
2022-06-30  1:15     ` Zach O'Keefe
2022-06-04  0:39 ` [PATCH v6 02/15] mm/khugepaged: record SCAN_PMD_MAPPED when scan_pmd() finds THP Zach O'Keefe
2022-06-06 20:45   ` Yang Shi
2022-06-07 16:01     ` Zach O'Keefe
2022-06-07 19:32       ` Zach O'Keefe
2022-06-07 21:27         ` Yang Shi
2022-06-08  0:27           ` Zach O'Keefe
2022-06-04  0:39 ` Zach O'Keefe [this message]
2022-06-06  2:41   ` [PATCH v6 03/15] mm/khugepaged: add struct collapse_control kernel test robot
2022-06-06 16:40     ` Zach O'Keefe
2022-06-06 16:40       ` Zach O'Keefe
2022-06-06 20:20       ` Yang Shi
2022-06-06 20:20         ` Yang Shi
2022-06-06 21:22         ` Yang Shi
2022-06-06 21:22           ` Yang Shi
2022-06-06 22:23       ` Andrew Morton
2022-06-06 22:23         ` Andrew Morton
2022-06-06 23:53         ` Yang Shi
2022-06-06 23:53           ` Yang Shi
2022-06-08  0:42           ` Zach O'Keefe
2022-06-08  0:42             ` Zach O'Keefe
2022-06-08  1:00             ` Yang Shi
2022-06-08  1:00               ` Yang Shi
2022-06-08  1:06               ` Zach O'Keefe
2022-06-08  1:06                 ` Zach O'Keefe
2022-06-04  0:39 ` [PATCH v6 04/15] mm/khugepaged: dedup and simplify hugepage alloc and charging Zach O'Keefe
2022-06-06 20:50   ` Yang Shi
2022-06-29 21:58   ` Peter Xu
2022-06-30 20:14     ` Zach O'Keefe
2022-06-04  0:39 ` [PATCH v6 05/15] mm/khugepaged: make allocation semantics context-specific Zach O'Keefe
2022-06-06 20:58   ` Yang Shi
2022-06-07 19:56     ` Zach O'Keefe
2022-06-04  0:39 ` [PATCH v6 06/15] mm/khugepaged: pipe enum scan_result codes back to callers Zach O'Keefe
2022-06-06 22:39   ` Yang Shi
2022-06-07  0:17     ` Zach O'Keefe
2022-06-04  0:39 ` [PATCH v6 07/15] mm/khugepaged: add flag to ignore khugepaged heuristics Zach O'Keefe
2022-06-06 22:51   ` Yang Shi
2022-06-04  0:39 ` [PATCH v6 08/15] mm/khugepaged: add flag to ignore THP sysfs enabled Zach O'Keefe
2022-06-06 23:02   ` Yang Shi
     [not found]   ` <YrzehlUoo2iMMLC2@xz-m1.local>
     [not found]     ` <CAAa6QmRXD5KboM8=ZZRPThOmcLEPtxzf0XyjkCeY_vgR7VOPqg@mail.gmail.com>
2022-06-30  2:32       ` Peter Xu
2022-06-30 14:17         ` Zach O'Keefe
2022-06-04  0:39 ` [PATCH v6 09/15] mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse Zach O'Keefe
2022-06-06 23:53   ` Yang Shi
2022-06-07 22:48     ` Zach O'Keefe
2022-06-08  0:39       ` Yang Shi
2022-06-09 17:35         ` Zach O'Keefe
2022-06-09 18:51           ` Yang Shi
2022-06-10 14:51             ` Zach O'Keefe
2022-06-04  0:39 ` [PATCH v6 10/15] mm/khugepaged: rename prefix of shared collapse functions Zach O'Keefe
2022-06-06 23:56   ` Yang Shi
2022-06-07  0:31     ` Zach O'Keefe
2022-06-04  0:40 ` [PATCH v6 11/15] mm/madvise: add MADV_COLLAPSE to process_madvise() Zach O'Keefe
2022-06-07 19:14   ` Yang Shi
2022-06-04  0:40 ` [PATCH v6 12/15] selftests/vm: modularize collapse selftests Zach O'Keefe
2022-06-04  0:40 ` [PATCH v6 13/15] selftests/vm: add MADV_COLLAPSE collapse context to selftests Zach O'Keefe
2022-06-04  0:40 ` [PATCH v6 14/15] selftests/vm: add selftest to verify recollapse of THPs Zach O'Keefe
2022-06-04  0:40 ` [PATCH v6 15/15] tools headers uapi: add MADV_COLLAPSE madvise mode to tools Zach O'Keefe
2022-06-06 23:58   ` Yang Shi
2022-06-07  0:24     ` Zach O'Keefe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220604004004.954674-4-zokeefe@google.com \
    --to=zokeefe@google.com \
    --cc=James.Bottomley@HansenPartnership.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex.shi@linux.alibaba.com \
    --cc=arnd@arndb.de \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=axelrasmussen@google.com \
    --cc=chris@zankel.net \
    --cc=ckennelly@google.com \
    --cc=david@redhat.com \
    --cc=deller@gmx.de \
    --cc=hughd@google.com \
    --cc=ink@jurassic.park.msu.ru \
    --cc=jcmvbkbc@gmail.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linmiaohe@huawei.com \
    --cc=linux-mm@kvack.org \
    --cc=mattst88@gmail.com \
    --cc=mhocko@suse.com \
    --cc=minchan@kernel.org \
    --cc=pasha.tatashin@soleen.com \
    --cc=patrickx@google.com \
    --cc=peterx@redhat.com \
    --cc=rientjes@google.com \
    --cc=rongwei.wang@linux.alibaba.com \
    --cc=shy828301@gmail.com \
    --cc=sj@kernel.org \
    --cc=songliubraving@fb.com \
    --cc=tsbogend@alpha.franken.de \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.