[PATCH WIP v1 11/20] mm/rmap_id: support for 1, 2 and 3 values by manual calculation

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: David Hildenbrand <david@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Hildenbrand <david@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Ryan Roberts <ryan.roberts@arm.com>,
	Matthew Wilcox <willy@infradead.org>,
	Hugh Dickins <hughd@google.com>,
	Yin Fengwei <fengwei.yin@intel.com>,
	Yang Shi <shy828301@gmail.com>, Ying Huang <ying.huang@intel.com>,
	Zi Yan <ziy@nvidia.com>, Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>, Will Deacon <will@kernel.org>,
	Waiman Long <longman@redhat.com>,
	"Paul E. McKenney" <paulmck@kernel.org>
Subject: [PATCH WIP v1 11/20] mm/rmap_id: support for 1, 2 and 3 values by manual calculation
Date: Fri, 24 Nov 2023 14:26:16 +0100	[thread overview]
Message-ID: <20231124132626.235350-12-david@redhat.com> (raw)
In-Reply-To: <20231124132626.235350-1-david@redhat.com>

For smaller folios, we can use fewer rmap values:
* <= order-2: 1x 64bit value
* <= order-5: 2x 64bit values
* <= order-9: 3x 64bit values

We end up with a lot of subids, so we cannot really use lookup tables.
Pre-calculate the subids per MM.

For order-9 we could think about having a lookup table with 128bit
entries. Further, we could calculate them only when really required.

With 2 MiB THP this now implies only 3 instead of 4 values.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 include/linux/mm_types.h |  3 ++
 include/linux/rmap.h     | 58 ++++++++++++++++++++++++++++-
 kernel/fork.c            |  6 +++
 mm/rmap_id.c             | 79 +++++++++++++++++++++++++++++++++++++---
 4 files changed, 139 insertions(+), 7 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 75305c57ef64..0ca5004e8f4a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1032,6 +1032,9 @@ struct mm_struct {
 
 #ifdef CONFIG_RMAP_ID
 		int mm_rmap_id;
+		unsigned long mm_rmap_subid_1;
+		unsigned long mm_rmap_subid_2[2];
+		unsigned long mm_rmap_subid_3[3];
 #endif /* CONFIG_RMAP_ID */
 	} __randomize_layout;
 
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index a73e146d82d1..39aeab457f4a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -180,12 +180,54 @@ struct anon_vma *folio_get_anon_vma(struct folio *folio);
 void free_rmap_id(int id);
 int alloc_rmap_id(void);
 
+#define RMAP_SUBID_1_MAX_ORDER		2
+#define RMAP_SUBID_2_MIN_ORDER		3
+#define RMAP_SUBID_2_MAX_ORDER		5
+#define RMAP_SUBID_3_MIN_ORDER		6
+#define RMAP_SUBID_3_MAX_ORDER		9
+#define RMAP_SUBID_4_MIN_ORDER		10
 #define RMAP_SUBID_4_MAX_ORDER		10
 #define RMAP_SUBID_5_MIN_ORDER		11
 #define RMAP_SUBID_5_MAX_ORDER		12
 #define RMAP_SUBID_6_MIN_ORDER		13
 #define RMAP_SUBID_6_MAX_ORDER		15
 
+static inline unsigned long calc_rmap_subid(unsigned int n, unsigned int i)
+{
+	unsigned long nr = 0, mult = 1;	/* mult: (n + 1)^k while visiting bit k */
+	/* Reinterpret the binary digits of @i as 0/1 digits of a base-(n + 1) number. */
+	while (i) {
+		if (i & 1)
+			nr += mult;
+		mult *= (n + 1);	/* unsigned arithmetic: wraps silently if too many bits */
+		i >>= 1;
+	}
+	return nr;
+}
+
+static inline unsigned long calc_rmap_subid_1(int rmap_id)
+{
+	VM_WARN_ON_ONCE(rmap_id < RMAP_ID_MIN || rmap_id > RMAP_ID_MAX);
+	/* Single value: all bits of @rmap_id become digits in base (1 << 2) + 1 == 5. */
+	return calc_rmap_subid(1u << RMAP_SUBID_1_MAX_ORDER, rmap_id);
+}
+
+static inline unsigned long calc_rmap_subid_2(int rmap_id, int nr)
+{
+	VM_WARN_ON_ONCE(rmap_id < RMAP_ID_MIN || rmap_id > RMAP_ID_MAX || nr > 1);
+	/* Value @nr encodes bits [12 * nr, 12 * nr + 11] of @rmap_id in base 32 + 1 == 33. */
+	return calc_rmap_subid(1u << RMAP_SUBID_2_MAX_ORDER,
+			       (rmap_id >> (nr * 12)) & 0xfff);
+}
+
+static inline unsigned long calc_rmap_subid_3(int rmap_id, int nr)
+{
+	VM_WARN_ON_ONCE(rmap_id < RMAP_ID_MIN || rmap_id > RMAP_ID_MAX || nr > 2);
+	/* 7 bits/value: (512 + 1)^8 > 2^64. NOTE(review): covers 2M IDs, check RMAP_ID_MAX. */
+	return calc_rmap_subid(1u << RMAP_SUBID_3_MAX_ORDER,
+			       (rmap_id >> (nr * 7)) & 0x7f);
+}
+
 static inline void __folio_prep_large_rmap(struct folio *folio)
 {
 	const unsigned int order = folio_order(folio);
@@ -202,10 +244,16 @@ static inline void __folio_prep_large_rmap(struct folio *folio)
 		atomic_long_set(&folio->_rmap_val4, 0);
 		fallthrough;
 #endif
-	default:
+	case RMAP_SUBID_4_MIN_ORDER ... RMAP_SUBID_4_MAX_ORDER:
 		atomic_long_set(&folio->_rmap_val3, 0);
+		fallthrough;
+	case RMAP_SUBID_3_MIN_ORDER ... RMAP_SUBID_3_MAX_ORDER:
 		atomic_long_set(&folio->_rmap_val2, 0);
+		fallthrough;
+	case RMAP_SUBID_2_MIN_ORDER ... RMAP_SUBID_2_MAX_ORDER:
 		atomic_long_set(&folio->_rmap_val1, 0);
+		fallthrough;
+	default:
 		atomic_long_set(&folio->_rmap_val0, 0);
 		break;
 	}
@@ -227,10 +275,16 @@ static inline void __folio_undo_large_rmap(struct folio *folio)
 		VM_WARN_ON_ONCE(atomic_long_read(&folio->_rmap_val4));
 		fallthrough;
 #endif
-	default:
+	case RMAP_SUBID_4_MIN_ORDER ... RMAP_SUBID_4_MAX_ORDER:
 		VM_WARN_ON_ONCE(atomic_long_read(&folio->_rmap_val3));
+		fallthrough;
+	case RMAP_SUBID_3_MIN_ORDER ... RMAP_SUBID_3_MAX_ORDER:
 		VM_WARN_ON_ONCE(atomic_long_read(&folio->_rmap_val2));
+		fallthrough;
+	case RMAP_SUBID_2_MIN_ORDER ... RMAP_SUBID_2_MAX_ORDER:
 		VM_WARN_ON_ONCE(atomic_long_read(&folio->_rmap_val1));
+		fallthrough;
+	default:
 		VM_WARN_ON_ONCE(atomic_long_read(&folio->_rmap_val0));
 		break;
 	}
diff --git a/kernel/fork.c b/kernel/fork.c
index 773c93613ca2..1d2f6248c83e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -822,6 +822,12 @@ static inline int mm_alloc_rmap_id(struct mm_struct *mm)
 	if (id < 0)
 		return id;
 	mm->mm_rmap_id = id;
+	mm->mm_rmap_subid_1 = calc_rmap_subid_1(id);
+	mm->mm_rmap_subid_2[0] = calc_rmap_subid_2(id, 0);
+	mm->mm_rmap_subid_2[1] = calc_rmap_subid_2(id, 1);
+	mm->mm_rmap_subid_3[0] = calc_rmap_subid_3(id, 0);
+	mm->mm_rmap_subid_3[1] = calc_rmap_subid_3(id, 1);
+	mm->mm_rmap_subid_3[2] = calc_rmap_subid_3(id, 2);
 	return 0;
 }
 
diff --git a/mm/rmap_id.c b/mm/rmap_id.c
index 85a61c830f19..6c3187547741 100644
--- a/mm/rmap_id.c
+++ b/mm/rmap_id.c
@@ -87,6 +87,39 @@ static DEFINE_IDA(rmap_ida);
  *       involved page tables are locked and stop any page table walkers.
  */
 
+/*
+ * With 4 (order-2) possible exclusive mappings per folio, we can have
+ * 16777216 = 16M sub-IDs per 64bit value.
+ */
+static unsigned long get_rmap_subid_1(struct mm_struct *mm)
+{
+	return mm->mm_rmap_subid_1;	/* precalculated in mm_alloc_rmap_id() */
+}
+
+/*
+ * With 32 (order-5) possible exclusive mappings per folio, we can have
+ * 4096 sub-IDs per 64bit value.
+ *
+ * With 2 such 64bit values, we can support 4096^2 == 16M IDs.
+ */
+static unsigned long get_rmap_subid_2(struct mm_struct *mm, int nr)
+{
+	VM_WARN_ON_ONCE(nr > 1);	/* mm_rmap_subid_2[] has two entries */
+	return mm->mm_rmap_subid_2[nr];	/* precalculated in mm_alloc_rmap_id() */
+}
+
+/*
+ * With 512 (order-9) possible exclusive mappings per folio, we can have
+ * 128 sub-IDs per 64bit value.
+ *
+ * With 3 such 64bit values, we can support 128^3 == 2M IDs.
+ */
+static unsigned long get_rmap_subid_3(struct mm_struct *mm, int nr)
+{
+	VM_WARN_ON_ONCE(nr > 2);	/* mm_rmap_subid_3[] has three entries */
+	return mm->mm_rmap_subid_3[nr];	/* precalculated in mm_alloc_rmap_id() */
+}
+
 /*
  * With 1024 (order-10) possible exclusive mappings per folio, we can have 64
  * sub-IDs per 64bit value.
@@ -279,12 +312,24 @@ void __folio_set_large_rmap_val(struct folio *folio, int count,
 		atomic_long_set(&folio->_rmap_val4, get_rmap_subid_5(mm, 4) * count);
 		break;
 #endif
-	default:
+	case RMAP_SUBID_4_MIN_ORDER ... RMAP_SUBID_4_MAX_ORDER:
 		atomic_long_set(&folio->_rmap_val0, get_rmap_subid_4(mm, 0) * count);
 		atomic_long_set(&folio->_rmap_val1, get_rmap_subid_4(mm, 1) * count);
 		atomic_long_set(&folio->_rmap_val2, get_rmap_subid_4(mm, 2) * count);
 		atomic_long_set(&folio->_rmap_val3, get_rmap_subid_4(mm, 3) * count);
 		break;
+	case RMAP_SUBID_3_MIN_ORDER ... RMAP_SUBID_3_MAX_ORDER:
+		atomic_long_set(&folio->_rmap_val0, get_rmap_subid_3(mm, 0) * count);
+		atomic_long_set(&folio->_rmap_val1, get_rmap_subid_3(mm, 1) * count);
+		atomic_long_set(&folio->_rmap_val2, get_rmap_subid_3(mm, 2) * count);
+		break;
+	case RMAP_SUBID_2_MIN_ORDER ... RMAP_SUBID_2_MAX_ORDER:
+		atomic_long_set(&folio->_rmap_val0, get_rmap_subid_2(mm, 0) * count);
+		atomic_long_set(&folio->_rmap_val1, get_rmap_subid_2(mm, 1) * count);
+		break;
+	default:
+		atomic_long_set(&folio->_rmap_val0, get_rmap_subid_1(mm) * count);
+		break;
 	}
 }
 
@@ -313,12 +358,24 @@ void __folio_add_large_rmap_val(struct folio *folio, int count,
 		atomic_long_add(get_rmap_subid_5(mm, 4) * count, &folio->_rmap_val4);
 		break;
 #endif
-	default:
+	case RMAP_SUBID_4_MIN_ORDER ... RMAP_SUBID_4_MAX_ORDER:
 		atomic_long_add(get_rmap_subid_4(mm, 0) * count, &folio->_rmap_val0);
 		atomic_long_add(get_rmap_subid_4(mm, 1) * count, &folio->_rmap_val1);
 		atomic_long_add(get_rmap_subid_4(mm, 2) * count, &folio->_rmap_val2);
 		atomic_long_add(get_rmap_subid_4(mm, 3) * count, &folio->_rmap_val3);
 		break;
+	case RMAP_SUBID_3_MIN_ORDER ... RMAP_SUBID_3_MAX_ORDER:
+		atomic_long_add(get_rmap_subid_3(mm, 0) * count, &folio->_rmap_val0);
+		atomic_long_add(get_rmap_subid_3(mm, 1) * count, &folio->_rmap_val1);
+		atomic_long_add(get_rmap_subid_3(mm, 2) * count, &folio->_rmap_val2);
+		break;
+	case RMAP_SUBID_2_MIN_ORDER ... RMAP_SUBID_2_MAX_ORDER:
+		atomic_long_add(get_rmap_subid_2(mm, 0) * count, &folio->_rmap_val0);
+		atomic_long_add(get_rmap_subid_2(mm, 1) * count, &folio->_rmap_val1);
+		break;
+	default:
+		atomic_long_add(get_rmap_subid_1(mm) * count, &folio->_rmap_val0);
+		break;
 	}
 }
 
@@ -330,7 +387,7 @@ bool __folio_has_large_matching_rmap_val(struct folio *folio, int count,
 
 	switch (order) {
 #if MAX_ORDER >= RMAP_SUBID_6_MIN_ORDER
-	case RMAP_SUBID_6_MIN_ORDER .. RMAP_SUBID_6_MAX_ORDER:
+	case RMAP_SUBID_6_MIN_ORDER ... RMAP_SUBID_6_MAX_ORDER:
 		diff |= atomic_long_read(&folio->_rmap_val0) ^ (get_rmap_subid_6(mm, 0) * count);
 		diff |= atomic_long_read(&folio->_rmap_val1) ^ (get_rmap_subid_6(mm, 1) * count);
 		diff |= atomic_long_read(&folio->_rmap_val2) ^ (get_rmap_subid_6(mm, 2) * count);
@@ -340,7 +397,7 @@ bool __folio_has_large_matching_rmap_val(struct folio *folio, int count,
 		break;
 #endif
 #if MAX_ORDER >= RMAP_SUBID_5_MIN_ORDER
-	case RMAP_SUBID_5_MIN_ORDER .. RMAP_SUBID_5_MAX_ORDER:
+	case RMAP_SUBID_5_MIN_ORDER ... RMAP_SUBID_5_MAX_ORDER:
 		diff |= atomic_long_read(&folio->_rmap_val0) ^ (get_rmap_subid_5(mm, 0) * count);
 		diff |= atomic_long_read(&folio->_rmap_val1) ^ (get_rmap_subid_5(mm, 1) * count);
 		diff |= atomic_long_read(&folio->_rmap_val2) ^ (get_rmap_subid_5(mm, 2) * count);
@@ -348,12 +405,24 @@ bool __folio_has_large_matching_rmap_val(struct folio *folio, int count,
 		diff |= atomic_long_read(&folio->_rmap_val4) ^ (get_rmap_subid_5(mm, 4) * count);
 		break;
 #endif
-	default:
+	case RMAP_SUBID_4_MIN_ORDER ... RMAP_SUBID_4_MAX_ORDER:
 		diff |= atomic_long_read(&folio->_rmap_val0) ^ (get_rmap_subid_4(mm, 0) * count);
 		diff |= atomic_long_read(&folio->_rmap_val1) ^ (get_rmap_subid_4(mm, 1) * count);
 		diff |= atomic_long_read(&folio->_rmap_val2) ^ (get_rmap_subid_4(mm, 2) * count);
 		diff |= atomic_long_read(&folio->_rmap_val3) ^ (get_rmap_subid_4(mm, 3) * count);
 		break;
+	case RMAP_SUBID_3_MIN_ORDER ... RMAP_SUBID_3_MAX_ORDER:
+		diff |= atomic_long_read(&folio->_rmap_val0) ^ (get_rmap_subid_3(mm, 0) * count);
+		diff |= atomic_long_read(&folio->_rmap_val1) ^ (get_rmap_subid_3(mm, 1) * count);
+		diff |= atomic_long_read(&folio->_rmap_val2) ^ (get_rmap_subid_3(mm, 2) * count);
+		break;
+	case RMAP_SUBID_2_MIN_ORDER ... RMAP_SUBID_2_MAX_ORDER:
+		diff |= atomic_long_read(&folio->_rmap_val0) ^ (get_rmap_subid_2(mm, 0) * count);
+		diff |= atomic_long_read(&folio->_rmap_val1) ^ (get_rmap_subid_2(mm, 1) * count);
+		break;
+	default:
+		diff |= atomic_long_read(&folio->_rmap_val0) ^ (get_rmap_subid_1(mm) * count);
+		break;
 	}
 	return !diff;
 }
-- 
2.41.0

next prev parent reply	other threads:[~2023-11-24 13:27 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-24 13:26 [PATCH WIP v1 00/20] mm: precise "mapped shared" vs. "mapped exclusively" detection for PTE-mapped THP / partially-mappable folios David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 01/20] mm/rmap: factor out adding folio range into __folio_add_rmap_range() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 02/20] mm: add a total mapcount for large folios David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 03/20] mm: convert folio_estimated_sharers() to folio_mapped_shared() and improve it David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 04/20] mm/rmap: pass dst_vma to page_try_dup_anon_rmap() and page_dup_file_rmap() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 05/20] mm/rmap: abstract total mapcount operations for partially-mappable folios David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 06/20] atomic_seqcount: new (raw) seqcount variant to support concurrent writers David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 07/20] mm/rmap_id: track if one ore multiple MMs map a partially-mappable folio David Hildenbrand
2023-12-17 19:13   ` Nadav Amit
2023-12-18 14:04     ` David Hildenbrand
2023-12-18 14:34       ` Nadav Amit
2023-11-24 13:26 ` [PATCH WIP v1 08/20] mm: pass MM to folio_mapped_shared() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 09/20] mm: improve folio_mapped_shared() for partially-mappable folios using rmap IDs David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 10/20] mm/memory: COW reuse support for PTE-mapped THP with " David Hildenbrand
2023-11-24 13:26 ` David Hildenbrand [this message]
2023-11-24 13:26 ` [PATCH WIP v1 12/20] mm/rmap: introduce folio_add_anon_rmap_range() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 13/20] mm/huge_memory: batch rmap operations in __split_huge_pmd_locked() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 14/20] mm/huge_memory: avoid folio_refcount() < folio_mapcount() " David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 15/20] mm/rmap_id: verify precalculated subids with CONFIG_DEBUG_VM David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 16/20] atomic_seqcount: support a single exclusive writer in the absence of other writers David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 17/20] mm/rmap_id: reduce atomic RMW operations when we are the exclusive writer David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 18/20] atomic_seqcount: use atomic add-return instead of atomic cmpxchg on 64bit David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 19/20] mm/rmap: factor out removing folio range into __folio_remove_rmap_range() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 20/20] mm/rmap: perform all mapcount operations of large folios under the rmap seqcount David Hildenbrand
2023-11-24 20:55 ` [PATCH WIP v1 00/20] mm: precise "mapped shared" vs. "mapped exclusively" detection for PTE-mapped THP / partially-mappable folios Linus Torvalds
2023-11-25 17:02   ` David Hildenbrand

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:75305c57ef6 dfblob:0ca5004e8f4 dfblob:a73e146d82d
dfblob:39aeab457f4 dfblob:773c93613ca dfblob:1d2f6248c83
dfblob:85a61c830f1 dfblob:6c318754774 )
 OR (
bs:"[PATCH WIP v1 11/20] mm/rmap_id: support for 1, 2 and 3 values by manual calculation" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231124132626.235350-12-david@redhat.com \
    --to=david@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=fengwei.yin@intel.com \
    --cc=hughd@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=mingo@redhat.com \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=ryan.roberts@arm.com \
    --cc=shy828301@gmail.com \
    --cc=torvalds@linux-foundation.org \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=ying.huang@intel.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).