[PATCH WIP v1 17/20] mm/rmap_id: reduce atomic RMW operations when we are the exclusive writer

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: David Hildenbrand <david@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, David Hildenbrand <david@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Ryan Roberts <ryan.roberts@arm.com>,
	Matthew Wilcox <willy@infradead.org>,
	Hugh Dickins <hughd@google.com>,
	Yin Fengwei <fengwei.yin@intel.com>,
	Yang Shi <shy828301@gmail.com>, Ying Huang <ying.huang@intel.com>,
	Zi Yan <ziy@nvidia.com>, Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>, Will Deacon <will@kernel.org>,
	Waiman Long <longman@redhat.com>,
	"Paul E. McKenney" <paulmck@kernel.org>
Subject: [PATCH WIP v1 17/20] mm/rmap_id: reduce atomic RMW operations when we are the exclusive writer
Date: Fri, 24 Nov 2023 14:26:22 +0100	[thread overview]
Message-ID: <20231124132626.235350-18-david@redhat.com> (raw)
In-Reply-To: <20231124132626.235350-1-david@redhat.com>

We can reduce the number of atomic RMW operations when we are the
single exclusive writer -- the common case.

So instead of always requiring

(1) 2 atomic RMW operations for adjusting the atomic seqcount
(2) 1 atomic RMW operation for adjusting the total mapcount
(3) 1 to 6 atomic RMW operations for adjusting the rmap values

We can avoid (2) and (3) if we are the exclusive writer and limit it
to the 2 atomic RMW operations from (1).

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 include/linux/rmap.h | 81 +++++++++++++++++++++++++++++++++-----------
 mm/rmap_id.c         | 52 ++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+), 19 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 0758dddc5528..538c23d3c0c9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -291,23 +291,36 @@ static inline void __folio_undo_large_rmap(struct folio *folio)
 #endif
 }
 
-static inline void __folio_write_large_rmap_begin(struct folio *folio)
+static inline bool __folio_write_large_rmap_begin(struct folio *folio)
 {
+	bool exclusive;
+
 	VM_WARN_ON_FOLIO(!folio_test_large_rmappable(folio), folio);
 	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
-	raw_write_atomic_seqcount_begin(&folio->_rmap_atomic_seqcount,
-					false);
+
+	exclusive = raw_write_atomic_seqcount_begin(&folio->_rmap_atomic_seqcount,
+						    true);
+	if (likely(exclusive)) {
+		prefetchw(&folio->_rmap_val0);
+		if (unlikely(folio_order(folio) > RMAP_SUBID_4_MAX_ORDER))
+			prefetchw(&folio->_rmap_val4);
+	}
+	return exclusive;
 }
 
-static inline void __folio_write_large_rmap_end(struct folio *folio)
+static inline void __folio_write_large_rmap_end(struct folio *folio,
+		bool exclusive)
 {
-	raw_write_atomic_seqcount_end(&folio->_rmap_atomic_seqcount, false);
+	raw_write_atomic_seqcount_end(&folio->_rmap_atomic_seqcount,
+				      exclusive);
 }
 
 void __folio_set_large_rmap_val(struct folio *folio, int count,
 		struct mm_struct *mm);
 void __folio_add_large_rmap_val(struct folio *folio, int count,
 		struct mm_struct *mm);
+void __folio_add_large_rmap_val_exclusive(struct folio *folio, int count,
+		struct mm_struct *mm);
 bool __folio_has_large_matching_rmap_val(struct folio *folio, int count,
 		struct mm_struct *mm);
 #else
@@ -317,12 +330,14 @@ static inline void __folio_prep_large_rmap(struct folio *folio)
 static inline void __folio_undo_large_rmap(struct folio *folio)
 {
 }
-static inline void __folio_write_large_rmap_begin(struct folio *folio)
+static inline bool __folio_write_large_rmap_begin(struct folio *folio)
 {
 	VM_WARN_ON_FOLIO(!folio_test_large_rmappable(folio), folio);
 	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
+	return false;
 }
-static inline void __folio_write_large_rmap_end(struct folio *folio)
+static inline void __folio_write_large_rmap_end(struct folio *folio,
+		bool exclusive)
 {
 }
 static inline void __folio_set_large_rmap_val(struct folio *folio, int count,
@@ -333,6 +348,10 @@ static inline void __folio_add_large_rmap_val(struct folio *folio, int count,
 		struct mm_struct *mm)
 {
 }
+static inline void __folio_add_large_rmap_val_exclusive(struct folio *folio,
+		int count, struct mm_struct *mm)
+{
+}
 #endif /* CONFIG_RMAP_ID */
 
 static inline void folio_set_large_mapcount(struct folio *folio,
@@ -348,28 +367,52 @@ static inline void folio_set_large_mapcount(struct folio *folio,
 static inline void folio_inc_large_mapcount(struct folio *folio,
 		struct vm_area_struct *vma)
 {
-	__folio_write_large_rmap_begin(folio);
-	atomic_inc(&folio->_total_mapcount);
-	__folio_add_large_rmap_val(folio, 1, vma->vm_mm);
-	__folio_write_large_rmap_end(folio);
+	bool exclusive;
+
+	exclusive = __folio_write_large_rmap_begin(folio);
+	if (likely(exclusive)) {
+		atomic_set(&folio->_total_mapcount,
+			   atomic_read(&folio->_total_mapcount) + 1);
+		__folio_add_large_rmap_val_exclusive(folio, 1, vma->vm_mm);
+	} else {
+		atomic_inc(&folio->_total_mapcount);
+		__folio_add_large_rmap_val(folio, 1, vma->vm_mm);
+	}
+	__folio_write_large_rmap_end(folio, exclusive);
 }
 
 static inline void folio_add_large_mapcount(struct folio *folio,
 		int count, struct vm_area_struct *vma)
 {
-	__folio_write_large_rmap_begin(folio);
-	atomic_add(count, &folio->_total_mapcount);
-	__folio_add_large_rmap_val(folio, count, vma->vm_mm);
-	__folio_write_large_rmap_end(folio);
+	bool exclusive;
+
+	exclusive = __folio_write_large_rmap_begin(folio);
+	if (likely(exclusive)) {
+		atomic_set(&folio->_total_mapcount,
+			   atomic_read(&folio->_total_mapcount) + count);
+		__folio_add_large_rmap_val_exclusive(folio, count, vma->vm_mm);
+	} else {
+		atomic_add(count, &folio->_total_mapcount);
+		__folio_add_large_rmap_val(folio, count, vma->vm_mm);
+	}
+	__folio_write_large_rmap_end(folio, exclusive);
 }
 
 static inline void folio_dec_large_mapcount(struct folio *folio,
 		struct vm_area_struct *vma)
 {
-	__folio_write_large_rmap_begin(folio);
-	atomic_dec(&folio->_total_mapcount);
-	__folio_add_large_rmap_val(folio, -1, vma->vm_mm);
-	__folio_write_large_rmap_end(folio);
+	bool exclusive;
+
+	exclusive = __folio_write_large_rmap_begin(folio);
+	if (likely(exclusive)) {
+		atomic_set(&folio->_total_mapcount,
+			   atomic_read(&folio->_total_mapcount) - 1);
+		__folio_add_large_rmap_val_exclusive(folio, -1, vma->vm_mm);
+	} else {
+		atomic_dec(&folio->_total_mapcount);
+		__folio_add_large_rmap_val(folio, -1, vma->vm_mm);
+	}
+	__folio_write_large_rmap_end(folio, exclusive);
 }
 
 /* RMAP flags, currently only relevant for some anon rmap operations. */
diff --git a/mm/rmap_id.c b/mm/rmap_id.c
index 421d8d2b646c..5009c6e43965 100644
--- a/mm/rmap_id.c
+++ b/mm/rmap_id.c
@@ -379,6 +379,58 @@ void __folio_add_large_rmap_val(struct folio *folio, int count,
 	}
 }
 
+void __folio_add_large_rmap_val_exclusive(struct folio *folio, int count,
+		struct mm_struct *mm)
+{
+	const unsigned int order = folio_order(folio);
+
+	/*
+	 * Exclusive-writer variant of __folio_add_large_rmap_val(): concurrent
+	 * rmap value modifications are impossible, and store tearing is fine
+	 * because readers detect the update via the seqcount and retry. So
+	 * adjust the bare atomic counters, scaling each subid by @count.
+	 */
+	switch (order) {
+#if MAX_ORDER >= RMAP_SUBID_6_MIN_ORDER
+	case RMAP_SUBID_6_MIN_ORDER ... RMAP_SUBID_6_MAX_ORDER:
+		folio->_rmap_val0.counter += get_rmap_subid_6(mm, 0) * count;
+		folio->_rmap_val1.counter += get_rmap_subid_6(mm, 1) * count;
+		folio->_rmap_val2.counter += get_rmap_subid_6(mm, 2) * count;
+		folio->_rmap_val3.counter += get_rmap_subid_6(mm, 3) * count;
+		folio->_rmap_val4.counter += get_rmap_subid_6(mm, 4) * count;
+		folio->_rmap_val5.counter += get_rmap_subid_6(mm, 5) * count;
+		break;
+#endif
+#if MAX_ORDER >= RMAP_SUBID_5_MIN_ORDER
+	case RMAP_SUBID_5_MIN_ORDER ... RMAP_SUBID_5_MAX_ORDER:
+		folio->_rmap_val0.counter += get_rmap_subid_5(mm, 0) * count;
+		folio->_rmap_val1.counter += get_rmap_subid_5(mm, 1) * count;
+		folio->_rmap_val2.counter += get_rmap_subid_5(mm, 2) * count;
+		folio->_rmap_val3.counter += get_rmap_subid_5(mm, 3) * count;
+		folio->_rmap_val4.counter += get_rmap_subid_5(mm, 4) * count;
+		break;
+#endif
+	case RMAP_SUBID_4_MIN_ORDER ... RMAP_SUBID_4_MAX_ORDER:
+		folio->_rmap_val0.counter += get_rmap_subid_4(mm, 0) * count;
+		folio->_rmap_val1.counter += get_rmap_subid_4(mm, 1) * count;
+		folio->_rmap_val2.counter += get_rmap_subid_4(mm, 2) * count;
+		folio->_rmap_val3.counter += get_rmap_subid_4(mm, 3) * count;
+		break;
+	case RMAP_SUBID_3_MIN_ORDER ... RMAP_SUBID_3_MAX_ORDER:
+		folio->_rmap_val0.counter += get_rmap_subid_3(mm, 0) * count;
+		folio->_rmap_val1.counter += get_rmap_subid_3(mm, 1) * count;
+		folio->_rmap_val2.counter += get_rmap_subid_3(mm, 2) * count;
+		break;
+	case RMAP_SUBID_2_MIN_ORDER ... RMAP_SUBID_2_MAX_ORDER:
+		folio->_rmap_val0.counter += get_rmap_subid_2(mm, 0) * count;
+		folio->_rmap_val1.counter += get_rmap_subid_2(mm, 1) * count;
+		break;
+	default:
+		folio->_rmap_val0.counter += get_rmap_subid_1(mm) * count;
+		break;
+	}
+}
+
 bool __folio_has_large_matching_rmap_val(struct folio *folio, int count,
 		 struct mm_struct *mm)
 {
-- 
2.41.0

next prev parent reply	other threads:[~2023-11-24 13:27 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-24 13:26 [PATCH WIP v1 00/20] mm: precise "mapped shared" vs. "mapped exclusively" detection for PTE-mapped THP / partially-mappable folios David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 01/20] mm/rmap: factor out adding folio range into __folio_add_rmap_range() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 02/20] mm: add a total mapcount for large folios David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 03/20] mm: convert folio_estimated_sharers() to folio_mapped_shared() and improve it David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 04/20] mm/rmap: pass dst_vma to page_try_dup_anon_rmap() and page_dup_file_rmap() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 05/20] mm/rmap: abstract total mapcount operations for partially-mappable folios David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 06/20] atomic_seqcount: new (raw) seqcount variant to support concurrent writers David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 07/20] mm/rmap_id: track if one ore multiple MMs map a partially-mappable folio David Hildenbrand
2023-12-17 19:13   ` Nadav Amit
2023-12-18 14:04     ` David Hildenbrand
2023-12-18 14:34       ` Nadav Amit
2023-11-24 13:26 ` [PATCH WIP v1 08/20] mm: pass MM to folio_mapped_shared() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 09/20] mm: improve folio_mapped_shared() for partially-mappable folios using rmap IDs David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 10/20] mm/memory: COW reuse support for PTE-mapped THP with " David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 11/20] mm/rmap_id: support for 1, 2 and 3 values by manual calculation David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 12/20] mm/rmap: introduce folio_add_anon_rmap_range() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 13/20] mm/huge_memory: batch rmap operations in __split_huge_pmd_locked() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 14/20] mm/huge_memory: avoid folio_refcount() < folio_mapcount() " David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 15/20] mm/rmap_id: verify precalculated subids with CONFIG_DEBUG_VM David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 16/20] atomic_seqcount: support a single exclusive writer in the absence of other writers David Hildenbrand
2023-11-24 13:26 ` David Hildenbrand [this message]
2023-11-24 13:26 ` [PATCH WIP v1 18/20] atomic_seqcount: use atomic add-return instead of atomic cmpxchg on 64bit David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 19/20] mm/rmap: factor out removing folio range into __folio_remove_rmap_range() David Hildenbrand
2023-11-24 13:26 ` [PATCH WIP v1 20/20] mm/rmap: perform all mapcount operations of large folios under the rmap seqcount David Hildenbrand
2023-11-24 20:55 ` [PATCH WIP v1 00/20] mm: precise "mapped shared" vs. "mapped exclusively" detection for PTE-mapped THP / partially-mappable folios Linus Torvalds
2023-11-25 17:02   ` David Hildenbrand

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:0758dddc552 dfblob:538c23d3c0c dfblob:421d8d2b646
dfblob:5009c6e4396 )
 OR (
bs:"[PATCH WIP v1 17/20] mm/rmap_id: reduce atomic RMW operations when we are the exclusive writer" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231124132626.235350-18-david@redhat.com \
    --to=david@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=fengwei.yin@intel.com \
    --cc=hughd@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=mingo@redhat.com \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=ryan.roberts@arm.com \
    --cc=shy828301@gmail.com \
    --cc=torvalds@linux-foundation.org \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=ying.huang@intel.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).